{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.2179989799258544, "eval_steps": 5000000.0, "global_step": 320000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.806246812268295e-05, "grad_norm": 0.08437860012054443, "learning_rate": 5e-06, "loss": 2.1394, "step": 10 }, { "epoch": 7.61249362453659e-05, "grad_norm": 0.08035453408956528, "learning_rate": 1e-05, "loss": 2.1428, "step": 20 }, { "epoch": 0.00011418740436804885, "grad_norm": 0.07435434311628342, "learning_rate": 1.5e-05, "loss": 2.1325, "step": 30 }, { "epoch": 0.0001522498724907318, "grad_norm": 0.07114154100418091, "learning_rate": 2e-05, "loss": 2.1374, "step": 40 }, { "epoch": 0.00019031234061341474, "grad_norm": 0.07513192296028137, "learning_rate": 2.5e-05, "loss": 2.1407, "step": 50 }, { "epoch": 0.0002283748087360977, "grad_norm": 0.07081104069948196, "learning_rate": 3e-05, "loss": 2.1129, "step": 60 }, { "epoch": 0.00026643727685878065, "grad_norm": 0.0714515671133995, "learning_rate": 3.5000000000000004e-05, "loss": 2.1324, "step": 70 }, { "epoch": 0.0003044997449814636, "grad_norm": 0.07307267189025879, "learning_rate": 4e-05, "loss": 2.1071, "step": 80 }, { "epoch": 0.0003425622131041465, "grad_norm": 0.07330012321472168, "learning_rate": 4.4999999999999996e-05, "loss": 2.1344, "step": 90 }, { "epoch": 0.0003806246812268295, "grad_norm": 0.07287479937076569, "learning_rate": 5e-05, "loss": 2.1276, "step": 100 }, { "epoch": 0.0004186871493495124, "grad_norm": 0.07311001420021057, "learning_rate": 5.5e-05, "loss": 2.13, "step": 110 }, { "epoch": 0.0004567496174721954, "grad_norm": 0.07323328405618668, "learning_rate": 6e-05, "loss": 2.0992, "step": 120 }, { "epoch": 0.0004948120855948783, "grad_norm": 0.07578736543655396, "learning_rate": 6.500000000000001e-05, "loss": 2.1201, "step": 130 }, { "epoch": 0.0005328745537175613, "grad_norm": 0.073748379945755, "learning_rate": 7.000000000000001e-05, "loss": 2.1309, "step": 140 }, { "epoch": 0.0005709370218402442, "grad_norm": 0.07653962820768356, "learning_rate": 7.5e-05, "loss": 2.115, "step": 150 }, { "epoch": 0.0006089994899629272, "grad_norm": 0.07610829174518585, "learning_rate": 8e-05, "loss": 2.1164, "step": 160 }, { "epoch": 0.0006470619580856101, "grad_norm": 0.07669228315353394, "learning_rate": 8.5e-05, "loss": 2.1047, "step": 170 }, { "epoch": 0.000685124426208293, "grad_norm": 0.07672448456287384, "learning_rate": 8.999999999999999e-05, "loss": 2.1072, "step": 180 }, { "epoch": 0.000723186894330976, "grad_norm": 0.07850458472967148, "learning_rate": 9.5e-05, "loss": 2.1163, "step": 190 }, { "epoch": 0.000761249362453659, "grad_norm": 0.07940211147069931, "learning_rate": 0.0001, "loss": 2.1152, "step": 200 }, { "epoch": 0.0007993118305763419, "grad_norm": 0.07731697708368301, "learning_rate": 0.000105, "loss": 2.1049, "step": 210 }, { "epoch": 0.0008373742986990248, "grad_norm": 0.07781223952770233, "learning_rate": 0.00011, "loss": 2.1002, "step": 220 }, { "epoch": 0.0008754367668217077, "grad_norm": 0.07948898524045944, "learning_rate": 0.000115, "loss": 2.0993, "step": 230 }, { "epoch": 0.0009134992349443908, "grad_norm": 0.07564356923103333, "learning_rate": 0.00012, "loss": 2.1122, "step": 240 }, { "epoch": 0.0009515617030670737, "grad_norm": 0.0799378901720047, "learning_rate": 0.000125, "loss": 2.1155, "step": 250 }, { "epoch": 0.0009896241711897565, "grad_norm": 0.07956661283969879, "learning_rate": 0.00013000000000000002, "loss": 2.1079, "step": 260 }, { "epoch": 0.0010276866393124397, "grad_norm": 0.08274099230766296, "learning_rate": 0.000135, "loss": 2.0886, "step": 270 }, { "epoch": 0.0010657491074351226, "grad_norm": 0.08240476250648499, "learning_rate": 0.00014000000000000001, "loss": 2.1123, "step": 280 }, { "epoch": 0.0011038115755578055, "grad_norm": 0.08515627682209015, "learning_rate": 0.000145, "loss": 2.1003, "step": 290 }, { "epoch": 0.0011418740436804885, "grad_norm": 0.08448918908834457, "learning_rate": 0.00015, "loss": 2.1146, "step": 300 }, { "epoch": 0.0011799365118031714, "grad_norm": 0.08467483520507812, "learning_rate": 0.000155, "loss": 2.1044, "step": 310 }, { "epoch": 0.0012179989799258543, "grad_norm": 0.08878104388713837, "learning_rate": 0.00016, "loss": 2.1088, "step": 320 }, { "epoch": 0.0012560614480485372, "grad_norm": 0.08743339031934738, "learning_rate": 0.000165, "loss": 2.1146, "step": 330 }, { "epoch": 0.0012941239161712202, "grad_norm": 0.08558017015457153, "learning_rate": 0.00017, "loss": 2.1011, "step": 340 }, { "epoch": 0.001332186384293903, "grad_norm": 0.08609547466039658, "learning_rate": 0.000175, "loss": 2.1014, "step": 350 }, { "epoch": 0.001370248852416586, "grad_norm": 0.0850830003619194, "learning_rate": 0.00017999999999999998, "loss": 2.0965, "step": 360 }, { "epoch": 0.0014083113205392692, "grad_norm": 0.08757133781909943, "learning_rate": 0.000185, "loss": 2.0986, "step": 370 }, { "epoch": 0.001446373788661952, "grad_norm": 0.08911364525556564, "learning_rate": 0.00019, "loss": 2.1004, "step": 380 }, { "epoch": 0.001484436256784635, "grad_norm": 0.09093070030212402, "learning_rate": 0.00019500000000000002, "loss": 2.1119, "step": 390 }, { "epoch": 0.001522498724907318, "grad_norm": 0.10527298599481583, "learning_rate": 0.0002, "loss": 2.1042, "step": 400 }, { "epoch": 0.0015605611930300009, "grad_norm": 0.0866197869181633, "learning_rate": 0.000205, "loss": 2.1035, "step": 410 }, { "epoch": 0.0015986236611526838, "grad_norm": 0.09171626716852188, "learning_rate": 0.00021, "loss": 2.0852, "step": 420 }, { "epoch": 0.0016366861292753667, "grad_norm": 0.0898546651005745, "learning_rate": 0.000215, "loss": 2.0928, "step": 430 }, { "epoch": 0.0016747485973980496, "grad_norm": 0.09225795418024063, "learning_rate": 0.00022, "loss": 2.1046, "step": 440 }, { "epoch": 0.0017128110655207326, "grad_norm": 0.09323623031377792, "learning_rate": 0.00022500000000000002, "loss": 2.1001, "step": 450 }, { "epoch": 0.0017508735336434155, "grad_norm": 0.08722110092639923, "learning_rate": 0.00023, "loss": 2.1023, "step": 460 }, { "epoch": 0.0017889360017660984, "grad_norm": 0.09408703446388245, "learning_rate": 0.000235, "loss": 2.0973, "step": 470 }, { "epoch": 0.0018269984698887816, "grad_norm": 0.0959773138165474, "learning_rate": 0.00024, "loss": 2.1032, "step": 480 }, { "epoch": 0.0018650609380114645, "grad_norm": 0.0968349426984787, "learning_rate": 0.000245, "loss": 2.093, "step": 490 }, { "epoch": 0.0019031234061341474, "grad_norm": 0.0896330177783966, "learning_rate": 0.00025, "loss": 2.1037, "step": 500 }, { "epoch": 0.0019411858742568303, "grad_norm": 0.09475599974393845, "learning_rate": 0.000255, "loss": 2.1015, "step": 510 }, { "epoch": 0.001979248342379513, "grad_norm": 0.09628993272781372, "learning_rate": 0.00026000000000000003, "loss": 2.1125, "step": 520 }, { "epoch": 0.002017310810502196, "grad_norm": 0.10538001358509064, "learning_rate": 0.00026500000000000004, "loss": 2.1087, "step": 530 }, { "epoch": 0.0020553732786248793, "grad_norm": 0.09868626296520233, "learning_rate": 0.00027, "loss": 2.0996, "step": 540 }, { "epoch": 0.002093435746747562, "grad_norm": 0.09002415835857391, "learning_rate": 0.000275, "loss": 2.1165, "step": 550 }, { "epoch": 0.002131498214870245, "grad_norm": 0.09453532099723816, "learning_rate": 0.00028000000000000003, "loss": 2.0897, "step": 560 }, { "epoch": 0.002169560682992928, "grad_norm": 0.09969169646501541, "learning_rate": 0.000285, "loss": 2.0933, "step": 570 }, { "epoch": 0.002207623151115611, "grad_norm": 0.09832243621349335, "learning_rate": 0.00029, "loss": 2.0854, "step": 580 }, { "epoch": 0.0022456856192382938, "grad_norm": 0.10161517560482025, "learning_rate": 0.000295, "loss": 2.0979, "step": 590 }, { "epoch": 0.002283748087360977, "grad_norm": 0.10710626095533371, "learning_rate": 0.0003, "loss": 2.1076, "step": 600 }, { "epoch": 0.0023218105554836596, "grad_norm": 0.09586170315742493, "learning_rate": 0.000305, "loss": 2.1116, "step": 610 }, { "epoch": 0.0023598730236063428, "grad_norm": 0.09288761764764786, "learning_rate": 0.00031, "loss": 2.1174, "step": 620 }, { "epoch": 0.0023979354917290255, "grad_norm": 0.10027331113815308, "learning_rate": 0.000315, "loss": 2.1152, "step": 630 }, { "epoch": 0.0024359979598517086, "grad_norm": 0.0982818529009819, "learning_rate": 0.00032, "loss": 2.1039, "step": 640 }, { "epoch": 0.0024740604279743918, "grad_norm": 0.09577590972185135, "learning_rate": 0.00032500000000000004, "loss": 2.1071, "step": 650 }, { "epoch": 0.0025121228960970745, "grad_norm": 0.09431636333465576, "learning_rate": 0.00033, "loss": 2.1073, "step": 660 }, { "epoch": 0.0025501853642197576, "grad_norm": 0.10308602452278137, "learning_rate": 0.000335, "loss": 2.1145, "step": 670 }, { "epoch": 0.0025882478323424403, "grad_norm": 0.0990699827671051, "learning_rate": 0.00034, "loss": 2.1081, "step": 680 }, { "epoch": 0.0026263103004651235, "grad_norm": 0.10600564628839493, "learning_rate": 0.000345, "loss": 2.11, "step": 690 }, { "epoch": 0.002664372768587806, "grad_norm": 0.10044153034687042, "learning_rate": 0.00035, "loss": 2.1018, "step": 700 }, { "epoch": 0.0027024352367104893, "grad_norm": 0.09696255624294281, "learning_rate": 0.000355, "loss": 2.1055, "step": 710 }, { "epoch": 0.002740497704833172, "grad_norm": 0.10313771665096283, "learning_rate": 0.00035999999999999997, "loss": 2.1031, "step": 720 }, { "epoch": 0.002778560172955855, "grad_norm": 0.10236144065856934, "learning_rate": 0.000365, "loss": 2.1022, "step": 730 }, { "epoch": 0.0028166226410785383, "grad_norm": 0.10610686242580414, "learning_rate": 0.00037, "loss": 2.1216, "step": 740 }, { "epoch": 0.002854685109201221, "grad_norm": 0.10392218083143234, "learning_rate": 0.000375, "loss": 2.1294, "step": 750 }, { "epoch": 0.002892747577323904, "grad_norm": 0.11248600482940674, "learning_rate": 0.00038, "loss": 2.1082, "step": 760 }, { "epoch": 0.002930810045446587, "grad_norm": 0.10452433675527573, "learning_rate": 0.00038500000000000003, "loss": 2.1289, "step": 770 }, { "epoch": 0.00296887251356927, "grad_norm": 0.1146014854311943, "learning_rate": 0.00039000000000000005, "loss": 2.1147, "step": 780 }, { "epoch": 0.0030069349816919527, "grad_norm": 0.10917292535305023, "learning_rate": 0.000395, "loss": 2.1198, "step": 790 }, { "epoch": 0.003044997449814636, "grad_norm": 0.10151583701372147, "learning_rate": 0.0004, "loss": 2.1119, "step": 800 }, { "epoch": 0.0030830599179373186, "grad_norm": 0.10754556208848953, "learning_rate": 0.00040500000000000003, "loss": 2.1106, "step": 810 }, { "epoch": 0.0031211223860600017, "grad_norm": 0.11548687517642975, "learning_rate": 0.00041, "loss": 2.1151, "step": 820 }, { "epoch": 0.0031591848541826844, "grad_norm": 0.1291622668504715, "learning_rate": 0.000415, "loss": 2.1142, "step": 830 }, { "epoch": 0.0031972473223053676, "grad_norm": 0.10699468106031418, "learning_rate": 0.00042, "loss": 2.1266, "step": 840 }, { "epoch": 0.0032353097904280507, "grad_norm": 0.11056250333786011, "learning_rate": 0.000425, "loss": 2.1279, "step": 850 }, { "epoch": 0.0032733722585507334, "grad_norm": 0.10794071853160858, "learning_rate": 0.00043, "loss": 2.1094, "step": 860 }, { "epoch": 0.0033114347266734166, "grad_norm": 0.10671833902597427, "learning_rate": 0.000435, "loss": 2.1074, "step": 870 }, { "epoch": 0.0033494971947960993, "grad_norm": 0.12604257464408875, "learning_rate": 0.00044, "loss": 2.1375, "step": 880 }, { "epoch": 0.0033875596629187824, "grad_norm": 0.12434367090463638, "learning_rate": 0.00044500000000000003, "loss": 2.1147, "step": 890 }, { "epoch": 0.003425622131041465, "grad_norm": 0.11782653629779816, "learning_rate": 0.00045000000000000004, "loss": 2.1343, "step": 900 }, { "epoch": 0.0034636845991641483, "grad_norm": 0.11729437857866287, "learning_rate": 0.000455, "loss": 2.1274, "step": 910 }, { "epoch": 0.003501747067286831, "grad_norm": 0.10995844751596451, "learning_rate": 0.00046, "loss": 2.1398, "step": 920 }, { "epoch": 0.003539809535409514, "grad_norm": 0.09970034658908844, "learning_rate": 0.000465, "loss": 2.1258, "step": 930 }, { "epoch": 0.003577872003532197, "grad_norm": 0.11986082047224045, "learning_rate": 0.00047, "loss": 2.1359, "step": 940 }, { "epoch": 0.00361593447165488, "grad_norm": 0.12323372066020966, "learning_rate": 0.000475, "loss": 2.1323, "step": 950 }, { "epoch": 0.003653996939777563, "grad_norm": 0.10662589967250824, "learning_rate": 0.00048, "loss": 2.1449, "step": 960 }, { "epoch": 0.003692059407900246, "grad_norm": 0.10080249607563019, "learning_rate": 0.00048499999999999997, "loss": 2.1253, "step": 970 }, { "epoch": 0.003730121876022929, "grad_norm": 0.11241836100816727, "learning_rate": 0.00049, "loss": 2.1465, "step": 980 }, { "epoch": 0.0037681843441456117, "grad_norm": 0.11886326968669891, "learning_rate": 0.000495, "loss": 2.141, "step": 990 }, { "epoch": 0.003806246812268295, "grad_norm": 0.12044759094715118, "learning_rate": 0.0005, "loss": 2.1339, "step": 1000 }, { "epoch": 0.0038443092803909775, "grad_norm": 0.11921434849500656, "learning_rate": 0.0005, "loss": 2.1449, "step": 1010 }, { "epoch": 0.0038823717485136607, "grad_norm": 0.10983549058437347, "learning_rate": 0.0005, "loss": 2.1313, "step": 1020 }, { "epoch": 0.003920434216636344, "grad_norm": 0.11801790446043015, "learning_rate": 0.0005, "loss": 2.123, "step": 1030 }, { "epoch": 0.003958496684759026, "grad_norm": 0.12907488644123077, "learning_rate": 0.0005, "loss": 2.1423, "step": 1040 }, { "epoch": 0.003996559152881709, "grad_norm": 0.11516977846622467, "learning_rate": 0.0005, "loss": 2.1381, "step": 1050 }, { "epoch": 0.004034621621004392, "grad_norm": 0.1066596731543541, "learning_rate": 0.0005, "loss": 2.1424, "step": 1060 }, { "epoch": 0.0040726840891270755, "grad_norm": 0.12312052398920059, "learning_rate": 0.0005, "loss": 2.1452, "step": 1070 }, { "epoch": 0.004110746557249759, "grad_norm": 0.11293191462755203, "learning_rate": 0.0005, "loss": 2.1338, "step": 1080 }, { "epoch": 0.004148809025372441, "grad_norm": 0.1253630816936493, "learning_rate": 0.0005, "loss": 2.1493, "step": 1090 }, { "epoch": 0.004186871493495124, "grad_norm": 0.10422249883413315, "learning_rate": 0.0005, "loss": 2.1507, "step": 1100 }, { "epoch": 0.004224933961617807, "grad_norm": 0.1098194494843483, "learning_rate": 0.0005, "loss": 2.1408, "step": 1110 }, { "epoch": 0.00426299642974049, "grad_norm": 0.10751495510339737, "learning_rate": 0.0005, "loss": 2.12, "step": 1120 }, { "epoch": 0.004301058897863173, "grad_norm": 0.12427380681037903, "learning_rate": 0.0005, "loss": 2.1429, "step": 1130 }, { "epoch": 0.004339121365985856, "grad_norm": 0.13727758824825287, "learning_rate": 0.0005, "loss": 2.1494, "step": 1140 }, { "epoch": 0.004377183834108539, "grad_norm": 0.10616891831159592, "learning_rate": 0.0005, "loss": 2.1287, "step": 1150 }, { "epoch": 0.004415246302231222, "grad_norm": 0.10865821689367294, "learning_rate": 0.0005, "loss": 2.1446, "step": 1160 }, { "epoch": 0.004453308770353905, "grad_norm": 0.11342296004295349, "learning_rate": 0.0005, "loss": 2.1313, "step": 1170 }, { "epoch": 0.0044913712384765875, "grad_norm": 0.10417460650205612, "learning_rate": 0.0005, "loss": 2.1358, "step": 1180 }, { "epoch": 0.004529433706599271, "grad_norm": 0.11205831915140152, "learning_rate": 0.0005, "loss": 2.1393, "step": 1190 }, { "epoch": 0.004567496174721954, "grad_norm": 0.10783912241458893, "learning_rate": 0.0005, "loss": 2.1282, "step": 1200 }, { "epoch": 0.004605558642844637, "grad_norm": 0.10432789474725723, "learning_rate": 0.0005, "loss": 2.1394, "step": 1210 }, { "epoch": 0.004643621110967319, "grad_norm": 0.10828524082899094, "learning_rate": 0.0005, "loss": 2.1317, "step": 1220 }, { "epoch": 0.004681683579090002, "grad_norm": 0.10294743627309799, "learning_rate": 0.0005, "loss": 2.1477, "step": 1230 }, { "epoch": 0.0047197460472126855, "grad_norm": 0.10957608371973038, "learning_rate": 0.0005, "loss": 2.1429, "step": 1240 }, { "epoch": 0.004757808515335369, "grad_norm": 0.11409583687782288, "learning_rate": 0.0005, "loss": 2.1261, "step": 1250 }, { "epoch": 0.004795870983458051, "grad_norm": 0.10860385745763779, "learning_rate": 0.0005, "loss": 2.1435, "step": 1260 }, { "epoch": 0.004833933451580734, "grad_norm": 0.10973881185054779, "learning_rate": 0.0005, "loss": 2.1568, "step": 1270 }, { "epoch": 0.004871995919703417, "grad_norm": 0.10787376016378403, "learning_rate": 0.0005, "loss": 2.1314, "step": 1280 }, { "epoch": 0.0049100583878261, "grad_norm": 0.11786238849163055, "learning_rate": 0.0005, "loss": 2.1388, "step": 1290 }, { "epoch": 0.0049481208559487835, "grad_norm": 0.11037887632846832, "learning_rate": 0.0005, "loss": 2.128, "step": 1300 }, { "epoch": 0.004986183324071466, "grad_norm": 0.11480342596769333, "learning_rate": 0.0005, "loss": 2.1456, "step": 1310 }, { "epoch": 0.005024245792194149, "grad_norm": 0.10816387087106705, "learning_rate": 0.0005, "loss": 2.1485, "step": 1320 }, { "epoch": 0.005062308260316832, "grad_norm": 0.11930803954601288, "learning_rate": 0.0005, "loss": 2.1374, "step": 1330 }, { "epoch": 0.005100370728439515, "grad_norm": 0.12375199794769287, "learning_rate": 0.0005, "loss": 2.1453, "step": 1340 }, { "epoch": 0.0051384331965621975, "grad_norm": 0.10290851444005966, "learning_rate": 0.0005, "loss": 2.1449, "step": 1350 }, { "epoch": 0.005176495664684881, "grad_norm": 0.11710334569215775, "learning_rate": 0.0005, "loss": 2.1378, "step": 1360 }, { "epoch": 0.005214558132807564, "grad_norm": 0.10058943927288055, "learning_rate": 0.0005, "loss": 2.1467, "step": 1370 }, { "epoch": 0.005252620600930247, "grad_norm": 0.11752262711524963, "learning_rate": 0.0005, "loss": 2.14, "step": 1380 }, { "epoch": 0.00529068306905293, "grad_norm": 0.12212508171796799, "learning_rate": 0.0005, "loss": 2.1479, "step": 1390 }, { "epoch": 0.005328745537175612, "grad_norm": 0.11722426861524582, "learning_rate": 0.0005, "loss": 2.1462, "step": 1400 }, { "epoch": 0.0053668080052982955, "grad_norm": 0.10403724014759064, "learning_rate": 0.0005, "loss": 2.1408, "step": 1410 }, { "epoch": 0.005404870473420979, "grad_norm": 0.11112242937088013, "learning_rate": 0.0005, "loss": 2.1525, "step": 1420 }, { "epoch": 0.005442932941543662, "grad_norm": 0.1107005923986435, "learning_rate": 0.0005, "loss": 2.1496, "step": 1430 }, { "epoch": 0.005480995409666344, "grad_norm": 0.11069675534963608, "learning_rate": 0.0005, "loss": 2.1444, "step": 1440 }, { "epoch": 0.005519057877789027, "grad_norm": 0.1197921559214592, "learning_rate": 0.0005, "loss": 2.1278, "step": 1450 }, { "epoch": 0.00555712034591171, "grad_norm": 0.1074242815375328, "learning_rate": 0.0005, "loss": 2.1439, "step": 1460 }, { "epoch": 0.0055951828140343935, "grad_norm": 0.11167777329683304, "learning_rate": 0.0005, "loss": 2.1386, "step": 1470 }, { "epoch": 0.005633245282157077, "grad_norm": 0.11702080816030502, "learning_rate": 0.0005, "loss": 2.1487, "step": 1480 }, { "epoch": 0.005671307750279759, "grad_norm": 0.13201381266117096, "learning_rate": 0.0005, "loss": 2.1592, "step": 1490 }, { "epoch": 0.005709370218402442, "grad_norm": 0.1159653514623642, "learning_rate": 0.0005, "loss": 2.1546, "step": 1500 }, { "epoch": 0.005747432686525125, "grad_norm": 0.09878107905387878, "learning_rate": 0.0005, "loss": 2.1375, "step": 1510 }, { "epoch": 0.005785495154647808, "grad_norm": 0.10091791301965714, "learning_rate": 0.0005, "loss": 2.1668, "step": 1520 }, { "epoch": 0.005823557622770491, "grad_norm": 0.11111395061016083, "learning_rate": 0.0005, "loss": 2.13, "step": 1530 }, { "epoch": 0.005861620090893174, "grad_norm": 0.13084878027439117, "learning_rate": 0.0005, "loss": 2.1393, "step": 1540 }, { "epoch": 0.005899682559015857, "grad_norm": 0.11034942418336868, "learning_rate": 0.0005, "loss": 2.13, "step": 1550 }, { "epoch": 0.00593774502713854, "grad_norm": 0.11699171364307404, "learning_rate": 0.0005, "loss": 2.1414, "step": 1560 }, { "epoch": 0.005975807495261222, "grad_norm": 0.11403294652700424, "learning_rate": 0.0005, "loss": 2.1442, "step": 1570 }, { "epoch": 0.0060138699633839054, "grad_norm": 0.13072063028812408, "learning_rate": 0.0005, "loss": 2.1493, "step": 1580 }, { "epoch": 0.006051932431506589, "grad_norm": 0.11782855540513992, "learning_rate": 0.0005, "loss": 2.1406, "step": 1590 }, { "epoch": 0.006089994899629272, "grad_norm": 0.10452011972665787, "learning_rate": 0.0005, "loss": 2.1393, "step": 1600 }, { "epoch": 0.006128057367751955, "grad_norm": 0.11926303058862686, "learning_rate": 0.0005, "loss": 2.1456, "step": 1610 }, { "epoch": 0.006166119835874637, "grad_norm": 0.11260377615690231, "learning_rate": 0.0005, "loss": 2.129, "step": 1620 }, { "epoch": 0.00620418230399732, "grad_norm": 0.10686420649290085, "learning_rate": 0.0005, "loss": 2.1284, "step": 1630 }, { "epoch": 0.0062422447721200034, "grad_norm": 0.10754341632127762, "learning_rate": 0.0005, "loss": 2.1458, "step": 1640 }, { "epoch": 0.006280307240242687, "grad_norm": 0.1057191789150238, "learning_rate": 0.0005, "loss": 2.1405, "step": 1650 }, { "epoch": 0.006318369708365369, "grad_norm": 0.1099468246102333, "learning_rate": 0.0005, "loss": 2.1395, "step": 1660 }, { "epoch": 0.006356432176488052, "grad_norm": 0.11434461176395416, "learning_rate": 0.0005, "loss": 2.134, "step": 1670 }, { "epoch": 0.006394494644610735, "grad_norm": 0.11896966397762299, "learning_rate": 0.0005, "loss": 2.1423, "step": 1680 }, { "epoch": 0.006432557112733418, "grad_norm": 0.11888067424297333, "learning_rate": 0.0005, "loss": 2.1559, "step": 1690 }, { "epoch": 0.0064706195808561014, "grad_norm": 0.1014968678355217, "learning_rate": 0.0005, "loss": 2.138, "step": 1700 }, { "epoch": 0.006508682048978784, "grad_norm": 0.1254124641418457, "learning_rate": 0.0005, "loss": 2.1466, "step": 1710 }, { "epoch": 0.006546744517101467, "grad_norm": 0.10429880768060684, "learning_rate": 0.0005, "loss": 2.1436, "step": 1720 }, { "epoch": 0.00658480698522415, "grad_norm": 0.10479571670293808, "learning_rate": 0.0005, "loss": 2.1333, "step": 1730 }, { "epoch": 0.006622869453346833, "grad_norm": 0.1161293312907219, "learning_rate": 0.0005, "loss": 2.1296, "step": 1740 }, { "epoch": 0.006660931921469515, "grad_norm": 0.11300167441368103, "learning_rate": 0.0005, "loss": 2.1493, "step": 1750 }, { "epoch": 0.0066989943895921986, "grad_norm": 0.11762259900569916, "learning_rate": 0.0005, "loss": 2.1501, "step": 1760 }, { "epoch": 0.006737056857714882, "grad_norm": 0.11976729333400726, "learning_rate": 0.0005, "loss": 2.1399, "step": 1770 }, { "epoch": 0.006775119325837565, "grad_norm": 0.13087745010852814, "learning_rate": 0.0005, "loss": 2.141, "step": 1780 }, { "epoch": 0.006813181793960247, "grad_norm": 0.11606195569038391, "learning_rate": 0.0005, "loss": 2.1451, "step": 1790 }, { "epoch": 0.00685124426208293, "grad_norm": 0.12213372439146042, "learning_rate": 0.0005, "loss": 2.1434, "step": 1800 }, { "epoch": 0.006889306730205613, "grad_norm": 0.11877444386482239, "learning_rate": 0.0005, "loss": 2.1391, "step": 1810 }, { "epoch": 0.0069273691983282966, "grad_norm": 0.11366769671440125, "learning_rate": 0.0005, "loss": 2.1611, "step": 1820 }, { "epoch": 0.00696543166645098, "grad_norm": 0.11194705218076706, "learning_rate": 0.0005, "loss": 2.1607, "step": 1830 }, { "epoch": 0.007003494134573662, "grad_norm": 0.12459465116262436, "learning_rate": 0.0005, "loss": 2.1334, "step": 1840 }, { "epoch": 0.007041556602696345, "grad_norm": 0.10596070438623428, "learning_rate": 0.0005, "loss": 2.154, "step": 1850 }, { "epoch": 0.007079619070819028, "grad_norm": 0.11263356357812881, "learning_rate": 0.0005, "loss": 2.1522, "step": 1860 }, { "epoch": 0.007117681538941711, "grad_norm": 0.11101698130369186, "learning_rate": 0.0005, "loss": 2.1406, "step": 1870 }, { "epoch": 0.007155744007064394, "grad_norm": 0.10336757451295853, "learning_rate": 0.0005, "loss": 2.1492, "step": 1880 }, { "epoch": 0.007193806475187077, "grad_norm": 0.10375799983739853, "learning_rate": 0.0005, "loss": 2.1344, "step": 1890 }, { "epoch": 0.00723186894330976, "grad_norm": 0.11153291165828705, "learning_rate": 0.0005, "loss": 2.1587, "step": 1900 }, { "epoch": 0.007269931411432443, "grad_norm": 0.1019439771771431, "learning_rate": 0.0005, "loss": 2.1502, "step": 1910 }, { "epoch": 0.007307993879555126, "grad_norm": 0.1037280336022377, "learning_rate": 0.0005, "loss": 2.1525, "step": 1920 }, { "epoch": 0.0073460563476778085, "grad_norm": 0.12573005259037018, "learning_rate": 0.0005, "loss": 2.1375, "step": 1930 }, { "epoch": 0.007384118815800492, "grad_norm": 0.11176995187997818, "learning_rate": 0.0005, "loss": 2.1367, "step": 1940 }, { "epoch": 0.007422181283923175, "grad_norm": 0.10850751399993896, "learning_rate": 0.0005, "loss": 2.143, "step": 1950 }, { "epoch": 0.007460243752045858, "grad_norm": 0.10029775649309158, "learning_rate": 0.0005, "loss": 2.1448, "step": 1960 }, { "epoch": 0.00749830622016854, "grad_norm": 0.10943976044654846, "learning_rate": 0.0005, "loss": 2.1469, "step": 1970 }, { "epoch": 0.007536368688291223, "grad_norm": 0.11815327405929565, "learning_rate": 0.0005, "loss": 2.1479, "step": 1980 }, { "epoch": 0.0075744311564139065, "grad_norm": 0.10207410156726837, "learning_rate": 0.0005, "loss": 2.1407, "step": 1990 }, { "epoch": 0.00761249362453659, "grad_norm": 0.12041871249675751, "learning_rate": 0.0005, "loss": 2.1341, "step": 2000 }, { "epoch": 0.007650556092659273, "grad_norm": 0.11289781332015991, "learning_rate": 0.0005, "loss": 2.1497, "step": 2010 }, { "epoch": 0.007688618560781955, "grad_norm": 0.11642885953187943, "learning_rate": 0.0005, "loss": 2.15, "step": 2020 }, { "epoch": 0.007726681028904638, "grad_norm": 0.1056954562664032, "learning_rate": 0.0005, "loss": 2.1414, "step": 2030 }, { "epoch": 0.007764743497027321, "grad_norm": 0.11253539472818375, "learning_rate": 0.0005, "loss": 2.1484, "step": 2040 }, { "epoch": 0.0078028059651500045, "grad_norm": 0.11248568445444107, "learning_rate": 0.0005, "loss": 2.1635, "step": 2050 }, { "epoch": 0.007840868433272688, "grad_norm": 0.1113501563668251, "learning_rate": 0.0005, "loss": 2.1456, "step": 2060 }, { "epoch": 0.00787893090139537, "grad_norm": 0.12106958031654358, "learning_rate": 0.0005, "loss": 2.1467, "step": 2070 }, { "epoch": 0.007916993369518052, "grad_norm": 0.11136061698198318, "learning_rate": 0.0005, "loss": 2.1474, "step": 2080 }, { "epoch": 0.007955055837640735, "grad_norm": 0.11817717552185059, "learning_rate": 0.0005, "loss": 2.1401, "step": 2090 }, { "epoch": 0.007993118305763419, "grad_norm": 0.11543906480073929, "learning_rate": 0.0005, "loss": 2.1373, "step": 2100 }, { "epoch": 0.008031180773886102, "grad_norm": 0.11145801842212677, "learning_rate": 0.0005, "loss": 2.1439, "step": 2110 }, { "epoch": 0.008069243242008785, "grad_norm": 0.10091929137706757, "learning_rate": 0.0005, "loss": 2.1363, "step": 2120 }, { "epoch": 0.008107305710131468, "grad_norm": 0.10481547564268112, "learning_rate": 0.0005, "loss": 2.153, "step": 2130 }, { "epoch": 0.008145368178254151, "grad_norm": 0.10759226232767105, "learning_rate": 0.0005, "loss": 2.1397, "step": 2140 }, { "epoch": 0.008183430646376834, "grad_norm": 0.12797632813453674, "learning_rate": 0.0005, "loss": 2.1404, "step": 2150 }, { "epoch": 0.008221493114499517, "grad_norm": 0.11188928782939911, "learning_rate": 0.0005, "loss": 2.1589, "step": 2160 }, { "epoch": 0.008259555582622199, "grad_norm": 0.11084867268800735, "learning_rate": 0.0005, "loss": 2.1537, "step": 2170 }, { "epoch": 0.008297618050744882, "grad_norm": 0.11851538717746735, "learning_rate": 0.0005, "loss": 2.131, "step": 2180 }, { "epoch": 0.008335680518867565, "grad_norm": 0.11423757672309875, "learning_rate": 0.0005, "loss": 2.1374, "step": 2190 }, { "epoch": 0.008373742986990248, "grad_norm": 0.12670071423053741, "learning_rate": 0.0005, "loss": 2.1477, "step": 2200 }, { "epoch": 0.008411805455112931, "grad_norm": 0.11328286677598953, "learning_rate": 0.0005, "loss": 2.148, "step": 2210 }, { "epoch": 0.008449867923235614, "grad_norm": 0.10992307960987091, "learning_rate": 0.0005, "loss": 2.1454, "step": 2220 }, { "epoch": 0.008487930391358298, "grad_norm": 0.11549299955368042, "learning_rate": 0.0005, "loss": 2.1405, "step": 2230 }, { "epoch": 0.00852599285948098, "grad_norm": 0.1016901507973671, "learning_rate": 0.0005, "loss": 2.1468, "step": 2240 }, { "epoch": 0.008564055327603664, "grad_norm": 0.11092566698789597, "learning_rate": 0.0005, "loss": 2.1457, "step": 2250 }, { "epoch": 0.008602117795726345, "grad_norm": 0.10870914906263351, "learning_rate": 0.0005, "loss": 2.1502, "step": 2260 }, { "epoch": 0.008640180263849028, "grad_norm": 0.10901869088411331, "learning_rate": 0.0005, "loss": 2.134, "step": 2270 }, { "epoch": 0.008678242731971712, "grad_norm": 0.11331748217344284, "learning_rate": 0.0005, "loss": 2.1489, "step": 2280 }, { "epoch": 0.008716305200094395, "grad_norm": 0.10923583805561066, "learning_rate": 0.0005, "loss": 2.1541, "step": 2290 }, { "epoch": 0.008754367668217078, "grad_norm": 0.10727989673614502, "learning_rate": 0.0005, "loss": 2.1472, "step": 2300 }, { "epoch": 0.008792430136339761, "grad_norm": 0.10583169013261795, "learning_rate": 0.0005, "loss": 2.1315, "step": 2310 }, { "epoch": 0.008830492604462444, "grad_norm": 0.10441339015960693, "learning_rate": 0.0005, "loss": 2.1506, "step": 2320 }, { "epoch": 0.008868555072585127, "grad_norm": 0.1082761138677597, "learning_rate": 0.0005, "loss": 2.1363, "step": 2330 }, { "epoch": 0.00890661754070781, "grad_norm": 0.12606731057167053, "learning_rate": 0.0005, "loss": 2.1523, "step": 2340 }, { "epoch": 0.008944680008830492, "grad_norm": 0.12127711623907089, "learning_rate": 0.0005, "loss": 2.1324, "step": 2350 }, { "epoch": 0.008982742476953175, "grad_norm": 0.13403546810150146, "learning_rate": 0.0005, "loss": 2.151, "step": 2360 }, { "epoch": 0.009020804945075858, "grad_norm": 0.12472402304410934, "learning_rate": 0.0005, "loss": 2.1535, "step": 2370 }, { "epoch": 0.009058867413198541, "grad_norm": 0.10507071018218994, "learning_rate": 0.0005, "loss": 2.1633, "step": 2380 }, { "epoch": 0.009096929881321224, "grad_norm": 0.12194889783859253, "learning_rate": 0.0005, "loss": 2.1456, "step": 2390 }, { "epoch": 0.009134992349443908, "grad_norm": 0.11172129213809967, "learning_rate": 0.0005, "loss": 2.1402, "step": 2400 }, { "epoch": 0.00917305481756659, "grad_norm": 0.10743540525436401, "learning_rate": 0.0005, "loss": 2.1398, "step": 2410 }, { "epoch": 0.009211117285689274, "grad_norm": 0.1062987893819809, "learning_rate": 0.0005, "loss": 2.1579, "step": 2420 }, { "epoch": 0.009249179753811957, "grad_norm": 0.12290962040424347, "learning_rate": 0.0005, "loss": 2.1545, "step": 2430 }, { "epoch": 0.009287242221934638, "grad_norm": 0.10917174816131592, "learning_rate": 0.0005, "loss": 2.1195, "step": 2440 }, { "epoch": 0.009325304690057322, "grad_norm": 0.11427836120128632, "learning_rate": 0.0005, "loss": 2.1442, "step": 2450 }, { "epoch": 0.009363367158180005, "grad_norm": 0.11217708885669708, "learning_rate": 0.0005, "loss": 2.1502, "step": 2460 }, { "epoch": 0.009401429626302688, "grad_norm": 0.10235556215047836, "learning_rate": 0.0005, "loss": 2.1477, "step": 2470 }, { "epoch": 0.009439492094425371, "grad_norm": 0.10068736970424652, "learning_rate": 0.0005, "loss": 2.142, "step": 2480 }, { "epoch": 0.009477554562548054, "grad_norm": 0.10361825674772263, "learning_rate": 0.0005, "loss": 2.148, "step": 2490 }, { "epoch": 0.009515617030670737, "grad_norm": 0.13224199414253235, "learning_rate": 0.0005, "loss": 2.1457, "step": 2500 }, { "epoch": 0.00955367949879342, "grad_norm": 0.11844684928655624, "learning_rate": 0.0005, "loss": 2.1307, "step": 2510 }, { "epoch": 0.009591741966916102, "grad_norm": 0.11305680871009827, "learning_rate": 0.0005, "loss": 2.1484, "step": 2520 }, { "epoch": 0.009629804435038785, "grad_norm": 0.11989112198352814, "learning_rate": 0.0005, "loss": 2.1449, "step": 2530 }, { "epoch": 0.009667866903161468, "grad_norm": 0.09989438951015472, "learning_rate": 0.0005, "loss": 2.1433, "step": 2540 }, { "epoch": 0.009705929371284151, "grad_norm": 0.10064006596803665, "learning_rate": 0.0005, "loss": 2.144, "step": 2550 }, { "epoch": 0.009743991839406834, "grad_norm": 0.11770729720592499, "learning_rate": 0.0005, "loss": 2.1576, "step": 2560 }, { "epoch": 0.009782054307529518, "grad_norm": 0.10948820412158966, "learning_rate": 0.0005, "loss": 2.1459, "step": 2570 }, { "epoch": 0.0098201167756522, "grad_norm": 0.11830486357212067, "learning_rate": 0.0005, "loss": 2.151, "step": 2580 }, { "epoch": 0.009858179243774884, "grad_norm": 0.1152670606970787, "learning_rate": 0.0005, "loss": 2.1482, "step": 2590 }, { "epoch": 0.009896241711897567, "grad_norm": 0.10491305589675903, "learning_rate": 0.0005, "loss": 2.1376, "step": 2600 }, { "epoch": 0.009934304180020248, "grad_norm": 0.12046907842159271, "learning_rate": 0.0005, "loss": 2.141, "step": 2610 }, { "epoch": 0.009972366648142932, "grad_norm": 0.11437215656042099, "learning_rate": 0.0005, "loss": 2.1543, "step": 2620 }, { "epoch": 0.010010429116265615, "grad_norm": 0.10899262130260468, "learning_rate": 0.0005, "loss": 2.1475, "step": 2630 }, { "epoch": 0.010048491584388298, "grad_norm": 0.11079401522874832, "learning_rate": 0.0005, "loss": 2.1379, "step": 2640 }, { "epoch": 0.010086554052510981, "grad_norm": 0.10931838303804398, "learning_rate": 0.0005, "loss": 2.1567, "step": 2650 }, { "epoch": 0.010124616520633664, "grad_norm": 0.10471278429031372, "learning_rate": 0.0005, "loss": 2.1323, "step": 2660 }, { "epoch": 0.010162678988756347, "grad_norm": 0.1277533322572708, "learning_rate": 0.0005, "loss": 2.1506, "step": 2670 }, { "epoch": 0.01020074145687903, "grad_norm": 0.11684451997280121, "learning_rate": 0.0005, "loss": 2.1516, "step": 2680 }, { "epoch": 0.010238803925001714, "grad_norm": 0.11240722239017487, "learning_rate": 0.0005, "loss": 2.1448, "step": 2690 }, { "epoch": 0.010276866393124395, "grad_norm": 0.11600897461175919, "learning_rate": 0.0005, "loss": 2.1322, "step": 2700 }, { "epoch": 0.010314928861247078, "grad_norm": 0.11586230248212814, "learning_rate": 0.0005, "loss": 2.1495, "step": 2710 }, { "epoch": 0.010352991329369761, "grad_norm": 0.1122283861041069, "learning_rate": 0.0005, "loss": 2.1528, "step": 2720 }, { "epoch": 0.010391053797492444, "grad_norm": 0.11556751281023026, "learning_rate": 0.0005, "loss": 2.1418, "step": 2730 }, { "epoch": 0.010429116265615128, "grad_norm": 0.10538724064826965, "learning_rate": 0.0005, "loss": 2.1592, "step": 2740 }, { "epoch": 0.01046717873373781, "grad_norm": 0.11462972313165665, "learning_rate": 0.0005, "loss": 2.1495, "step": 2750 }, { "epoch": 0.010505241201860494, "grad_norm": 0.1125258207321167, "learning_rate": 0.0005, "loss": 2.1453, "step": 2760 }, { "epoch": 0.010543303669983177, "grad_norm": 0.11863648891448975, "learning_rate": 0.0005, "loss": 2.1419, "step": 2770 }, { "epoch": 0.01058136613810586, "grad_norm": 0.1135948970913887, "learning_rate": 0.0005, "loss": 2.1439, "step": 2780 }, { "epoch": 0.010619428606228542, "grad_norm": 0.10585552453994751, "learning_rate": 0.0005, "loss": 2.1365, "step": 2790 }, { "epoch": 0.010657491074351225, "grad_norm": 0.10917206853628159, "learning_rate": 0.0005, "loss": 2.1366, "step": 2800 }, { "epoch": 0.010695553542473908, "grad_norm": 0.11167892813682556, "learning_rate": 0.0005, "loss": 2.1423, "step": 2810 }, { "epoch": 0.010733616010596591, "grad_norm": 0.12436781078577042, "learning_rate": 0.0005, "loss": 2.1496, "step": 2820 }, { "epoch": 0.010771678478719274, "grad_norm": 0.11940976977348328, "learning_rate": 0.0005, "loss": 2.1281, "step": 2830 }, { "epoch": 0.010809740946841957, "grad_norm": 0.10679332166910172, "learning_rate": 0.0005, "loss": 2.1499, "step": 2840 }, { "epoch": 0.01084780341496464, "grad_norm": 0.10838009417057037, "learning_rate": 0.0005, "loss": 2.1396, "step": 2850 }, { "epoch": 0.010885865883087324, "grad_norm": 0.11248808354139328, "learning_rate": 0.0005, "loss": 2.1489, "step": 2860 }, { "epoch": 0.010923928351210007, "grad_norm": 0.12218758463859558, "learning_rate": 0.0005, "loss": 2.1504, "step": 2870 }, { "epoch": 0.010961990819332688, "grad_norm": 0.12311123311519623, "learning_rate": 0.0005, "loss": 2.1612, "step": 2880 }, { "epoch": 0.011000053287455371, "grad_norm": 0.12107503414154053, "learning_rate": 0.0005, "loss": 2.1521, "step": 2890 }, { "epoch": 0.011038115755578054, "grad_norm": 0.1265459507703781, "learning_rate": 0.0005, "loss": 2.1458, "step": 2900 }, { "epoch": 0.011076178223700738, "grad_norm": 0.10080008208751678, "learning_rate": 0.0005, "loss": 2.1449, "step": 2910 }, { "epoch": 0.01111424069182342, "grad_norm": 0.1411270946264267, "learning_rate": 0.0005, "loss": 2.1427, "step": 2920 }, { "epoch": 0.011152303159946104, "grad_norm": 0.10671328008174896, "learning_rate": 0.0005, "loss": 2.149, "step": 2930 }, { "epoch": 0.011190365628068787, "grad_norm": 0.1083766371011734, "learning_rate": 0.0005, "loss": 2.1421, "step": 2940 }, { "epoch": 0.01122842809619147, "grad_norm": 0.10850624740123749, "learning_rate": 0.0005, "loss": 2.1453, "step": 2950 }, { "epoch": 0.011266490564314153, "grad_norm": 0.11468614637851715, "learning_rate": 0.0005, "loss": 2.1523, "step": 2960 }, { "epoch": 0.011304553032436835, "grad_norm": 0.11435503512620926, "learning_rate": 0.0005, "loss": 2.154, "step": 2970 }, { "epoch": 0.011342615500559518, "grad_norm": 0.12531743943691254, "learning_rate": 0.0005, "loss": 2.1354, "step": 2980 }, { "epoch": 0.011380677968682201, "grad_norm": 0.12513144314289093, "learning_rate": 0.0005, "loss": 2.1408, "step": 2990 }, { "epoch": 0.011418740436804884, "grad_norm": 0.10959810763597488, "learning_rate": 0.0005, "loss": 2.1571, "step": 3000 }, { "epoch": 0.011456802904927567, "grad_norm": 0.11499848961830139, "learning_rate": 0.0005, "loss": 2.1529, "step": 3010 }, { "epoch": 0.01149486537305025, "grad_norm": 0.11116579920053482, "learning_rate": 0.0005, "loss": 2.1493, "step": 3020 }, { "epoch": 0.011532927841172934, "grad_norm": 0.11482273787260056, "learning_rate": 0.0005, "loss": 2.1508, "step": 3030 }, { "epoch": 0.011570990309295617, "grad_norm": 0.11653285473585129, "learning_rate": 0.0005, "loss": 2.1624, "step": 3040 }, { "epoch": 0.011609052777418298, "grad_norm": 0.10759188234806061, "learning_rate": 0.0005, "loss": 2.1453, "step": 3050 }, { "epoch": 0.011647115245540981, "grad_norm": 0.10869356244802475, "learning_rate": 0.0005, "loss": 2.1489, "step": 3060 }, { "epoch": 0.011685177713663664, "grad_norm": 0.11057727783918381, "learning_rate": 0.0005, "loss": 2.1549, "step": 3070 }, { "epoch": 0.011723240181786347, "grad_norm": 0.12141691893339157, "learning_rate": 0.0005, "loss": 2.146, "step": 3080 }, { "epoch": 0.01176130264990903, "grad_norm": 0.11846601963043213, "learning_rate": 0.0005, "loss": 2.1543, "step": 3090 }, { "epoch": 0.011799365118031714, "grad_norm": 0.10390357673168182, "learning_rate": 0.0005, "loss": 2.1404, "step": 3100 }, { "epoch": 0.011837427586154397, "grad_norm": 0.10856925696134567, "learning_rate": 0.0005, "loss": 2.1344, "step": 3110 }, { "epoch": 0.01187549005427708, "grad_norm": 0.11197613179683685, "learning_rate": 0.0005, "loss": 2.1251, "step": 3120 }, { "epoch": 0.011913552522399763, "grad_norm": 0.10197338461875916, "learning_rate": 0.0005, "loss": 2.147, "step": 3130 }, { "epoch": 0.011951614990522445, "grad_norm": 0.10315605998039246, "learning_rate": 0.0005, "loss": 2.1456, "step": 3140 }, { "epoch": 0.011989677458645128, "grad_norm": 0.11616446822881699, "learning_rate": 0.0005, "loss": 2.152, "step": 3150 }, { "epoch": 0.012027739926767811, "grad_norm": 0.1289011538028717, "learning_rate": 0.0005, "loss": 2.1466, "step": 3160 }, { "epoch": 0.012065802394890494, "grad_norm": 0.11502829194068909, "learning_rate": 0.0005, "loss": 2.1532, "step": 3170 }, { "epoch": 0.012103864863013177, "grad_norm": 0.11335300654172897, "learning_rate": 0.0005, "loss": 2.1519, "step": 3180 }, { "epoch": 0.01214192733113586, "grad_norm": 0.1180783212184906, "learning_rate": 0.0005, "loss": 2.1551, "step": 3190 }, { "epoch": 0.012179989799258543, "grad_norm": 0.10899759829044342, "learning_rate": 0.0005, "loss": 2.1391, "step": 3200 }, { "epoch": 0.012218052267381227, "grad_norm": 0.1238299235701561, "learning_rate": 0.0005, "loss": 2.1462, "step": 3210 }, { "epoch": 0.01225611473550391, "grad_norm": 0.1062338650226593, "learning_rate": 0.0005, "loss": 2.1386, "step": 3220 }, { "epoch": 0.012294177203626591, "grad_norm": 0.10229603201150894, "learning_rate": 0.0005, "loss": 2.1365, "step": 3230 }, { "epoch": 0.012332239671749274, "grad_norm": 0.109988272190094, "learning_rate": 0.0005, "loss": 2.1279, "step": 3240 }, { "epoch": 0.012370302139871957, "grad_norm": 0.11193165183067322, "learning_rate": 0.0005, "loss": 2.151, "step": 3250 }, { "epoch": 0.01240836460799464, "grad_norm": 0.10783321410417557, "learning_rate": 0.0005, "loss": 2.1551, "step": 3260 }, { "epoch": 0.012446427076117324, "grad_norm": 0.1088133156299591, "learning_rate": 0.0005, "loss": 2.1659, "step": 3270 }, { "epoch": 0.012484489544240007, "grad_norm": 0.11209650337696075, "learning_rate": 0.0005, "loss": 2.1455, "step": 3280 }, { "epoch": 0.01252255201236269, "grad_norm": 0.11094122380018234, "learning_rate": 0.0005, "loss": 2.1439, "step": 3290 }, { "epoch": 0.012560614480485373, "grad_norm": 0.11281769722700119, "learning_rate": 0.0005, "loss": 2.1387, "step": 3300 }, { "epoch": 0.012598676948608056, "grad_norm": 0.11721902340650558, "learning_rate": 0.0005, "loss": 2.1358, "step": 3310 }, { "epoch": 0.012636739416730738, "grad_norm": 0.1123766303062439, "learning_rate": 0.0005, "loss": 2.145, "step": 3320 }, { "epoch": 0.01267480188485342, "grad_norm": 0.11648537218570709, "learning_rate": 0.0005, "loss": 2.1423, "step": 3330 }, { "epoch": 0.012712864352976104, "grad_norm": 0.11745714396238327, "learning_rate": 0.0005, "loss": 2.1521, "step": 3340 }, { "epoch": 0.012750926821098787, "grad_norm": 0.10987821221351624, "learning_rate": 0.0005, "loss": 2.1391, "step": 3350 }, { "epoch": 0.01278898928922147, "grad_norm": 0.11140109598636627, "learning_rate": 0.0005, "loss": 2.1564, "step": 3360 }, { "epoch": 0.012827051757344153, "grad_norm": 0.1149124875664711, "learning_rate": 0.0005, "loss": 2.1509, "step": 3370 }, { "epoch": 0.012865114225466837, "grad_norm": 0.10883333534002304, "learning_rate": 0.0005, "loss": 2.1362, "step": 3380 }, { "epoch": 0.01290317669358952, "grad_norm": 0.11000484973192215, "learning_rate": 0.0005, "loss": 2.1411, "step": 3390 }, { "epoch": 0.012941239161712203, "grad_norm": 0.11543362587690353, "learning_rate": 0.0005, "loss": 2.1549, "step": 3400 }, { "epoch": 0.012979301629834884, "grad_norm": 0.1056831106543541, "learning_rate": 0.0005, "loss": 2.1515, "step": 3410 }, { "epoch": 0.013017364097957567, "grad_norm": 0.11518535017967224, "learning_rate": 0.0005, "loss": 2.1402, "step": 3420 }, { "epoch": 0.01305542656608025, "grad_norm": 0.09811803698539734, "learning_rate": 0.0005, "loss": 2.1557, "step": 3430 }, { "epoch": 0.013093489034202934, "grad_norm": 0.11671673506498337, "learning_rate": 0.0005, "loss": 2.1378, "step": 3440 }, { "epoch": 0.013131551502325617, "grad_norm": 0.12312810868024826, "learning_rate": 0.0005, "loss": 2.1371, "step": 3450 }, { "epoch": 0.0131696139704483, "grad_norm": 0.10879986733198166, "learning_rate": 0.0005, "loss": 2.1483, "step": 3460 }, { "epoch": 0.013207676438570983, "grad_norm": 0.11917275190353394, "learning_rate": 0.0005, "loss": 2.1505, "step": 3470 }, { "epoch": 0.013245738906693666, "grad_norm": 0.10777828842401505, "learning_rate": 0.0005, "loss": 2.1466, "step": 3480 }, { "epoch": 0.01328380137481635, "grad_norm": 0.10327920317649841, "learning_rate": 0.0005, "loss": 2.1602, "step": 3490 }, { "epoch": 0.01332186384293903, "grad_norm": 0.10455843061208725, "learning_rate": 0.0005, "loss": 2.1375, "step": 3500 }, { "epoch": 0.013359926311061714, "grad_norm": 0.11596754938364029, "learning_rate": 0.0005, "loss": 2.1587, "step": 3510 }, { "epoch": 0.013397988779184397, "grad_norm": 0.12860432267189026, "learning_rate": 0.0005, "loss": 2.1333, "step": 3520 }, { "epoch": 0.01343605124730708, "grad_norm": 0.12856276333332062, "learning_rate": 0.0005, "loss": 2.1381, "step": 3530 }, { "epoch": 0.013474113715429763, "grad_norm": 0.10185491293668747, "learning_rate": 0.0005, "loss": 2.1438, "step": 3540 }, { "epoch": 0.013512176183552447, "grad_norm": 0.10582708567380905, "learning_rate": 0.0005, "loss": 2.149, "step": 3550 }, { "epoch": 0.01355023865167513, "grad_norm": 0.10672181844711304, "learning_rate": 0.0005, "loss": 2.1632, "step": 3560 }, { "epoch": 0.013588301119797813, "grad_norm": 0.0988573208451271, "learning_rate": 0.0005, "loss": 2.1372, "step": 3570 }, { "epoch": 0.013626363587920494, "grad_norm": 0.11891688406467438, "learning_rate": 0.0005, "loss": 2.154, "step": 3580 }, { "epoch": 0.013664426056043177, "grad_norm": 0.12185022234916687, "learning_rate": 0.0005, "loss": 2.1539, "step": 3590 }, { "epoch": 0.01370248852416586, "grad_norm": 0.12364726513624191, "learning_rate": 0.0005, "loss": 2.1463, "step": 3600 }, { "epoch": 0.013740550992288544, "grad_norm": 0.10995651036500931, "learning_rate": 0.0005, "loss": 2.1506, "step": 3610 }, { "epoch": 0.013778613460411227, "grad_norm": 0.10640208423137665, "learning_rate": 0.0005, "loss": 2.1456, "step": 3620 }, { "epoch": 0.01381667592853391, "grad_norm": 0.10756561160087585, "learning_rate": 0.0005, "loss": 2.1334, "step": 3630 }, { "epoch": 0.013854738396656593, "grad_norm": 0.11378846317529678, "learning_rate": 0.0005, "loss": 2.1458, "step": 3640 }, { "epoch": 0.013892800864779276, "grad_norm": 0.11161532998085022, "learning_rate": 0.0005, "loss": 2.1455, "step": 3650 }, { "epoch": 0.01393086333290196, "grad_norm": 0.1208384558558464, "learning_rate": 0.0005, "loss": 2.147, "step": 3660 }, { "epoch": 0.01396892580102464, "grad_norm": 0.1076679527759552, "learning_rate": 0.0005, "loss": 2.1513, "step": 3670 }, { "epoch": 0.014006988269147324, "grad_norm": 0.1010458767414093, "learning_rate": 0.0005, "loss": 2.15, "step": 3680 }, { "epoch": 0.014045050737270007, "grad_norm": 0.10920700430870056, "learning_rate": 0.0005, "loss": 2.1499, "step": 3690 }, { "epoch": 0.01408311320539269, "grad_norm": 0.1006520465016365, "learning_rate": 0.0005, "loss": 2.1554, "step": 3700 }, { "epoch": 0.014121175673515373, "grad_norm": 0.11252916604280472, "learning_rate": 0.0005, "loss": 2.1495, "step": 3710 }, { "epoch": 0.014159238141638057, "grad_norm": 0.1261662244796753, "learning_rate": 0.0005, "loss": 2.1382, "step": 3720 }, { "epoch": 0.01419730060976074, "grad_norm": 0.11702711135149002, "learning_rate": 0.0005, "loss": 2.1267, "step": 3730 }, { "epoch": 0.014235363077883423, "grad_norm": 0.1109805703163147, "learning_rate": 0.0005, "loss": 2.1557, "step": 3740 }, { "epoch": 0.014273425546006106, "grad_norm": 0.11875700950622559, "learning_rate": 0.0005, "loss": 2.1425, "step": 3750 }, { "epoch": 0.014311488014128787, "grad_norm": 0.11200769245624542, "learning_rate": 0.0005, "loss": 2.1437, "step": 3760 }, { "epoch": 0.01434955048225147, "grad_norm": 0.11239437758922577, "learning_rate": 0.0005, "loss": 2.1515, "step": 3770 }, { "epoch": 0.014387612950374154, "grad_norm": 0.10733044147491455, "learning_rate": 0.0005, "loss": 2.1465, "step": 3780 }, { "epoch": 0.014425675418496837, "grad_norm": 0.1107863038778305, "learning_rate": 0.0005, "loss": 2.135, "step": 3790 }, { "epoch": 0.01446373788661952, "grad_norm": 0.11729779094457626, "learning_rate": 0.0005, "loss": 2.137, "step": 3800 }, { "epoch": 0.014501800354742203, "grad_norm": 0.13996534049510956, "learning_rate": 0.0005, "loss": 2.1524, "step": 3810 }, { "epoch": 0.014539862822864886, "grad_norm": 0.1317211240530014, "learning_rate": 0.0005, "loss": 2.1499, "step": 3820 }, { "epoch": 0.01457792529098757, "grad_norm": 0.11421209573745728, "learning_rate": 0.0005, "loss": 2.1392, "step": 3830 }, { "epoch": 0.014615987759110253, "grad_norm": 0.1259276568889618, "learning_rate": 0.0005, "loss": 2.15, "step": 3840 }, { "epoch": 0.014654050227232934, "grad_norm": 0.10297126322984695, "learning_rate": 0.0005, "loss": 2.1423, "step": 3850 }, { "epoch": 0.014692112695355617, "grad_norm": 0.11537662148475647, "learning_rate": 0.0005, "loss": 2.1423, "step": 3860 }, { "epoch": 0.0147301751634783, "grad_norm": 0.10959656536579132, "learning_rate": 0.0005, "loss": 2.1349, "step": 3870 }, { "epoch": 0.014768237631600983, "grad_norm": 0.13356612622737885, "learning_rate": 0.0005, "loss": 2.1421, "step": 3880 }, { "epoch": 0.014806300099723666, "grad_norm": 0.11244034022092819, "learning_rate": 0.0005, "loss": 2.1283, "step": 3890 }, { "epoch": 0.01484436256784635, "grad_norm": 0.10692012310028076, "learning_rate": 0.0005, "loss": 2.1413, "step": 3900 }, { "epoch": 0.014882425035969033, "grad_norm": 0.12048804759979248, "learning_rate": 0.0005, "loss": 2.1355, "step": 3910 }, { "epoch": 0.014920487504091716, "grad_norm": 0.11501602083444595, "learning_rate": 0.0005, "loss": 2.1519, "step": 3920 }, { "epoch": 0.014958549972214399, "grad_norm": 0.13394662737846375, "learning_rate": 0.0005, "loss": 2.144, "step": 3930 }, { "epoch": 0.01499661244033708, "grad_norm": 0.1142013743519783, "learning_rate": 0.0005, "loss": 2.1489, "step": 3940 }, { "epoch": 0.015034674908459764, "grad_norm": 0.12087413668632507, "learning_rate": 0.0005, "loss": 2.1635, "step": 3950 }, { "epoch": 0.015072737376582447, "grad_norm": 0.12638744711875916, "learning_rate": 0.0005, "loss": 2.1508, "step": 3960 }, { "epoch": 0.01511079984470513, "grad_norm": 0.11318952590227127, "learning_rate": 0.0005, "loss": 2.1301, "step": 3970 }, { "epoch": 0.015148862312827813, "grad_norm": 0.1457756757736206, "learning_rate": 0.0005, "loss": 2.1463, "step": 3980 }, { "epoch": 0.015186924780950496, "grad_norm": 0.11943541467189789, "learning_rate": 0.0005, "loss": 2.1455, "step": 3990 }, { "epoch": 0.01522498724907318, "grad_norm": 0.12239276617765427, "learning_rate": 0.0005, "loss": 2.1479, "step": 4000 }, { "epoch": 0.015263049717195862, "grad_norm": 0.11262499541044235, "learning_rate": 0.0005, "loss": 2.1524, "step": 4010 }, { "epoch": 0.015301112185318546, "grad_norm": 0.10902900248765945, "learning_rate": 0.0005, "loss": 2.155, "step": 4020 }, { "epoch": 0.015339174653441227, "grad_norm": 0.13082574307918549, "learning_rate": 0.0005, "loss": 2.1405, "step": 4030 }, { "epoch": 0.01537723712156391, "grad_norm": 0.11959017068147659, "learning_rate": 0.0005, "loss": 2.1454, "step": 4040 }, { "epoch": 0.015415299589686593, "grad_norm": 0.1089482307434082, "learning_rate": 0.0005, "loss": 2.1525, "step": 4050 }, { "epoch": 0.015453362057809276, "grad_norm": 0.11102756857872009, "learning_rate": 0.0005, "loss": 2.1616, "step": 4060 }, { "epoch": 0.01549142452593196, "grad_norm": 0.11872225999832153, "learning_rate": 0.0005, "loss": 2.1478, "step": 4070 }, { "epoch": 0.015529486994054643, "grad_norm": 0.11290697008371353, "learning_rate": 0.0005, "loss": 2.1473, "step": 4080 }, { "epoch": 0.015567549462177326, "grad_norm": 0.12047409266233444, "learning_rate": 0.0005, "loss": 2.1383, "step": 4090 }, { "epoch": 0.015605611930300009, "grad_norm": 0.1093122810125351, "learning_rate": 0.0005, "loss": 2.1385, "step": 4100 }, { "epoch": 0.01564367439842269, "grad_norm": 0.11345285177230835, "learning_rate": 0.0005, "loss": 2.1393, "step": 4110 }, { "epoch": 0.015681736866545375, "grad_norm": 0.1162080317735672, "learning_rate": 0.0005, "loss": 2.1325, "step": 4120 }, { "epoch": 0.015719799334668057, "grad_norm": 0.11164089292287827, "learning_rate": 0.0005, "loss": 2.1529, "step": 4130 }, { "epoch": 0.01575786180279074, "grad_norm": 0.1144440621137619, "learning_rate": 0.0005, "loss": 2.1341, "step": 4140 }, { "epoch": 0.015795924270913423, "grad_norm": 0.11888343840837479, "learning_rate": 0.0005, "loss": 2.1581, "step": 4150 }, { "epoch": 0.015833986739036104, "grad_norm": 0.12672144174575806, "learning_rate": 0.0005, "loss": 2.1607, "step": 4160 }, { "epoch": 0.01587204920715879, "grad_norm": 0.11533960700035095, "learning_rate": 0.0005, "loss": 2.1507, "step": 4170 }, { "epoch": 0.01591011167528147, "grad_norm": 0.11102886497974396, "learning_rate": 0.0005, "loss": 2.1413, "step": 4180 }, { "epoch": 0.015948174143404156, "grad_norm": 0.11495087295770645, "learning_rate": 0.0005, "loss": 2.1345, "step": 4190 }, { "epoch": 0.015986236611526837, "grad_norm": 0.11841031163930893, "learning_rate": 0.0005, "loss": 2.1453, "step": 4200 }, { "epoch": 0.016024299079649522, "grad_norm": 0.0984388217329979, "learning_rate": 0.0005, "loss": 2.1342, "step": 4210 }, { "epoch": 0.016062361547772203, "grad_norm": 0.10668789595365524, "learning_rate": 0.0005, "loss": 2.1667, "step": 4220 }, { "epoch": 0.016100424015894888, "grad_norm": 0.10329697281122208, "learning_rate": 0.0005, "loss": 2.1572, "step": 4230 }, { "epoch": 0.01613848648401757, "grad_norm": 0.12043692916631699, "learning_rate": 0.0005, "loss": 2.1383, "step": 4240 }, { "epoch": 0.01617654895214025, "grad_norm": 0.09935463964939117, "learning_rate": 0.0005, "loss": 2.1449, "step": 4250 }, { "epoch": 0.016214611420262936, "grad_norm": 0.10463161766529083, "learning_rate": 0.0005, "loss": 2.139, "step": 4260 }, { "epoch": 0.016252673888385617, "grad_norm": 0.10367552191019058, "learning_rate": 0.0005, "loss": 2.1518, "step": 4270 }, { "epoch": 0.016290736356508302, "grad_norm": 0.1247159019112587, "learning_rate": 0.0005, "loss": 2.1563, "step": 4280 }, { "epoch": 0.016328798824630984, "grad_norm": 0.1147475615143776, "learning_rate": 0.0005, "loss": 2.1341, "step": 4290 }, { "epoch": 0.01636686129275367, "grad_norm": 0.12010012567043304, "learning_rate": 0.0005, "loss": 2.1578, "step": 4300 }, { "epoch": 0.01640492376087635, "grad_norm": 0.10670970380306244, "learning_rate": 0.0005, "loss": 2.1448, "step": 4310 }, { "epoch": 0.016442986228999035, "grad_norm": 0.11666595935821533, "learning_rate": 0.0005, "loss": 2.1484, "step": 4320 }, { "epoch": 0.016481048697121716, "grad_norm": 0.10902661085128784, "learning_rate": 0.0005, "loss": 2.1483, "step": 4330 }, { "epoch": 0.016519111165244398, "grad_norm": 0.10571938008069992, "learning_rate": 0.0005, "loss": 2.1515, "step": 4340 }, { "epoch": 0.016557173633367082, "grad_norm": 0.10284340381622314, "learning_rate": 0.0005, "loss": 2.1512, "step": 4350 }, { "epoch": 0.016595236101489764, "grad_norm": 0.10644084960222244, "learning_rate": 0.0005, "loss": 2.1468, "step": 4360 }, { "epoch": 0.01663329856961245, "grad_norm": 0.12925571203231812, "learning_rate": 0.0005, "loss": 2.1422, "step": 4370 }, { "epoch": 0.01667136103773513, "grad_norm": 0.11615116149187088, "learning_rate": 0.0005, "loss": 2.1632, "step": 4380 }, { "epoch": 0.016709423505857815, "grad_norm": 0.1273653209209442, "learning_rate": 0.0005, "loss": 2.1595, "step": 4390 }, { "epoch": 0.016747485973980496, "grad_norm": 0.11785674840211868, "learning_rate": 0.0005, "loss": 2.1347, "step": 4400 }, { "epoch": 0.01678554844210318, "grad_norm": 0.10750816017389297, "learning_rate": 0.0005, "loss": 2.1615, "step": 4410 }, { "epoch": 0.016823610910225863, "grad_norm": 0.11100894957780838, "learning_rate": 0.0005, "loss": 2.1483, "step": 4420 }, { "epoch": 0.016861673378348544, "grad_norm": 0.11811844259500504, "learning_rate": 0.0005, "loss": 2.1534, "step": 4430 }, { "epoch": 0.01689973584647123, "grad_norm": 0.12537986040115356, "learning_rate": 0.0005, "loss": 2.1318, "step": 4440 }, { "epoch": 0.01693779831459391, "grad_norm": 0.12277396768331528, "learning_rate": 0.0005, "loss": 2.1477, "step": 4450 }, { "epoch": 0.016975860782716595, "grad_norm": 0.10384443402290344, "learning_rate": 0.0005, "loss": 2.145, "step": 4460 }, { "epoch": 0.017013923250839277, "grad_norm": 0.10271551460027695, "learning_rate": 0.0005, "loss": 2.1392, "step": 4470 }, { "epoch": 0.01705198571896196, "grad_norm": 0.10529985278844833, "learning_rate": 0.0005, "loss": 2.1285, "step": 4480 }, { "epoch": 0.017090048187084643, "grad_norm": 0.1150999516248703, "learning_rate": 0.0005, "loss": 2.1446, "step": 4490 }, { "epoch": 0.017128110655207328, "grad_norm": 0.1289505660533905, "learning_rate": 0.0005, "loss": 2.159, "step": 4500 }, { "epoch": 0.01716617312333001, "grad_norm": 0.12131360918283463, "learning_rate": 0.0005, "loss": 2.1437, "step": 4510 }, { "epoch": 0.01720423559145269, "grad_norm": 0.12527745962142944, "learning_rate": 0.0005, "loss": 2.1413, "step": 4520 }, { "epoch": 0.017242298059575376, "grad_norm": 0.11226322501897812, "learning_rate": 0.0005, "loss": 2.1409, "step": 4530 }, { "epoch": 0.017280360527698057, "grad_norm": 0.11783391982316971, "learning_rate": 0.0005, "loss": 2.1456, "step": 4540 }, { "epoch": 0.017318422995820742, "grad_norm": 0.09702415764331818, "learning_rate": 0.0005, "loss": 2.1331, "step": 4550 }, { "epoch": 0.017356485463943423, "grad_norm": 0.1273190677165985, "learning_rate": 0.0005, "loss": 2.153, "step": 4560 }, { "epoch": 0.017394547932066108, "grad_norm": 0.10814052075147629, "learning_rate": 0.0005, "loss": 2.1375, "step": 4570 }, { "epoch": 0.01743261040018879, "grad_norm": 0.11285334080457687, "learning_rate": 0.0005, "loss": 2.1528, "step": 4580 }, { "epoch": 0.017470672868311474, "grad_norm": 0.1097639873623848, "learning_rate": 0.0005, "loss": 2.1505, "step": 4590 }, { "epoch": 0.017508735336434156, "grad_norm": 0.10255081206560135, "learning_rate": 0.0005, "loss": 2.1509, "step": 4600 }, { "epoch": 0.017546797804556837, "grad_norm": 0.12281888723373413, "learning_rate": 0.0005, "loss": 2.1527, "step": 4610 }, { "epoch": 0.017584860272679522, "grad_norm": 0.10445702821016312, "learning_rate": 0.0005, "loss": 2.1368, "step": 4620 }, { "epoch": 0.017622922740802204, "grad_norm": 0.1302148401737213, "learning_rate": 0.0005, "loss": 2.1328, "step": 4630 }, { "epoch": 0.01766098520892489, "grad_norm": 0.11009400337934494, "learning_rate": 0.0005, "loss": 2.1475, "step": 4640 }, { "epoch": 0.01769904767704757, "grad_norm": 0.11754176765680313, "learning_rate": 0.0005, "loss": 2.1403, "step": 4650 }, { "epoch": 0.017737110145170255, "grad_norm": 0.12567470967769623, "learning_rate": 0.0005, "loss": 2.1248, "step": 4660 }, { "epoch": 0.017775172613292936, "grad_norm": 0.11794472485780716, "learning_rate": 0.0005, "loss": 2.1595, "step": 4670 }, { "epoch": 0.01781323508141562, "grad_norm": 0.11542123556137085, "learning_rate": 0.0005, "loss": 2.1515, "step": 4680 }, { "epoch": 0.017851297549538302, "grad_norm": 0.10805080085992813, "learning_rate": 0.0005, "loss": 2.1428, "step": 4690 }, { "epoch": 0.017889360017660984, "grad_norm": 0.10144393891096115, "learning_rate": 0.0005, "loss": 2.1421, "step": 4700 }, { "epoch": 0.01792742248578367, "grad_norm": 0.10997345298528671, "learning_rate": 0.0005, "loss": 2.1591, "step": 4710 }, { "epoch": 0.01796548495390635, "grad_norm": 0.12483686953783035, "learning_rate": 0.0005, "loss": 2.1287, "step": 4720 }, { "epoch": 0.018003547422029035, "grad_norm": 0.12187418341636658, "learning_rate": 0.0005, "loss": 2.1227, "step": 4730 }, { "epoch": 0.018041609890151716, "grad_norm": 0.10523146390914917, "learning_rate": 0.0005, "loss": 2.1452, "step": 4740 }, { "epoch": 0.0180796723582744, "grad_norm": 0.11619247496128082, "learning_rate": 0.0005, "loss": 2.1262, "step": 4750 }, { "epoch": 0.018117734826397083, "grad_norm": 0.1118154227733612, "learning_rate": 0.0005, "loss": 2.1542, "step": 4760 }, { "epoch": 0.018155797294519768, "grad_norm": 0.12364581227302551, "learning_rate": 0.0005, "loss": 2.1519, "step": 4770 }, { "epoch": 0.01819385976264245, "grad_norm": 0.10529709607362747, "learning_rate": 0.0005, "loss": 2.1432, "step": 4780 }, { "epoch": 0.01823192223076513, "grad_norm": 0.11116600036621094, "learning_rate": 0.0005, "loss": 2.1529, "step": 4790 }, { "epoch": 0.018269984698887815, "grad_norm": 0.10533681511878967, "learning_rate": 0.0005, "loss": 2.1629, "step": 4800 }, { "epoch": 0.018308047167010497, "grad_norm": 0.10860110819339752, "learning_rate": 0.0005, "loss": 2.1602, "step": 4810 }, { "epoch": 0.01834610963513318, "grad_norm": 0.1140654981136322, "learning_rate": 0.0005, "loss": 2.1381, "step": 4820 }, { "epoch": 0.018384172103255863, "grad_norm": 0.1118798777461052, "learning_rate": 0.0005, "loss": 2.1607, "step": 4830 }, { "epoch": 0.018422234571378548, "grad_norm": 0.10930532217025757, "learning_rate": 0.0005, "loss": 2.1507, "step": 4840 }, { "epoch": 0.01846029703950123, "grad_norm": 0.11511654406785965, "learning_rate": 0.0005, "loss": 2.1458, "step": 4850 }, { "epoch": 0.018498359507623914, "grad_norm": 0.12256434559822083, "learning_rate": 0.0005, "loss": 2.1394, "step": 4860 }, { "epoch": 0.018536421975746595, "grad_norm": 0.10739784687757492, "learning_rate": 0.0005, "loss": 2.1544, "step": 4870 }, { "epoch": 0.018574484443869277, "grad_norm": 0.1213674396276474, "learning_rate": 0.0005, "loss": 2.159, "step": 4880 }, { "epoch": 0.018612546911991962, "grad_norm": 0.10939610004425049, "learning_rate": 0.0005, "loss": 2.16, "step": 4890 }, { "epoch": 0.018650609380114643, "grad_norm": 0.12793177366256714, "learning_rate": 0.0005, "loss": 2.1427, "step": 4900 }, { "epoch": 0.018688671848237328, "grad_norm": 0.10633699595928192, "learning_rate": 0.0005, "loss": 2.1476, "step": 4910 }, { "epoch": 0.01872673431636001, "grad_norm": 0.10457268357276917, "learning_rate": 0.0005, "loss": 2.164, "step": 4920 }, { "epoch": 0.018764796784482694, "grad_norm": 0.10391423106193542, "learning_rate": 0.0005, "loss": 2.1287, "step": 4930 }, { "epoch": 0.018802859252605376, "grad_norm": 0.11842848360538483, "learning_rate": 0.0005, "loss": 2.1433, "step": 4940 }, { "epoch": 0.01884092172072806, "grad_norm": 0.10565365105867386, "learning_rate": 0.0005, "loss": 2.1227, "step": 4950 }, { "epoch": 0.018878984188850742, "grad_norm": 0.11975981295108795, "learning_rate": 0.0005, "loss": 2.1542, "step": 4960 }, { "epoch": 0.018917046656973423, "grad_norm": 0.11828301846981049, "learning_rate": 0.0005, "loss": 2.1515, "step": 4970 }, { "epoch": 0.01895510912509611, "grad_norm": 0.11009760200977325, "learning_rate": 0.0005, "loss": 2.1505, "step": 4980 }, { "epoch": 0.01899317159321879, "grad_norm": 0.10771029442548752, "learning_rate": 0.0005, "loss": 2.1286, "step": 4990 }, { "epoch": 0.019031234061341475, "grad_norm": 0.1059429720044136, "learning_rate": 0.0005, "loss": 2.1383, "step": 5000 }, { "epoch": 0.019069296529464156, "grad_norm": 0.12671475112438202, "learning_rate": 0.0005, "loss": 2.1593, "step": 5010 }, { "epoch": 0.01910735899758684, "grad_norm": 0.1201770231127739, "learning_rate": 0.0005, "loss": 2.1395, "step": 5020 }, { "epoch": 0.019145421465709522, "grad_norm": 0.1037500873208046, "learning_rate": 0.0005, "loss": 2.1307, "step": 5030 }, { "epoch": 0.019183483933832204, "grad_norm": 0.11439839750528336, "learning_rate": 0.0005, "loss": 2.1452, "step": 5040 }, { "epoch": 0.01922154640195489, "grad_norm": 0.1112758219242096, "learning_rate": 0.0005, "loss": 2.1582, "step": 5050 }, { "epoch": 0.01925960887007757, "grad_norm": 0.10737047344446182, "learning_rate": 0.0005, "loss": 2.151, "step": 5060 }, { "epoch": 0.019297671338200255, "grad_norm": 0.10944429039955139, "learning_rate": 0.0005, "loss": 2.1324, "step": 5070 }, { "epoch": 0.019335733806322936, "grad_norm": 0.09820980578660965, "learning_rate": 0.0005, "loss": 2.1531, "step": 5080 }, { "epoch": 0.01937379627444562, "grad_norm": 0.12407433986663818, "learning_rate": 0.0005, "loss": 2.1368, "step": 5090 }, { "epoch": 0.019411858742568303, "grad_norm": 0.10912308096885681, "learning_rate": 0.0005, "loss": 2.1322, "step": 5100 }, { "epoch": 0.019449921210690987, "grad_norm": 0.11376690119504929, "learning_rate": 0.0005, "loss": 2.1513, "step": 5110 }, { "epoch": 0.01948798367881367, "grad_norm": 0.10237011313438416, "learning_rate": 0.0005, "loss": 2.1469, "step": 5120 }, { "epoch": 0.01952604614693635, "grad_norm": 0.12656770646572113, "learning_rate": 0.0005, "loss": 2.1534, "step": 5130 }, { "epoch": 0.019564108615059035, "grad_norm": 0.11813058704137802, "learning_rate": 0.0005, "loss": 2.143, "step": 5140 }, { "epoch": 0.019602171083181717, "grad_norm": 0.11121872812509537, "learning_rate": 0.0005, "loss": 2.1561, "step": 5150 }, { "epoch": 0.0196402335513044, "grad_norm": 0.10498569905757904, "learning_rate": 0.0005, "loss": 2.1488, "step": 5160 }, { "epoch": 0.019678296019427083, "grad_norm": 0.12507279217243195, "learning_rate": 0.0005, "loss": 2.1553, "step": 5170 }, { "epoch": 0.019716358487549768, "grad_norm": 0.11539608985185623, "learning_rate": 0.0005, "loss": 2.1582, "step": 5180 }, { "epoch": 0.01975442095567245, "grad_norm": 0.11775651574134827, "learning_rate": 0.0005, "loss": 2.1479, "step": 5190 }, { "epoch": 0.019792483423795134, "grad_norm": 0.1406317800283432, "learning_rate": 0.0005, "loss": 2.1395, "step": 5200 }, { "epoch": 0.019830545891917815, "grad_norm": 0.1074119284749031, "learning_rate": 0.0005, "loss": 2.1484, "step": 5210 }, { "epoch": 0.019868608360040497, "grad_norm": 0.1114739403128624, "learning_rate": 0.0005, "loss": 2.1444, "step": 5220 }, { "epoch": 0.01990667082816318, "grad_norm": 0.107333704829216, "learning_rate": 0.0005, "loss": 2.1494, "step": 5230 }, { "epoch": 0.019944733296285863, "grad_norm": 0.10788275301456451, "learning_rate": 0.0005, "loss": 2.1344, "step": 5240 }, { "epoch": 0.019982795764408548, "grad_norm": 0.11002978682518005, "learning_rate": 0.0005, "loss": 2.1402, "step": 5250 }, { "epoch": 0.02002085823253123, "grad_norm": 0.10800745338201523, "learning_rate": 0.0005, "loss": 2.1429, "step": 5260 }, { "epoch": 0.020058920700653914, "grad_norm": 0.09979681670665741, "learning_rate": 0.0005, "loss": 2.1558, "step": 5270 }, { "epoch": 0.020096983168776596, "grad_norm": 0.10672004520893097, "learning_rate": 0.0005, "loss": 2.14, "step": 5280 }, { "epoch": 0.02013504563689928, "grad_norm": 0.10285453498363495, "learning_rate": 0.0005, "loss": 2.1467, "step": 5290 }, { "epoch": 0.020173108105021962, "grad_norm": 0.11391220986843109, "learning_rate": 0.0005, "loss": 2.1541, "step": 5300 }, { "epoch": 0.020211170573144643, "grad_norm": 0.1035693883895874, "learning_rate": 0.0005, "loss": 2.1424, "step": 5310 }, { "epoch": 0.020249233041267328, "grad_norm": 0.0980655699968338, "learning_rate": 0.0005, "loss": 2.1456, "step": 5320 }, { "epoch": 0.02028729550939001, "grad_norm": 0.10977238416671753, "learning_rate": 0.0005, "loss": 2.144, "step": 5330 }, { "epoch": 0.020325357977512695, "grad_norm": 0.10729347169399261, "learning_rate": 0.0005, "loss": 2.1511, "step": 5340 }, { "epoch": 0.020363420445635376, "grad_norm": 0.11655550450086594, "learning_rate": 0.0005, "loss": 2.1498, "step": 5350 }, { "epoch": 0.02040148291375806, "grad_norm": 0.10931521654129028, "learning_rate": 0.0005, "loss": 2.1308, "step": 5360 }, { "epoch": 0.020439545381880742, "grad_norm": 0.119588702917099, "learning_rate": 0.0005, "loss": 2.1523, "step": 5370 }, { "epoch": 0.020477607850003427, "grad_norm": 0.12008684128522873, "learning_rate": 0.0005, "loss": 2.1304, "step": 5380 }, { "epoch": 0.02051567031812611, "grad_norm": 0.10880262404680252, "learning_rate": 0.0005, "loss": 2.1393, "step": 5390 }, { "epoch": 0.02055373278624879, "grad_norm": 0.12208819389343262, "learning_rate": 0.0005, "loss": 2.155, "step": 5400 }, { "epoch": 0.020591795254371475, "grad_norm": 0.11892218887805939, "learning_rate": 0.0005, "loss": 2.1546, "step": 5410 }, { "epoch": 0.020629857722494156, "grad_norm": 0.10370012372732162, "learning_rate": 0.0005, "loss": 2.1424, "step": 5420 }, { "epoch": 0.02066792019061684, "grad_norm": 0.11022347956895828, "learning_rate": 0.0005, "loss": 2.1576, "step": 5430 }, { "epoch": 0.020705982658739523, "grad_norm": 0.11062967777252197, "learning_rate": 0.0005, "loss": 2.1431, "step": 5440 }, { "epoch": 0.020744045126862207, "grad_norm": 0.09825879335403442, "learning_rate": 0.0005, "loss": 2.1466, "step": 5450 }, { "epoch": 0.02078210759498489, "grad_norm": 0.10920194536447525, "learning_rate": 0.0005, "loss": 2.1221, "step": 5460 }, { "epoch": 0.020820170063107574, "grad_norm": 0.10575275868177414, "learning_rate": 0.0005, "loss": 2.1436, "step": 5470 }, { "epoch": 0.020858232531230255, "grad_norm": 0.11406227946281433, "learning_rate": 0.0005, "loss": 2.16, "step": 5480 }, { "epoch": 0.020896294999352936, "grad_norm": 0.11863389611244202, "learning_rate": 0.0005, "loss": 2.152, "step": 5490 }, { "epoch": 0.02093435746747562, "grad_norm": 0.10407883673906326, "learning_rate": 0.0005, "loss": 2.1453, "step": 5500 }, { "epoch": 0.020972419935598303, "grad_norm": 0.1140441820025444, "learning_rate": 0.0005, "loss": 2.1491, "step": 5510 }, { "epoch": 0.021010482403720988, "grad_norm": 0.11707664281129837, "learning_rate": 0.0005, "loss": 2.1476, "step": 5520 }, { "epoch": 0.02104854487184367, "grad_norm": 0.11001245677471161, "learning_rate": 0.0005, "loss": 2.1254, "step": 5530 }, { "epoch": 0.021086607339966354, "grad_norm": 0.12250164896249771, "learning_rate": 0.0005, "loss": 2.1532, "step": 5540 }, { "epoch": 0.021124669808089035, "grad_norm": 0.11971727758646011, "learning_rate": 0.0005, "loss": 2.1386, "step": 5550 }, { "epoch": 0.02116273227621172, "grad_norm": 0.10103358328342438, "learning_rate": 0.0005, "loss": 2.1341, "step": 5560 }, { "epoch": 0.0212007947443344, "grad_norm": 0.10445115715265274, "learning_rate": 0.0005, "loss": 2.1468, "step": 5570 }, { "epoch": 0.021238857212457083, "grad_norm": 0.10383953154087067, "learning_rate": 0.0005, "loss": 2.1568, "step": 5580 }, { "epoch": 0.021276919680579768, "grad_norm": 0.12046512961387634, "learning_rate": 0.0005, "loss": 2.1551, "step": 5590 }, { "epoch": 0.02131498214870245, "grad_norm": 0.11003930121660233, "learning_rate": 0.0005, "loss": 2.1411, "step": 5600 }, { "epoch": 0.021353044616825134, "grad_norm": 0.1069195419549942, "learning_rate": 0.0005, "loss": 2.1491, "step": 5610 }, { "epoch": 0.021391107084947816, "grad_norm": 0.12431596964597702, "learning_rate": 0.0005, "loss": 2.1462, "step": 5620 }, { "epoch": 0.0214291695530705, "grad_norm": 0.1099320501089096, "learning_rate": 0.0005, "loss": 2.1371, "step": 5630 }, { "epoch": 0.021467232021193182, "grad_norm": 0.12026389688253403, "learning_rate": 0.0005, "loss": 2.1366, "step": 5640 }, { "epoch": 0.021505294489315867, "grad_norm": 0.10532669723033905, "learning_rate": 0.0005, "loss": 2.1427, "step": 5650 }, { "epoch": 0.021543356957438548, "grad_norm": 0.10554736107587814, "learning_rate": 0.0005, "loss": 2.1487, "step": 5660 }, { "epoch": 0.02158141942556123, "grad_norm": 0.11826571822166443, "learning_rate": 0.0005, "loss": 2.1459, "step": 5670 }, { "epoch": 0.021619481893683914, "grad_norm": 0.10795663297176361, "learning_rate": 0.0005, "loss": 2.1453, "step": 5680 }, { "epoch": 0.021657544361806596, "grad_norm": 0.11151348054409027, "learning_rate": 0.0005, "loss": 2.1475, "step": 5690 }, { "epoch": 0.02169560682992928, "grad_norm": 0.10803024470806122, "learning_rate": 0.0005, "loss": 2.1503, "step": 5700 }, { "epoch": 0.021733669298051962, "grad_norm": 0.13073943555355072, "learning_rate": 0.0005, "loss": 2.1417, "step": 5710 }, { "epoch": 0.021771731766174647, "grad_norm": 0.11931940913200378, "learning_rate": 0.0005, "loss": 2.1545, "step": 5720 }, { "epoch": 0.02180979423429733, "grad_norm": 0.1153026893734932, "learning_rate": 0.0005, "loss": 2.1488, "step": 5730 }, { "epoch": 0.021847856702420013, "grad_norm": 0.11887199431657791, "learning_rate": 0.0005, "loss": 2.1294, "step": 5740 }, { "epoch": 0.021885919170542695, "grad_norm": 0.1110357940196991, "learning_rate": 0.0005, "loss": 2.1381, "step": 5750 }, { "epoch": 0.021923981638665376, "grad_norm": 0.13553187251091003, "learning_rate": 0.0005, "loss": 2.1479, "step": 5760 }, { "epoch": 0.02196204410678806, "grad_norm": 0.10310986638069153, "learning_rate": 0.0005, "loss": 2.1425, "step": 5770 }, { "epoch": 0.022000106574910742, "grad_norm": 0.11090600490570068, "learning_rate": 0.0005, "loss": 2.1591, "step": 5780 }, { "epoch": 0.022038169043033427, "grad_norm": 0.1099453866481781, "learning_rate": 0.0005, "loss": 2.1287, "step": 5790 }, { "epoch": 0.02207623151115611, "grad_norm": 0.10403940826654434, "learning_rate": 0.0005, "loss": 2.145, "step": 5800 }, { "epoch": 0.022114293979278794, "grad_norm": 0.113205686211586, "learning_rate": 0.0005, "loss": 2.1586, "step": 5810 }, { "epoch": 0.022152356447401475, "grad_norm": 0.12265747785568237, "learning_rate": 0.0005, "loss": 2.1512, "step": 5820 }, { "epoch": 0.02219041891552416, "grad_norm": 0.1367434561252594, "learning_rate": 0.0005, "loss": 2.1396, "step": 5830 }, { "epoch": 0.02222848138364684, "grad_norm": 0.11390835791826248, "learning_rate": 0.0005, "loss": 2.1448, "step": 5840 }, { "epoch": 0.022266543851769523, "grad_norm": 0.11761996150016785, "learning_rate": 0.0005, "loss": 2.1428, "step": 5850 }, { "epoch": 0.022304606319892208, "grad_norm": 0.10379810631275177, "learning_rate": 0.0005, "loss": 2.1504, "step": 5860 }, { "epoch": 0.02234266878801489, "grad_norm": 0.12037274986505508, "learning_rate": 0.0005, "loss": 2.138, "step": 5870 }, { "epoch": 0.022380731256137574, "grad_norm": 0.11196594685316086, "learning_rate": 0.0005, "loss": 2.1507, "step": 5880 }, { "epoch": 0.022418793724260255, "grad_norm": 0.1106754019856453, "learning_rate": 0.0005, "loss": 2.122, "step": 5890 }, { "epoch": 0.02245685619238294, "grad_norm": 0.11773858219385147, "learning_rate": 0.0005, "loss": 2.131, "step": 5900 }, { "epoch": 0.02249491866050562, "grad_norm": 0.11126694828271866, "learning_rate": 0.0005, "loss": 2.1563, "step": 5910 }, { "epoch": 0.022532981128628306, "grad_norm": 0.10651341825723648, "learning_rate": 0.0005, "loss": 2.1435, "step": 5920 }, { "epoch": 0.022571043596750988, "grad_norm": 0.12159726768732071, "learning_rate": 0.0005, "loss": 2.1443, "step": 5930 }, { "epoch": 0.02260910606487367, "grad_norm": 0.10570742934942245, "learning_rate": 0.0005, "loss": 2.1545, "step": 5940 }, { "epoch": 0.022647168532996354, "grad_norm": 0.1210443377494812, "learning_rate": 0.0005, "loss": 2.1559, "step": 5950 }, { "epoch": 0.022685231001119036, "grad_norm": 0.12900283932685852, "learning_rate": 0.0005, "loss": 2.1613, "step": 5960 }, { "epoch": 0.02272329346924172, "grad_norm": 0.10966738313436508, "learning_rate": 0.0005, "loss": 2.1219, "step": 5970 }, { "epoch": 0.022761355937364402, "grad_norm": 0.10426750034093857, "learning_rate": 0.0005, "loss": 2.1468, "step": 5980 }, { "epoch": 0.022799418405487087, "grad_norm": 0.12919247150421143, "learning_rate": 0.0005, "loss": 2.1431, "step": 5990 }, { "epoch": 0.022837480873609768, "grad_norm": 0.13425137102603912, "learning_rate": 0.0005, "loss": 2.1427, "step": 6000 }, { "epoch": 0.022875543341732453, "grad_norm": 0.09929858148097992, "learning_rate": 0.0005, "loss": 2.1488, "step": 6010 }, { "epoch": 0.022913605809855134, "grad_norm": 0.10623922199010849, "learning_rate": 0.0005, "loss": 2.1411, "step": 6020 }, { "epoch": 0.022951668277977816, "grad_norm": 0.10986531525850296, "learning_rate": 0.0005, "loss": 2.1449, "step": 6030 }, { "epoch": 0.0229897307461005, "grad_norm": 0.11614275723695755, "learning_rate": 0.0005, "loss": 2.1424, "step": 6040 }, { "epoch": 0.023027793214223182, "grad_norm": 0.12838366627693176, "learning_rate": 0.0005, "loss": 2.1452, "step": 6050 }, { "epoch": 0.023065855682345867, "grad_norm": 0.10890116542577744, "learning_rate": 0.0005, "loss": 2.1407, "step": 6060 }, { "epoch": 0.02310391815046855, "grad_norm": 0.10804659128189087, "learning_rate": 0.0005, "loss": 2.1509, "step": 6070 }, { "epoch": 0.023141980618591233, "grad_norm": 0.1182192713022232, "learning_rate": 0.0005, "loss": 2.1557, "step": 6080 }, { "epoch": 0.023180043086713915, "grad_norm": 0.11064272373914719, "learning_rate": 0.0005, "loss": 2.1439, "step": 6090 }, { "epoch": 0.023218105554836596, "grad_norm": 0.10852929204702377, "learning_rate": 0.0005, "loss": 2.1391, "step": 6100 }, { "epoch": 0.02325616802295928, "grad_norm": 0.11034633219242096, "learning_rate": 0.0005, "loss": 2.1343, "step": 6110 }, { "epoch": 0.023294230491081962, "grad_norm": 0.1135808527469635, "learning_rate": 0.0005, "loss": 2.1466, "step": 6120 }, { "epoch": 0.023332292959204647, "grad_norm": 0.11563987284898758, "learning_rate": 0.0005, "loss": 2.1304, "step": 6130 }, { "epoch": 0.02337035542732733, "grad_norm": 0.10818443447351456, "learning_rate": 0.0005, "loss": 2.1432, "step": 6140 }, { "epoch": 0.023408417895450014, "grad_norm": 0.11164136230945587, "learning_rate": 0.0005, "loss": 2.1527, "step": 6150 }, { "epoch": 0.023446480363572695, "grad_norm": 0.12221473455429077, "learning_rate": 0.0005, "loss": 2.151, "step": 6160 }, { "epoch": 0.02348454283169538, "grad_norm": 0.10707166790962219, "learning_rate": 0.0005, "loss": 2.1443, "step": 6170 }, { "epoch": 0.02352260529981806, "grad_norm": 0.10560087114572525, "learning_rate": 0.0005, "loss": 2.1482, "step": 6180 }, { "epoch": 0.023560667767940743, "grad_norm": 0.12548519670963287, "learning_rate": 0.0005, "loss": 2.1444, "step": 6190 }, { "epoch": 0.023598730236063428, "grad_norm": 0.1081615537405014, "learning_rate": 0.0005, "loss": 2.1615, "step": 6200 }, { "epoch": 0.02363679270418611, "grad_norm": 0.11263230443000793, "learning_rate": 0.0005, "loss": 2.1631, "step": 6210 }, { "epoch": 0.023674855172308794, "grad_norm": 0.12632977962493896, "learning_rate": 0.0005, "loss": 2.1531, "step": 6220 }, { "epoch": 0.023712917640431475, "grad_norm": 0.12329486012458801, "learning_rate": 0.0005, "loss": 2.135, "step": 6230 }, { "epoch": 0.02375098010855416, "grad_norm": 0.11623676121234894, "learning_rate": 0.0005, "loss": 2.1521, "step": 6240 }, { "epoch": 0.02378904257667684, "grad_norm": 0.1167716458439827, "learning_rate": 0.0005, "loss": 2.1459, "step": 6250 }, { "epoch": 0.023827105044799526, "grad_norm": 0.12007655948400497, "learning_rate": 0.0005, "loss": 2.1567, "step": 6260 }, { "epoch": 0.023865167512922208, "grad_norm": 0.11647479981184006, "learning_rate": 0.0005, "loss": 2.1235, "step": 6270 }, { "epoch": 0.02390322998104489, "grad_norm": 0.12291767448186874, "learning_rate": 0.0005, "loss": 2.1509, "step": 6280 }, { "epoch": 0.023941292449167574, "grad_norm": 0.11392831802368164, "learning_rate": 0.0005, "loss": 2.142, "step": 6290 }, { "epoch": 0.023979354917290256, "grad_norm": 0.13158921897411346, "learning_rate": 0.0005, "loss": 2.1372, "step": 6300 }, { "epoch": 0.02401741738541294, "grad_norm": 0.11344356834888458, "learning_rate": 0.0005, "loss": 2.1504, "step": 6310 }, { "epoch": 0.024055479853535622, "grad_norm": 0.1382695436477661, "learning_rate": 0.0005, "loss": 2.1425, "step": 6320 }, { "epoch": 0.024093542321658307, "grad_norm": 0.11335012316703796, "learning_rate": 0.0005, "loss": 2.1313, "step": 6330 }, { "epoch": 0.024131604789780988, "grad_norm": 0.11333515495061874, "learning_rate": 0.0005, "loss": 2.1595, "step": 6340 }, { "epoch": 0.024169667257903673, "grad_norm": 0.12147689610719681, "learning_rate": 0.0005, "loss": 2.124, "step": 6350 }, { "epoch": 0.024207729726026354, "grad_norm": 0.1204022616147995, "learning_rate": 0.0005, "loss": 2.154, "step": 6360 }, { "epoch": 0.024245792194149036, "grad_norm": 0.11988788098096848, "learning_rate": 0.0005, "loss": 2.1419, "step": 6370 }, { "epoch": 0.02428385466227172, "grad_norm": 0.11760450154542923, "learning_rate": 0.0005, "loss": 2.1459, "step": 6380 }, { "epoch": 0.024321917130394402, "grad_norm": 0.11097273975610733, "learning_rate": 0.0005, "loss": 2.1432, "step": 6390 }, { "epoch": 0.024359979598517087, "grad_norm": 0.11635033041238785, "learning_rate": 0.0005, "loss": 2.1388, "step": 6400 }, { "epoch": 0.02439804206663977, "grad_norm": 0.10954947024583817, "learning_rate": 0.0005, "loss": 2.1267, "step": 6410 }, { "epoch": 0.024436104534762453, "grad_norm": 0.12753041088581085, "learning_rate": 0.0005, "loss": 2.1472, "step": 6420 }, { "epoch": 0.024474167002885135, "grad_norm": 0.11430566757917404, "learning_rate": 0.0005, "loss": 2.1434, "step": 6430 }, { "epoch": 0.02451222947100782, "grad_norm": 0.10539361089468002, "learning_rate": 0.0005, "loss": 2.1494, "step": 6440 }, { "epoch": 0.0245502919391305, "grad_norm": 0.10601361095905304, "learning_rate": 0.0005, "loss": 2.1452, "step": 6450 }, { "epoch": 0.024588354407253182, "grad_norm": 0.12343835085630417, "learning_rate": 0.0005, "loss": 2.142, "step": 6460 }, { "epoch": 0.024626416875375867, "grad_norm": 0.10459932684898376, "learning_rate": 0.0005, "loss": 2.1549, "step": 6470 }, { "epoch": 0.02466447934349855, "grad_norm": 0.11193633824586868, "learning_rate": 0.0005, "loss": 2.1554, "step": 6480 }, { "epoch": 0.024702541811621233, "grad_norm": 0.11788009852170944, "learning_rate": 0.0005, "loss": 2.1509, "step": 6490 }, { "epoch": 0.024740604279743915, "grad_norm": 0.11227643489837646, "learning_rate": 0.0005, "loss": 2.1634, "step": 6500 }, { "epoch": 0.0247786667478666, "grad_norm": 0.1095656156539917, "learning_rate": 0.0005, "loss": 2.1344, "step": 6510 }, { "epoch": 0.02481672921598928, "grad_norm": 0.10564127564430237, "learning_rate": 0.0005, "loss": 2.1404, "step": 6520 }, { "epoch": 0.024854791684111966, "grad_norm": 0.11981156468391418, "learning_rate": 0.0005, "loss": 2.1556, "step": 6530 }, { "epoch": 0.024892854152234647, "grad_norm": 0.11325754225254059, "learning_rate": 0.0005, "loss": 2.1384, "step": 6540 }, { "epoch": 0.02493091662035733, "grad_norm": 0.13337786495685577, "learning_rate": 0.0005, "loss": 2.1394, "step": 6550 }, { "epoch": 0.024968979088480014, "grad_norm": 0.13114260137081146, "learning_rate": 0.0005, "loss": 2.1411, "step": 6560 }, { "epoch": 0.025007041556602695, "grad_norm": 0.1203683391213417, "learning_rate": 0.0005, "loss": 2.1504, "step": 6570 }, { "epoch": 0.02504510402472538, "grad_norm": 0.11538281291723251, "learning_rate": 0.0005, "loss": 2.143, "step": 6580 }, { "epoch": 0.02508316649284806, "grad_norm": 0.10961015522480011, "learning_rate": 0.0005, "loss": 2.1586, "step": 6590 }, { "epoch": 0.025121228960970746, "grad_norm": 0.11965086311101913, "learning_rate": 0.0005, "loss": 2.1382, "step": 6600 }, { "epoch": 0.025159291429093428, "grad_norm": 0.11759531497955322, "learning_rate": 0.0005, "loss": 2.1294, "step": 6610 }, { "epoch": 0.025197353897216113, "grad_norm": 0.1089395061135292, "learning_rate": 0.0005, "loss": 2.1533, "step": 6620 }, { "epoch": 0.025235416365338794, "grad_norm": 0.11554133892059326, "learning_rate": 0.0005, "loss": 2.1356, "step": 6630 }, { "epoch": 0.025273478833461475, "grad_norm": 0.10535012185573578, "learning_rate": 0.0005, "loss": 2.1452, "step": 6640 }, { "epoch": 0.02531154130158416, "grad_norm": 0.12574630975723267, "learning_rate": 0.0005, "loss": 2.152, "step": 6650 }, { "epoch": 0.02534960376970684, "grad_norm": 0.12262838333845139, "learning_rate": 0.0005, "loss": 2.1407, "step": 6660 }, { "epoch": 0.025387666237829527, "grad_norm": 0.1025639995932579, "learning_rate": 0.0005, "loss": 2.1456, "step": 6670 }, { "epoch": 0.025425728705952208, "grad_norm": 0.11699458956718445, "learning_rate": 0.0005, "loss": 2.1421, "step": 6680 }, { "epoch": 0.025463791174074893, "grad_norm": 0.10782221704721451, "learning_rate": 0.0005, "loss": 2.1523, "step": 6690 }, { "epoch": 0.025501853642197574, "grad_norm": 0.11212996393442154, "learning_rate": 0.0005, "loss": 2.1264, "step": 6700 }, { "epoch": 0.02553991611032026, "grad_norm": 0.10972334444522858, "learning_rate": 0.0005, "loss": 2.1326, "step": 6710 }, { "epoch": 0.02557797857844294, "grad_norm": 0.11672057956457138, "learning_rate": 0.0005, "loss": 2.136, "step": 6720 }, { "epoch": 0.025616041046565622, "grad_norm": 0.1198444738984108, "learning_rate": 0.0005, "loss": 2.1546, "step": 6730 }, { "epoch": 0.025654103514688307, "grad_norm": 0.10473254323005676, "learning_rate": 0.0005, "loss": 2.1549, "step": 6740 }, { "epoch": 0.02569216598281099, "grad_norm": 0.11385004222393036, "learning_rate": 0.0005, "loss": 2.1523, "step": 6750 }, { "epoch": 0.025730228450933673, "grad_norm": 0.11270426958799362, "learning_rate": 0.0005, "loss": 2.1369, "step": 6760 }, { "epoch": 0.025768290919056355, "grad_norm": 0.11007201671600342, "learning_rate": 0.0005, "loss": 2.142, "step": 6770 }, { "epoch": 0.02580635338717904, "grad_norm": 0.11610470712184906, "learning_rate": 0.0005, "loss": 2.1376, "step": 6780 }, { "epoch": 0.02584441585530172, "grad_norm": 0.12364150583744049, "learning_rate": 0.0005, "loss": 2.1564, "step": 6790 }, { "epoch": 0.025882478323424406, "grad_norm": 0.1113385483622551, "learning_rate": 0.0005, "loss": 2.1457, "step": 6800 }, { "epoch": 0.025920540791547087, "grad_norm": 0.1211201623082161, "learning_rate": 0.0005, "loss": 2.1555, "step": 6810 }, { "epoch": 0.02595860325966977, "grad_norm": 0.12166136503219604, "learning_rate": 0.0005, "loss": 2.1463, "step": 6820 }, { "epoch": 0.025996665727792453, "grad_norm": 0.10655366629362106, "learning_rate": 0.0005, "loss": 2.1371, "step": 6830 }, { "epoch": 0.026034728195915135, "grad_norm": 0.10823944956064224, "learning_rate": 0.0005, "loss": 2.1426, "step": 6840 }, { "epoch": 0.02607279066403782, "grad_norm": 0.12336534261703491, "learning_rate": 0.0005, "loss": 2.1441, "step": 6850 }, { "epoch": 0.0261108531321605, "grad_norm": 0.11191314458847046, "learning_rate": 0.0005, "loss": 2.1513, "step": 6860 }, { "epoch": 0.026148915600283186, "grad_norm": 0.10364817082881927, "learning_rate": 0.0005, "loss": 2.1567, "step": 6870 }, { "epoch": 0.026186978068405867, "grad_norm": 0.12202759087085724, "learning_rate": 0.0005, "loss": 2.1601, "step": 6880 }, { "epoch": 0.026225040536528552, "grad_norm": 0.11444476246833801, "learning_rate": 0.0005, "loss": 2.1334, "step": 6890 }, { "epoch": 0.026263103004651234, "grad_norm": 0.107620969414711, "learning_rate": 0.0005, "loss": 2.1491, "step": 6900 }, { "epoch": 0.026301165472773915, "grad_norm": 0.1263957917690277, "learning_rate": 0.0005, "loss": 2.1333, "step": 6910 }, { "epoch": 0.0263392279408966, "grad_norm": 0.11235121637582779, "learning_rate": 0.0005, "loss": 2.1288, "step": 6920 }, { "epoch": 0.02637729040901928, "grad_norm": 0.1108112782239914, "learning_rate": 0.0005, "loss": 2.1505, "step": 6930 }, { "epoch": 0.026415352877141966, "grad_norm": 0.11218473315238953, "learning_rate": 0.0005, "loss": 2.1429, "step": 6940 }, { "epoch": 0.026453415345264648, "grad_norm": 0.10671050101518631, "learning_rate": 0.0005, "loss": 2.14, "step": 6950 }, { "epoch": 0.026491477813387333, "grad_norm": 0.11324463784694672, "learning_rate": 0.0005, "loss": 2.1407, "step": 6960 }, { "epoch": 0.026529540281510014, "grad_norm": 0.1347011923789978, "learning_rate": 0.0005, "loss": 2.1375, "step": 6970 }, { "epoch": 0.0265676027496327, "grad_norm": 0.13093167543411255, "learning_rate": 0.0005, "loss": 2.1402, "step": 6980 }, { "epoch": 0.02660566521775538, "grad_norm": 0.11753138154745102, "learning_rate": 0.0005, "loss": 2.1423, "step": 6990 }, { "epoch": 0.02664372768587806, "grad_norm": 0.10647093504667282, "learning_rate": 0.0005, "loss": 2.1311, "step": 7000 }, { "epoch": 0.026681790154000747, "grad_norm": 0.11827953904867172, "learning_rate": 0.0005, "loss": 2.1455, "step": 7010 }, { "epoch": 0.026719852622123428, "grad_norm": 0.12453864514827728, "learning_rate": 0.0005, "loss": 2.1484, "step": 7020 }, { "epoch": 0.026757915090246113, "grad_norm": 0.12062682956457138, "learning_rate": 0.0005, "loss": 2.1282, "step": 7030 }, { "epoch": 0.026795977558368794, "grad_norm": 0.11530686914920807, "learning_rate": 0.0005, "loss": 2.1471, "step": 7040 }, { "epoch": 0.02683404002649148, "grad_norm": 0.1173919066786766, "learning_rate": 0.0005, "loss": 2.1427, "step": 7050 }, { "epoch": 0.02687210249461416, "grad_norm": 0.11233177781105042, "learning_rate": 0.0005, "loss": 2.1438, "step": 7060 }, { "epoch": 0.026910164962736845, "grad_norm": 0.12039361894130707, "learning_rate": 0.0005, "loss": 2.15, "step": 7070 }, { "epoch": 0.026948227430859527, "grad_norm": 0.11644244939088821, "learning_rate": 0.0005, "loss": 2.1445, "step": 7080 }, { "epoch": 0.026986289898982208, "grad_norm": 0.12728439271450043, "learning_rate": 0.0005, "loss": 2.1544, "step": 7090 }, { "epoch": 0.027024352367104893, "grad_norm": 0.11197049170732498, "learning_rate": 0.0005, "loss": 2.1524, "step": 7100 }, { "epoch": 0.027062414835227575, "grad_norm": 0.1252804547548294, "learning_rate": 0.0005, "loss": 2.1272, "step": 7110 }, { "epoch": 0.02710047730335026, "grad_norm": 0.11560700088739395, "learning_rate": 0.0005, "loss": 2.1474, "step": 7120 }, { "epoch": 0.02713853977147294, "grad_norm": 0.1106269583106041, "learning_rate": 0.0005, "loss": 2.1408, "step": 7130 }, { "epoch": 0.027176602239595626, "grad_norm": 0.10807859152555466, "learning_rate": 0.0005, "loss": 2.1314, "step": 7140 }, { "epoch": 0.027214664707718307, "grad_norm": 0.13045553863048553, "learning_rate": 0.0005, "loss": 2.1484, "step": 7150 }, { "epoch": 0.02725272717584099, "grad_norm": 0.1218976378440857, "learning_rate": 0.0005, "loss": 2.1511, "step": 7160 }, { "epoch": 0.027290789643963673, "grad_norm": 0.11346330493688583, "learning_rate": 0.0005, "loss": 2.1359, "step": 7170 }, { "epoch": 0.027328852112086355, "grad_norm": 0.1056937724351883, "learning_rate": 0.0005, "loss": 2.1416, "step": 7180 }, { "epoch": 0.02736691458020904, "grad_norm": 0.1118604838848114, "learning_rate": 0.0005, "loss": 2.14, "step": 7190 }, { "epoch": 0.02740497704833172, "grad_norm": 0.10969972610473633, "learning_rate": 0.0005, "loss": 2.1361, "step": 7200 }, { "epoch": 0.027443039516454406, "grad_norm": 0.11284728348255157, "learning_rate": 0.0005, "loss": 2.1341, "step": 7210 }, { "epoch": 0.027481101984577087, "grad_norm": 0.11114726215600967, "learning_rate": 0.0005, "loss": 2.1367, "step": 7220 }, { "epoch": 0.027519164452699772, "grad_norm": 0.116744764149189, "learning_rate": 0.0005, "loss": 2.1454, "step": 7230 }, { "epoch": 0.027557226920822454, "grad_norm": 0.1106627881526947, "learning_rate": 0.0005, "loss": 2.1535, "step": 7240 }, { "epoch": 0.027595289388945135, "grad_norm": 0.10879063606262207, "learning_rate": 0.0005, "loss": 2.1479, "step": 7250 }, { "epoch": 0.02763335185706782, "grad_norm": 0.11433703452348709, "learning_rate": 0.0005, "loss": 2.1377, "step": 7260 }, { "epoch": 0.0276714143251905, "grad_norm": 0.11956377327442169, "learning_rate": 0.0005, "loss": 2.1422, "step": 7270 }, { "epoch": 0.027709476793313186, "grad_norm": 0.10950538516044617, "learning_rate": 0.0005, "loss": 2.1506, "step": 7280 }, { "epoch": 0.027747539261435868, "grad_norm": 0.10610220581293106, "learning_rate": 0.0005, "loss": 2.1387, "step": 7290 }, { "epoch": 0.027785601729558553, "grad_norm": 0.10162237286567688, "learning_rate": 0.0005, "loss": 2.1399, "step": 7300 }, { "epoch": 0.027823664197681234, "grad_norm": 0.11824820935726166, "learning_rate": 0.0005, "loss": 2.1439, "step": 7310 }, { "epoch": 0.02786172666580392, "grad_norm": 0.11466916650533676, "learning_rate": 0.0005, "loss": 2.1444, "step": 7320 }, { "epoch": 0.0278997891339266, "grad_norm": 0.11753802001476288, "learning_rate": 0.0005, "loss": 2.1332, "step": 7330 }, { "epoch": 0.02793785160204928, "grad_norm": 0.10082371532917023, "learning_rate": 0.0005, "loss": 2.153, "step": 7340 }, { "epoch": 0.027975914070171966, "grad_norm": 0.1257477104663849, "learning_rate": 0.0005, "loss": 2.1366, "step": 7350 }, { "epoch": 0.028013976538294648, "grad_norm": 0.1157645434141159, "learning_rate": 0.0005, "loss": 2.1589, "step": 7360 }, { "epoch": 0.028052039006417333, "grad_norm": 0.12373010814189911, "learning_rate": 0.0005, "loss": 2.1384, "step": 7370 }, { "epoch": 0.028090101474540014, "grad_norm": 0.12133149057626724, "learning_rate": 0.0005, "loss": 2.1316, "step": 7380 }, { "epoch": 0.0281281639426627, "grad_norm": 0.108877994120121, "learning_rate": 0.0005, "loss": 2.1403, "step": 7390 }, { "epoch": 0.02816622641078538, "grad_norm": 0.1152234897017479, "learning_rate": 0.0005, "loss": 2.1439, "step": 7400 }, { "epoch": 0.028204288878908065, "grad_norm": 0.10108889639377594, "learning_rate": 0.0005, "loss": 2.1688, "step": 7410 }, { "epoch": 0.028242351347030747, "grad_norm": 0.11246947199106216, "learning_rate": 0.0005, "loss": 2.1365, "step": 7420 }, { "epoch": 0.028280413815153428, "grad_norm": 0.11029767245054245, "learning_rate": 0.0005, "loss": 2.1389, "step": 7430 }, { "epoch": 0.028318476283276113, "grad_norm": 0.10918702930212021, "learning_rate": 0.0005, "loss": 2.1443, "step": 7440 }, { "epoch": 0.028356538751398794, "grad_norm": 0.12176518887281418, "learning_rate": 0.0005, "loss": 2.133, "step": 7450 }, { "epoch": 0.02839460121952148, "grad_norm": 0.12156303226947784, "learning_rate": 0.0005, "loss": 2.1603, "step": 7460 }, { "epoch": 0.02843266368764416, "grad_norm": 0.11835929751396179, "learning_rate": 0.0005, "loss": 2.1522, "step": 7470 }, { "epoch": 0.028470726155766846, "grad_norm": 0.10707549750804901, "learning_rate": 0.0005, "loss": 2.157, "step": 7480 }, { "epoch": 0.028508788623889527, "grad_norm": 0.11295874416828156, "learning_rate": 0.0005, "loss": 2.15, "step": 7490 }, { "epoch": 0.028546851092012212, "grad_norm": 0.11424469202756882, "learning_rate": 0.0005, "loss": 2.1493, "step": 7500 }, { "epoch": 0.028584913560134893, "grad_norm": 0.12551772594451904, "learning_rate": 0.0005, "loss": 2.1348, "step": 7510 }, { "epoch": 0.028622976028257575, "grad_norm": 0.11593035608530045, "learning_rate": 0.0005, "loss": 2.1547, "step": 7520 }, { "epoch": 0.02866103849638026, "grad_norm": 0.1208166629076004, "learning_rate": 0.0005, "loss": 2.1465, "step": 7530 }, { "epoch": 0.02869910096450294, "grad_norm": 0.11421196162700653, "learning_rate": 0.0005, "loss": 2.1358, "step": 7540 }, { "epoch": 0.028737163432625626, "grad_norm": 0.1322488635778427, "learning_rate": 0.0005, "loss": 2.1568, "step": 7550 }, { "epoch": 0.028775225900748307, "grad_norm": 0.13235977292060852, "learning_rate": 0.0005, "loss": 2.1376, "step": 7560 }, { "epoch": 0.028813288368870992, "grad_norm": 0.10404398292303085, "learning_rate": 0.0005, "loss": 2.1533, "step": 7570 }, { "epoch": 0.028851350836993674, "grad_norm": 0.1253490298986435, "learning_rate": 0.0005, "loss": 2.1291, "step": 7580 }, { "epoch": 0.02888941330511636, "grad_norm": 0.11406102031469345, "learning_rate": 0.0005, "loss": 2.1477, "step": 7590 }, { "epoch": 0.02892747577323904, "grad_norm": 0.11629606783390045, "learning_rate": 0.0005, "loss": 2.1377, "step": 7600 }, { "epoch": 0.02896553824136172, "grad_norm": 0.10625175386667252, "learning_rate": 0.0005, "loss": 2.1469, "step": 7610 }, { "epoch": 0.029003600709484406, "grad_norm": 0.11212033778429031, "learning_rate": 0.0005, "loss": 2.1504, "step": 7620 }, { "epoch": 0.029041663177607088, "grad_norm": 0.10987796634435654, "learning_rate": 0.0005, "loss": 2.137, "step": 7630 }, { "epoch": 0.029079725645729772, "grad_norm": 0.10258632153272629, "learning_rate": 0.0005, "loss": 2.1297, "step": 7640 }, { "epoch": 0.029117788113852454, "grad_norm": 0.10536502301692963, "learning_rate": 0.0005, "loss": 2.143, "step": 7650 }, { "epoch": 0.02915585058197514, "grad_norm": 0.10387527942657471, "learning_rate": 0.0005, "loss": 2.1433, "step": 7660 }, { "epoch": 0.02919391305009782, "grad_norm": 0.10539811849594116, "learning_rate": 0.0005, "loss": 2.1436, "step": 7670 }, { "epoch": 0.029231975518220505, "grad_norm": 0.1249329149723053, "learning_rate": 0.0005, "loss": 2.1508, "step": 7680 }, { "epoch": 0.029270037986343186, "grad_norm": 0.12725548446178436, "learning_rate": 0.0005, "loss": 2.1526, "step": 7690 }, { "epoch": 0.029308100454465868, "grad_norm": 0.12263181805610657, "learning_rate": 0.0005, "loss": 2.1282, "step": 7700 }, { "epoch": 0.029346162922588553, "grad_norm": 0.1168946847319603, "learning_rate": 0.0005, "loss": 2.1388, "step": 7710 }, { "epoch": 0.029384225390711234, "grad_norm": 0.12111609429121017, "learning_rate": 0.0005, "loss": 2.1323, "step": 7720 }, { "epoch": 0.02942228785883392, "grad_norm": 0.10218352824449539, "learning_rate": 0.0005, "loss": 2.1377, "step": 7730 }, { "epoch": 0.0294603503269566, "grad_norm": 0.10925023257732391, "learning_rate": 0.0005, "loss": 2.1473, "step": 7740 }, { "epoch": 0.029498412795079285, "grad_norm": 0.12212564051151276, "learning_rate": 0.0005, "loss": 2.1335, "step": 7750 }, { "epoch": 0.029536475263201967, "grad_norm": 0.10729344934225082, "learning_rate": 0.0005, "loss": 2.1386, "step": 7760 }, { "epoch": 0.02957453773132465, "grad_norm": 0.10953337699174881, "learning_rate": 0.0005, "loss": 2.1422, "step": 7770 }, { "epoch": 0.029612600199447333, "grad_norm": 0.11217037588357925, "learning_rate": 0.0005, "loss": 2.1513, "step": 7780 }, { "epoch": 0.029650662667570014, "grad_norm": 0.11673374474048615, "learning_rate": 0.0005, "loss": 2.1371, "step": 7790 }, { "epoch": 0.0296887251356927, "grad_norm": 0.11757118254899979, "learning_rate": 0.0005, "loss": 2.1509, "step": 7800 }, { "epoch": 0.02972678760381538, "grad_norm": 0.10758315771818161, "learning_rate": 0.0005, "loss": 2.1535, "step": 7810 }, { "epoch": 0.029764850071938066, "grad_norm": 0.11497870832681656, "learning_rate": 0.0005, "loss": 2.1466, "step": 7820 }, { "epoch": 0.029802912540060747, "grad_norm": 0.12000583857297897, "learning_rate": 0.0005, "loss": 2.136, "step": 7830 }, { "epoch": 0.029840975008183432, "grad_norm": 0.12263140082359314, "learning_rate": 0.0005, "loss": 2.1507, "step": 7840 }, { "epoch": 0.029879037476306113, "grad_norm": 0.11573194712400436, "learning_rate": 0.0005, "loss": 2.1496, "step": 7850 }, { "epoch": 0.029917099944428798, "grad_norm": 0.111940398812294, "learning_rate": 0.0005, "loss": 2.1327, "step": 7860 }, { "epoch": 0.02995516241255148, "grad_norm": 0.11048493534326553, "learning_rate": 0.0005, "loss": 2.1428, "step": 7870 }, { "epoch": 0.02999322488067416, "grad_norm": 0.11425173282623291, "learning_rate": 0.0005, "loss": 2.1433, "step": 7880 }, { "epoch": 0.030031287348796846, "grad_norm": 0.14338618516921997, "learning_rate": 0.0005, "loss": 2.1555, "step": 7890 }, { "epoch": 0.030069349816919527, "grad_norm": 0.12020622938871384, "learning_rate": 0.0005, "loss": 2.1625, "step": 7900 }, { "epoch": 0.030107412285042212, "grad_norm": 0.11354848742485046, "learning_rate": 0.0005, "loss": 2.1351, "step": 7910 }, { "epoch": 0.030145474753164894, "grad_norm": 0.11214245855808258, "learning_rate": 0.0005, "loss": 2.1422, "step": 7920 }, { "epoch": 0.03018353722128758, "grad_norm": 0.1082155704498291, "learning_rate": 0.0005, "loss": 2.156, "step": 7930 }, { "epoch": 0.03022159968941026, "grad_norm": 0.11786253750324249, "learning_rate": 0.0005, "loss": 2.1545, "step": 7940 }, { "epoch": 0.030259662157532945, "grad_norm": 0.11564178764820099, "learning_rate": 0.0005, "loss": 2.1406, "step": 7950 }, { "epoch": 0.030297724625655626, "grad_norm": 0.11741790175437927, "learning_rate": 0.0005, "loss": 2.1451, "step": 7960 }, { "epoch": 0.030335787093778308, "grad_norm": 0.13863076269626617, "learning_rate": 0.0005, "loss": 2.151, "step": 7970 }, { "epoch": 0.030373849561900992, "grad_norm": 0.11684587597846985, "learning_rate": 0.0005, "loss": 2.1308, "step": 7980 }, { "epoch": 0.030411912030023674, "grad_norm": 0.11619321256875992, "learning_rate": 0.0005, "loss": 2.1623, "step": 7990 }, { "epoch": 0.03044997449814636, "grad_norm": 0.11050112545490265, "learning_rate": 0.0005, "loss": 2.1441, "step": 8000 }, { "epoch": 0.03048803696626904, "grad_norm": 0.11222852766513824, "learning_rate": 0.0005, "loss": 2.146, "step": 8010 }, { "epoch": 0.030526099434391725, "grad_norm": 0.10987085849046707, "learning_rate": 0.0005, "loss": 2.1437, "step": 8020 }, { "epoch": 0.030564161902514406, "grad_norm": 0.11533662676811218, "learning_rate": 0.0005, "loss": 2.1449, "step": 8030 }, { "epoch": 0.03060222437063709, "grad_norm": 0.11021203547716141, "learning_rate": 0.0005, "loss": 2.1413, "step": 8040 }, { "epoch": 0.030640286838759773, "grad_norm": 0.10626066476106644, "learning_rate": 0.0005, "loss": 2.157, "step": 8050 }, { "epoch": 0.030678349306882454, "grad_norm": 0.10716816037893295, "learning_rate": 0.0005, "loss": 2.134, "step": 8060 }, { "epoch": 0.03071641177500514, "grad_norm": 0.14796046912670135, "learning_rate": 0.0005, "loss": 2.1474, "step": 8070 }, { "epoch": 0.03075447424312782, "grad_norm": 0.11453462392091751, "learning_rate": 0.0005, "loss": 2.141, "step": 8080 }, { "epoch": 0.030792536711250505, "grad_norm": 0.10463167726993561, "learning_rate": 0.0005, "loss": 2.1471, "step": 8090 }, { "epoch": 0.030830599179373187, "grad_norm": 0.10290495306253433, "learning_rate": 0.0005, "loss": 2.1343, "step": 8100 }, { "epoch": 0.03086866164749587, "grad_norm": 0.10581576824188232, "learning_rate": 0.0005, "loss": 2.1367, "step": 8110 }, { "epoch": 0.030906724115618553, "grad_norm": 0.12016568332910538, "learning_rate": 0.0005, "loss": 2.132, "step": 8120 }, { "epoch": 0.030944786583741238, "grad_norm": 0.11537013202905655, "learning_rate": 0.0005, "loss": 2.1295, "step": 8130 }, { "epoch": 0.03098284905186392, "grad_norm": 0.12919427454471588, "learning_rate": 0.0005, "loss": 2.1442, "step": 8140 }, { "epoch": 0.0310209115199866, "grad_norm": 0.11007408797740936, "learning_rate": 0.0005, "loss": 2.1367, "step": 8150 }, { "epoch": 0.031058973988109286, "grad_norm": 0.11892461031675339, "learning_rate": 0.0005, "loss": 2.1517, "step": 8160 }, { "epoch": 0.031097036456231967, "grad_norm": 0.17755889892578125, "learning_rate": 0.0005, "loss": 2.1713, "step": 8170 }, { "epoch": 0.031135098924354652, "grad_norm": 0.11239070445299149, "learning_rate": 0.0005, "loss": 2.1545, "step": 8180 }, { "epoch": 0.031173161392477333, "grad_norm": 0.1028556078672409, "learning_rate": 0.0005, "loss": 2.1573, "step": 8190 }, { "epoch": 0.031211223860600018, "grad_norm": 0.1157098338007927, "learning_rate": 0.0005, "loss": 2.1426, "step": 8200 }, { "epoch": 0.0312492863287227, "grad_norm": 0.10809467732906342, "learning_rate": 0.0005, "loss": 2.1322, "step": 8210 }, { "epoch": 0.03128734879684538, "grad_norm": 0.11528951674699783, "learning_rate": 0.0005, "loss": 2.1467, "step": 8220 }, { "epoch": 0.031325411264968066, "grad_norm": 0.12987327575683594, "learning_rate": 0.0005, "loss": 2.1535, "step": 8230 }, { "epoch": 0.03136347373309075, "grad_norm": 0.11531908810138702, "learning_rate": 0.0005, "loss": 2.1537, "step": 8240 }, { "epoch": 0.03140153620121343, "grad_norm": 0.10050345212221146, "learning_rate": 0.0005, "loss": 2.1433, "step": 8250 }, { "epoch": 0.03143959866933611, "grad_norm": 0.12242735922336578, "learning_rate": 0.0005, "loss": 2.1493, "step": 8260 }, { "epoch": 0.0314776611374588, "grad_norm": 0.1069076880812645, "learning_rate": 0.0005, "loss": 2.1484, "step": 8270 }, { "epoch": 0.03151572360558148, "grad_norm": 0.1232587918639183, "learning_rate": 0.0005, "loss": 2.1481, "step": 8280 }, { "epoch": 0.03155378607370416, "grad_norm": 0.10760942846536636, "learning_rate": 0.0005, "loss": 2.1377, "step": 8290 }, { "epoch": 0.031591848541826846, "grad_norm": 0.1072341799736023, "learning_rate": 0.0005, "loss": 2.1412, "step": 8300 }, { "epoch": 0.03162991100994953, "grad_norm": 0.11002219468355179, "learning_rate": 0.0005, "loss": 2.1458, "step": 8310 }, { "epoch": 0.03166797347807221, "grad_norm": 0.11687692254781723, "learning_rate": 0.0005, "loss": 2.1488, "step": 8320 }, { "epoch": 0.031706035946194894, "grad_norm": 0.11089745163917542, "learning_rate": 0.0005, "loss": 2.1358, "step": 8330 }, { "epoch": 0.03174409841431758, "grad_norm": 0.11608854681253433, "learning_rate": 0.0005, "loss": 2.1526, "step": 8340 }, { "epoch": 0.031782160882440263, "grad_norm": 0.10124436765909195, "learning_rate": 0.0005, "loss": 2.1463, "step": 8350 }, { "epoch": 0.03182022335056294, "grad_norm": 0.10724233835935593, "learning_rate": 0.0005, "loss": 2.1484, "step": 8360 }, { "epoch": 0.031858285818685626, "grad_norm": 0.11886536329984665, "learning_rate": 0.0005, "loss": 2.1481, "step": 8370 }, { "epoch": 0.03189634828680831, "grad_norm": 0.11546676605939865, "learning_rate": 0.0005, "loss": 2.14, "step": 8380 }, { "epoch": 0.031934410754930996, "grad_norm": 0.12106689810752869, "learning_rate": 0.0005, "loss": 2.153, "step": 8390 }, { "epoch": 0.031972473223053674, "grad_norm": 0.11836668848991394, "learning_rate": 0.0005, "loss": 2.1377, "step": 8400 }, { "epoch": 0.03201053569117636, "grad_norm": 0.10874088108539581, "learning_rate": 0.0005, "loss": 2.1282, "step": 8410 }, { "epoch": 0.032048598159299044, "grad_norm": 0.1145242378115654, "learning_rate": 0.0005, "loss": 2.1295, "step": 8420 }, { "epoch": 0.03208666062742172, "grad_norm": 0.11752122640609741, "learning_rate": 0.0005, "loss": 2.1277, "step": 8430 }, { "epoch": 0.03212472309554441, "grad_norm": 0.12958693504333496, "learning_rate": 0.0005, "loss": 2.1475, "step": 8440 }, { "epoch": 0.03216278556366709, "grad_norm": 0.11110221594572067, "learning_rate": 0.0005, "loss": 2.1379, "step": 8450 }, { "epoch": 0.032200848031789776, "grad_norm": 0.11337503045797348, "learning_rate": 0.0005, "loss": 2.1556, "step": 8460 }, { "epoch": 0.032238910499912454, "grad_norm": 0.11539135873317719, "learning_rate": 0.0005, "loss": 2.1439, "step": 8470 }, { "epoch": 0.03227697296803514, "grad_norm": 0.10884083807468414, "learning_rate": 0.0005, "loss": 2.1506, "step": 8480 }, { "epoch": 0.032315035436157824, "grad_norm": 0.12452396005392075, "learning_rate": 0.0005, "loss": 2.1256, "step": 8490 }, { "epoch": 0.0323530979042805, "grad_norm": 0.1131415143609047, "learning_rate": 0.0005, "loss": 2.1457, "step": 8500 }, { "epoch": 0.03239116037240319, "grad_norm": 0.11396261304616928, "learning_rate": 0.0005, "loss": 2.1516, "step": 8510 }, { "epoch": 0.03242922284052587, "grad_norm": 0.12674427032470703, "learning_rate": 0.0005, "loss": 2.1559, "step": 8520 }, { "epoch": 0.03246728530864856, "grad_norm": 0.11402904242277145, "learning_rate": 0.0005, "loss": 2.1367, "step": 8530 }, { "epoch": 0.032505347776771235, "grad_norm": 0.10981621593236923, "learning_rate": 0.0005, "loss": 2.1492, "step": 8540 }, { "epoch": 0.03254341024489392, "grad_norm": 0.10459432005882263, "learning_rate": 0.0005, "loss": 2.1424, "step": 8550 }, { "epoch": 0.032581472713016604, "grad_norm": 0.10308747738599777, "learning_rate": 0.0005, "loss": 2.15, "step": 8560 }, { "epoch": 0.03261953518113929, "grad_norm": 0.11720939725637436, "learning_rate": 0.0005, "loss": 2.1351, "step": 8570 }, { "epoch": 0.03265759764926197, "grad_norm": 0.11396331340074539, "learning_rate": 0.0005, "loss": 2.1418, "step": 8580 }, { "epoch": 0.03269566011738465, "grad_norm": 0.11162128299474716, "learning_rate": 0.0005, "loss": 2.1432, "step": 8590 }, { "epoch": 0.03273372258550734, "grad_norm": 0.12489422410726547, "learning_rate": 0.0005, "loss": 2.1395, "step": 8600 }, { "epoch": 0.032771785053630015, "grad_norm": 0.10979968309402466, "learning_rate": 0.0005, "loss": 2.1429, "step": 8610 }, { "epoch": 0.0328098475217527, "grad_norm": 0.11844202876091003, "learning_rate": 0.0005, "loss": 2.17, "step": 8620 }, { "epoch": 0.032847909989875385, "grad_norm": 0.11446519196033478, "learning_rate": 0.0005, "loss": 2.1443, "step": 8630 }, { "epoch": 0.03288597245799807, "grad_norm": 0.12458764016628265, "learning_rate": 0.0005, "loss": 2.1505, "step": 8640 }, { "epoch": 0.03292403492612075, "grad_norm": 0.2882729470729828, "learning_rate": 0.0005, "loss": 2.1591, "step": 8650 }, { "epoch": 0.03296209739424343, "grad_norm": 0.11263883113861084, "learning_rate": 0.0005, "loss": 2.145, "step": 8660 }, { "epoch": 0.03300015986236612, "grad_norm": 0.13356174528598785, "learning_rate": 0.0005, "loss": 2.1446, "step": 8670 }, { "epoch": 0.033038222330488795, "grad_norm": 0.10944783687591553, "learning_rate": 0.0005, "loss": 2.1393, "step": 8680 }, { "epoch": 0.03307628479861148, "grad_norm": 0.12128116935491562, "learning_rate": 0.0005, "loss": 2.1399, "step": 8690 }, { "epoch": 0.033114347266734165, "grad_norm": 0.11913850903511047, "learning_rate": 0.0005, "loss": 2.1427, "step": 8700 }, { "epoch": 0.03315240973485685, "grad_norm": 0.11542215943336487, "learning_rate": 0.0005, "loss": 2.1427, "step": 8710 }, { "epoch": 0.03319047220297953, "grad_norm": 0.11794381588697433, "learning_rate": 0.0005, "loss": 2.1389, "step": 8720 }, { "epoch": 0.03322853467110221, "grad_norm": 0.1318274885416031, "learning_rate": 0.0005, "loss": 2.1545, "step": 8730 }, { "epoch": 0.0332665971392249, "grad_norm": 0.11836835741996765, "learning_rate": 0.0005, "loss": 2.14, "step": 8740 }, { "epoch": 0.03330465960734758, "grad_norm": 0.12125727534294128, "learning_rate": 0.0005, "loss": 2.1462, "step": 8750 }, { "epoch": 0.03334272207547026, "grad_norm": 0.1427885890007019, "learning_rate": 0.0005, "loss": 2.149, "step": 8760 }, { "epoch": 0.033380784543592945, "grad_norm": 0.1247292086482048, "learning_rate": 0.0005, "loss": 2.1337, "step": 8770 }, { "epoch": 0.03341884701171563, "grad_norm": 0.10335478186607361, "learning_rate": 0.0005, "loss": 2.1439, "step": 8780 }, { "epoch": 0.03345690947983831, "grad_norm": 0.11352815479040146, "learning_rate": 0.0005, "loss": 2.1398, "step": 8790 }, { "epoch": 0.03349497194796099, "grad_norm": 0.12018328905105591, "learning_rate": 0.0005, "loss": 2.1509, "step": 8800 }, { "epoch": 0.03353303441608368, "grad_norm": 0.11159548163414001, "learning_rate": 0.0005, "loss": 2.1439, "step": 8810 }, { "epoch": 0.03357109688420636, "grad_norm": 0.11696930229663849, "learning_rate": 0.0005, "loss": 2.1464, "step": 8820 }, { "epoch": 0.03360915935232904, "grad_norm": 0.1082887277007103, "learning_rate": 0.0005, "loss": 2.1387, "step": 8830 }, { "epoch": 0.033647221820451725, "grad_norm": 0.12014048546552658, "learning_rate": 0.0005, "loss": 2.1568, "step": 8840 }, { "epoch": 0.03368528428857441, "grad_norm": 0.1180727481842041, "learning_rate": 0.0005, "loss": 2.1405, "step": 8850 }, { "epoch": 0.03372334675669709, "grad_norm": 0.10631722211837769, "learning_rate": 0.0005, "loss": 2.1454, "step": 8860 }, { "epoch": 0.03376140922481977, "grad_norm": 0.1066332533955574, "learning_rate": 0.0005, "loss": 2.1275, "step": 8870 }, { "epoch": 0.03379947169294246, "grad_norm": 0.10460060834884644, "learning_rate": 0.0005, "loss": 2.1301, "step": 8880 }, { "epoch": 0.03383753416106514, "grad_norm": 0.11816377192735672, "learning_rate": 0.0005, "loss": 2.1408, "step": 8890 }, { "epoch": 0.03387559662918782, "grad_norm": 0.12219863384962082, "learning_rate": 0.0005, "loss": 2.1514, "step": 8900 }, { "epoch": 0.033913659097310506, "grad_norm": 0.12654411792755127, "learning_rate": 0.0005, "loss": 2.1283, "step": 8910 }, { "epoch": 0.03395172156543319, "grad_norm": 0.10688552260398865, "learning_rate": 0.0005, "loss": 2.1463, "step": 8920 }, { "epoch": 0.03398978403355587, "grad_norm": 0.13524161279201508, "learning_rate": 0.0005, "loss": 2.1327, "step": 8930 }, { "epoch": 0.03402784650167855, "grad_norm": 0.1091911792755127, "learning_rate": 0.0005, "loss": 2.1431, "step": 8940 }, { "epoch": 0.03406590896980124, "grad_norm": 0.12308672070503235, "learning_rate": 0.0005, "loss": 2.1367, "step": 8950 }, { "epoch": 0.03410397143792392, "grad_norm": 0.11753041297197342, "learning_rate": 0.0005, "loss": 2.1424, "step": 8960 }, { "epoch": 0.0341420339060466, "grad_norm": 0.11694305390119553, "learning_rate": 0.0005, "loss": 2.1501, "step": 8970 }, { "epoch": 0.034180096374169286, "grad_norm": 0.12045048922300339, "learning_rate": 0.0005, "loss": 2.1365, "step": 8980 }, { "epoch": 0.03421815884229197, "grad_norm": 0.11517022550106049, "learning_rate": 0.0005, "loss": 2.1468, "step": 8990 }, { "epoch": 0.034256221310414656, "grad_norm": 0.13119100034236908, "learning_rate": 0.0005, "loss": 2.1403, "step": 9000 }, { "epoch": 0.034294283778537334, "grad_norm": 0.2477245330810547, "learning_rate": 0.0005, "loss": 2.1287, "step": 9010 }, { "epoch": 0.03433234624666002, "grad_norm": 0.11066543310880661, "learning_rate": 0.0005, "loss": 2.1408, "step": 9020 }, { "epoch": 0.0343704087147827, "grad_norm": 0.1192685142159462, "learning_rate": 0.0005, "loss": 2.148, "step": 9030 }, { "epoch": 0.03440847118290538, "grad_norm": 0.11177363246679306, "learning_rate": 0.0005, "loss": 2.1442, "step": 9040 }, { "epoch": 0.034446533651028066, "grad_norm": 0.1382625252008438, "learning_rate": 0.0005, "loss": 2.1504, "step": 9050 }, { "epoch": 0.03448459611915075, "grad_norm": 0.11589354276657104, "learning_rate": 0.0005, "loss": 2.1601, "step": 9060 }, { "epoch": 0.034522658587273436, "grad_norm": 0.13153360784053802, "learning_rate": 0.0005, "loss": 2.1466, "step": 9070 }, { "epoch": 0.034560721055396114, "grad_norm": 0.11433520168066025, "learning_rate": 0.0005, "loss": 2.1305, "step": 9080 }, { "epoch": 0.0345987835235188, "grad_norm": 0.13263995945453644, "learning_rate": 0.0005, "loss": 2.1496, "step": 9090 }, { "epoch": 0.034636845991641484, "grad_norm": 0.11930017918348312, "learning_rate": 0.0005, "loss": 2.1435, "step": 9100 }, { "epoch": 0.03467490845976416, "grad_norm": 0.10451359301805496, "learning_rate": 0.0005, "loss": 2.1497, "step": 9110 }, { "epoch": 0.034712970927886846, "grad_norm": 0.12230315804481506, "learning_rate": 0.0005, "loss": 2.143, "step": 9120 }, { "epoch": 0.03475103339600953, "grad_norm": 0.10477308928966522, "learning_rate": 0.0005, "loss": 2.1438, "step": 9130 }, { "epoch": 0.034789095864132216, "grad_norm": 0.1123172715306282, "learning_rate": 0.0005, "loss": 2.144, "step": 9140 }, { "epoch": 0.034827158332254894, "grad_norm": 0.12633569538593292, "learning_rate": 0.0005, "loss": 2.1348, "step": 9150 }, { "epoch": 0.03486522080037758, "grad_norm": 0.11606360226869583, "learning_rate": 0.0005, "loss": 2.1477, "step": 9160 }, { "epoch": 0.034903283268500264, "grad_norm": 0.1049937903881073, "learning_rate": 0.0005, "loss": 2.1543, "step": 9170 }, { "epoch": 0.03494134573662295, "grad_norm": 0.12141606211662292, "learning_rate": 0.0005, "loss": 2.1457, "step": 9180 }, { "epoch": 0.03497940820474563, "grad_norm": 0.10848421603441238, "learning_rate": 0.0005, "loss": 2.14, "step": 9190 }, { "epoch": 0.03501747067286831, "grad_norm": 0.12193699181079865, "learning_rate": 0.0005, "loss": 2.144, "step": 9200 }, { "epoch": 0.035055533140990996, "grad_norm": 0.10787303745746613, "learning_rate": 0.0005, "loss": 2.1311, "step": 9210 }, { "epoch": 0.035093595609113674, "grad_norm": 0.10789080709218979, "learning_rate": 0.0005, "loss": 2.146, "step": 9220 }, { "epoch": 0.03513165807723636, "grad_norm": 0.1300131380558014, "learning_rate": 0.0005, "loss": 2.1409, "step": 9230 }, { "epoch": 0.035169720545359044, "grad_norm": 0.11685086041688919, "learning_rate": 0.0005, "loss": 2.1401, "step": 9240 }, { "epoch": 0.03520778301348173, "grad_norm": 0.10747739672660828, "learning_rate": 0.0005, "loss": 2.1638, "step": 9250 }, { "epoch": 0.03524584548160441, "grad_norm": 0.12577669322490692, "learning_rate": 0.0005, "loss": 2.1525, "step": 9260 }, { "epoch": 0.03528390794972709, "grad_norm": 0.12838762998580933, "learning_rate": 0.0005, "loss": 2.1318, "step": 9270 }, { "epoch": 0.03532197041784978, "grad_norm": 0.10830754786729813, "learning_rate": 0.0005, "loss": 2.1327, "step": 9280 }, { "epoch": 0.035360032885972455, "grad_norm": 0.11010642349720001, "learning_rate": 0.0005, "loss": 2.1513, "step": 9290 }, { "epoch": 0.03539809535409514, "grad_norm": 0.11120496690273285, "learning_rate": 0.0005, "loss": 2.15, "step": 9300 }, { "epoch": 0.035436157822217824, "grad_norm": 0.10505425930023193, "learning_rate": 0.0005, "loss": 2.1404, "step": 9310 }, { "epoch": 0.03547422029034051, "grad_norm": 0.11274050921201706, "learning_rate": 0.0005, "loss": 2.1376, "step": 9320 }, { "epoch": 0.03551228275846319, "grad_norm": 0.1192653551697731, "learning_rate": 0.0005, "loss": 2.1542, "step": 9330 }, { "epoch": 0.03555034522658587, "grad_norm": 0.10686255246400833, "learning_rate": 0.0005, "loss": 2.1368, "step": 9340 }, { "epoch": 0.03558840769470856, "grad_norm": 0.1253276765346527, "learning_rate": 0.0005, "loss": 2.1293, "step": 9350 }, { "epoch": 0.03562647016283124, "grad_norm": 0.11075481027364731, "learning_rate": 0.0005, "loss": 2.139, "step": 9360 }, { "epoch": 0.03566453263095392, "grad_norm": 0.11865369975566864, "learning_rate": 0.0005, "loss": 2.1486, "step": 9370 }, { "epoch": 0.035702595099076605, "grad_norm": 0.10745938122272491, "learning_rate": 0.0005, "loss": 2.1584, "step": 9380 }, { "epoch": 0.03574065756719929, "grad_norm": 0.11748365312814713, "learning_rate": 0.0005, "loss": 2.1552, "step": 9390 }, { "epoch": 0.03577872003532197, "grad_norm": 0.11585269123315811, "learning_rate": 0.0005, "loss": 2.1345, "step": 9400 }, { "epoch": 0.03581678250344465, "grad_norm": 0.10678518563508987, "learning_rate": 0.0005, "loss": 2.1507, "step": 9410 }, { "epoch": 0.03585484497156734, "grad_norm": 0.11283028870820999, "learning_rate": 0.0005, "loss": 2.1477, "step": 9420 }, { "epoch": 0.03589290743969002, "grad_norm": 0.11182880401611328, "learning_rate": 0.0005, "loss": 2.1459, "step": 9430 }, { "epoch": 0.0359309699078127, "grad_norm": 0.11779739707708359, "learning_rate": 0.0005, "loss": 2.1379, "step": 9440 }, { "epoch": 0.035969032375935385, "grad_norm": 0.11326213926076889, "learning_rate": 0.0005, "loss": 2.1439, "step": 9450 }, { "epoch": 0.03600709484405807, "grad_norm": 0.11592642962932587, "learning_rate": 0.0005, "loss": 2.1435, "step": 9460 }, { "epoch": 0.03604515731218075, "grad_norm": 0.11186369508504868, "learning_rate": 0.0005, "loss": 2.1403, "step": 9470 }, { "epoch": 0.03608321978030343, "grad_norm": 0.11607187241315842, "learning_rate": 0.0005, "loss": 2.1537, "step": 9480 }, { "epoch": 0.03612128224842612, "grad_norm": 0.1025259718298912, "learning_rate": 0.0005, "loss": 2.143, "step": 9490 }, { "epoch": 0.0361593447165488, "grad_norm": 0.10464241355657578, "learning_rate": 0.0005, "loss": 2.1487, "step": 9500 }, { "epoch": 0.03619740718467148, "grad_norm": 0.1146213710308075, "learning_rate": 0.0005, "loss": 2.1477, "step": 9510 }, { "epoch": 0.036235469652794165, "grad_norm": 0.11801069229841232, "learning_rate": 0.0005, "loss": 2.1423, "step": 9520 }, { "epoch": 0.03627353212091685, "grad_norm": 0.10072599351406097, "learning_rate": 0.0005, "loss": 2.1511, "step": 9530 }, { "epoch": 0.036311594589039535, "grad_norm": 0.11770542711019516, "learning_rate": 0.0005, "loss": 2.1462, "step": 9540 }, { "epoch": 0.03634965705716221, "grad_norm": 0.1119447574019432, "learning_rate": 0.0005, "loss": 2.1299, "step": 9550 }, { "epoch": 0.0363877195252849, "grad_norm": 0.12693637609481812, "learning_rate": 0.0005, "loss": 2.1275, "step": 9560 }, { "epoch": 0.03642578199340758, "grad_norm": 0.12111397087574005, "learning_rate": 0.0005, "loss": 2.1445, "step": 9570 }, { "epoch": 0.03646384446153026, "grad_norm": 0.11317116022109985, "learning_rate": 0.0005, "loss": 2.1336, "step": 9580 }, { "epoch": 0.036501906929652946, "grad_norm": 0.12426373362541199, "learning_rate": 0.0005, "loss": 2.1382, "step": 9590 }, { "epoch": 0.03653996939777563, "grad_norm": 0.11448558419942856, "learning_rate": 0.0005, "loss": 2.141, "step": 9600 }, { "epoch": 0.036578031865898315, "grad_norm": 0.1049763485789299, "learning_rate": 0.0005, "loss": 2.1362, "step": 9610 }, { "epoch": 0.03661609433402099, "grad_norm": 0.12384191900491714, "learning_rate": 0.0005, "loss": 2.1417, "step": 9620 }, { "epoch": 0.03665415680214368, "grad_norm": 0.12874995172023773, "learning_rate": 0.0005, "loss": 2.1293, "step": 9630 }, { "epoch": 0.03669221927026636, "grad_norm": 0.11989755183458328, "learning_rate": 0.0005, "loss": 2.1451, "step": 9640 }, { "epoch": 0.03673028173838904, "grad_norm": 0.11521641165018082, "learning_rate": 0.0005, "loss": 2.1534, "step": 9650 }, { "epoch": 0.036768344206511726, "grad_norm": 0.10849806666374207, "learning_rate": 0.0005, "loss": 2.1531, "step": 9660 }, { "epoch": 0.03680640667463441, "grad_norm": 0.10426240414381027, "learning_rate": 0.0005, "loss": 2.1451, "step": 9670 }, { "epoch": 0.036844469142757096, "grad_norm": 0.11630252748727798, "learning_rate": 0.0005, "loss": 2.1553, "step": 9680 }, { "epoch": 0.036882531610879773, "grad_norm": 0.1050904244184494, "learning_rate": 0.0005, "loss": 2.1346, "step": 9690 }, { "epoch": 0.03692059407900246, "grad_norm": 0.14695580303668976, "learning_rate": 0.0005, "loss": 2.1423, "step": 9700 }, { "epoch": 0.03695865654712514, "grad_norm": 0.11704635620117188, "learning_rate": 0.0005, "loss": 2.1394, "step": 9710 }, { "epoch": 0.03699671901524783, "grad_norm": 0.11972527205944061, "learning_rate": 0.0005, "loss": 2.1386, "step": 9720 }, { "epoch": 0.037034781483370506, "grad_norm": 0.11107856035232544, "learning_rate": 0.0005, "loss": 2.1533, "step": 9730 }, { "epoch": 0.03707284395149319, "grad_norm": 0.11058147251605988, "learning_rate": 0.0005, "loss": 2.1386, "step": 9740 }, { "epoch": 0.037110906419615876, "grad_norm": 0.12540003657341003, "learning_rate": 0.0005, "loss": 2.1339, "step": 9750 }, { "epoch": 0.037148968887738554, "grad_norm": 0.10699540376663208, "learning_rate": 0.0005, "loss": 2.1313, "step": 9760 }, { "epoch": 0.03718703135586124, "grad_norm": 0.11579212546348572, "learning_rate": 0.0005, "loss": 2.1445, "step": 9770 }, { "epoch": 0.037225093823983924, "grad_norm": 0.10480821877717972, "learning_rate": 0.0005, "loss": 2.1523, "step": 9780 }, { "epoch": 0.03726315629210661, "grad_norm": 0.11196145415306091, "learning_rate": 0.0005, "loss": 2.1433, "step": 9790 }, { "epoch": 0.037301218760229286, "grad_norm": 0.12401875853538513, "learning_rate": 0.0005, "loss": 2.136, "step": 9800 }, { "epoch": 0.03733928122835197, "grad_norm": 0.11669589579105377, "learning_rate": 0.0005, "loss": 2.1541, "step": 9810 }, { "epoch": 0.037377343696474656, "grad_norm": 0.10399942845106125, "learning_rate": 0.0005, "loss": 2.1516, "step": 9820 }, { "epoch": 0.037415406164597334, "grad_norm": 0.12019993364810944, "learning_rate": 0.0005, "loss": 2.1353, "step": 9830 }, { "epoch": 0.03745346863272002, "grad_norm": 0.11188608407974243, "learning_rate": 0.0005, "loss": 2.1512, "step": 9840 }, { "epoch": 0.037491531100842704, "grad_norm": 0.11040358990430832, "learning_rate": 0.0005, "loss": 2.1433, "step": 9850 }, { "epoch": 0.03752959356896539, "grad_norm": 0.1256285011768341, "learning_rate": 0.0005, "loss": 2.1435, "step": 9860 }, { "epoch": 0.03756765603708807, "grad_norm": 0.1065913662314415, "learning_rate": 0.0005, "loss": 2.1588, "step": 9870 }, { "epoch": 0.03760571850521075, "grad_norm": 0.11596047133207321, "learning_rate": 0.0005, "loss": 2.1385, "step": 9880 }, { "epoch": 0.037643780973333436, "grad_norm": 0.108096644282341, "learning_rate": 0.0005, "loss": 2.1524, "step": 9890 }, { "epoch": 0.03768184344145612, "grad_norm": 0.11330022662878036, "learning_rate": 0.0005, "loss": 2.1296, "step": 9900 }, { "epoch": 0.0377199059095788, "grad_norm": 0.5385860800743103, "learning_rate": 0.0005, "loss": 2.1555, "step": 9910 }, { "epoch": 0.037757968377701484, "grad_norm": 0.1062675416469574, "learning_rate": 0.0005, "loss": 2.1442, "step": 9920 }, { "epoch": 0.03779603084582417, "grad_norm": 0.11074505001306534, "learning_rate": 0.0005, "loss": 2.1497, "step": 9930 }, { "epoch": 0.03783409331394685, "grad_norm": 0.12226357311010361, "learning_rate": 0.0005, "loss": 2.1448, "step": 9940 }, { "epoch": 0.03787215578206953, "grad_norm": 0.11123912781476974, "learning_rate": 0.0005, "loss": 2.1222, "step": 9950 }, { "epoch": 0.03791021825019222, "grad_norm": 0.14550866186618805, "learning_rate": 0.0005, "loss": 2.1396, "step": 9960 }, { "epoch": 0.0379482807183149, "grad_norm": 0.12066611647605896, "learning_rate": 0.0005, "loss": 2.1176, "step": 9970 }, { "epoch": 0.03798634318643758, "grad_norm": 0.12039341777563095, "learning_rate": 0.0005, "loss": 2.1388, "step": 9980 }, { "epoch": 0.038024405654560264, "grad_norm": 0.11662400513887405, "learning_rate": 0.0005, "loss": 2.1386, "step": 9990 }, { "epoch": 0.03806246812268295, "grad_norm": 0.12345922738313675, "learning_rate": 0.0005, "loss": 2.1249, "step": 10000 }, { "epoch": 0.03810053059080563, "grad_norm": 0.11478866636753082, "learning_rate": 0.0005, "loss": 2.1352, "step": 10010 }, { "epoch": 0.03813859305892831, "grad_norm": 0.13019050657749176, "learning_rate": 0.0005, "loss": 2.1582, "step": 10020 }, { "epoch": 0.038176655527051, "grad_norm": 0.12493010610342026, "learning_rate": 0.0005, "loss": 2.1407, "step": 10030 }, { "epoch": 0.03821471799517368, "grad_norm": 0.12378795444965363, "learning_rate": 0.0005, "loss": 2.1457, "step": 10040 }, { "epoch": 0.03825278046329636, "grad_norm": 0.1202225461602211, "learning_rate": 0.0005, "loss": 2.1565, "step": 10050 }, { "epoch": 0.038290842931419045, "grad_norm": 0.1146734431385994, "learning_rate": 0.0005, "loss": 2.134, "step": 10060 }, { "epoch": 0.03832890539954173, "grad_norm": 0.1264592409133911, "learning_rate": 0.0005, "loss": 2.1431, "step": 10070 }, { "epoch": 0.03836696786766441, "grad_norm": 0.11752371490001678, "learning_rate": 0.0005, "loss": 2.1608, "step": 10080 }, { "epoch": 0.03840503033578709, "grad_norm": 0.1169474869966507, "learning_rate": 0.0005, "loss": 2.1339, "step": 10090 }, { "epoch": 0.03844309280390978, "grad_norm": 0.1330779492855072, "learning_rate": 0.0005, "loss": 2.163, "step": 10100 }, { "epoch": 0.03848115527203246, "grad_norm": 0.1204131469130516, "learning_rate": 0.0005, "loss": 2.1401, "step": 10110 }, { "epoch": 0.03851921774015514, "grad_norm": 0.11632289737462997, "learning_rate": 0.0005, "loss": 2.1359, "step": 10120 }, { "epoch": 0.038557280208277825, "grad_norm": 0.10953623056411743, "learning_rate": 0.0005, "loss": 2.1552, "step": 10130 }, { "epoch": 0.03859534267640051, "grad_norm": 0.12678860127925873, "learning_rate": 0.0005, "loss": 2.1532, "step": 10140 }, { "epoch": 0.038633405144523195, "grad_norm": 0.101773202419281, "learning_rate": 0.0005, "loss": 2.1495, "step": 10150 }, { "epoch": 0.03867146761264587, "grad_norm": 0.10864038020372391, "learning_rate": 0.0005, "loss": 2.1545, "step": 10160 }, { "epoch": 0.03870953008076856, "grad_norm": 0.12198641151189804, "learning_rate": 0.0005, "loss": 2.1315, "step": 10170 }, { "epoch": 0.03874759254889124, "grad_norm": 0.12078975886106491, "learning_rate": 0.0005, "loss": 2.1416, "step": 10180 }, { "epoch": 0.03878565501701392, "grad_norm": 0.1104569360613823, "learning_rate": 0.0005, "loss": 2.1376, "step": 10190 }, { "epoch": 0.038823717485136605, "grad_norm": 0.10495690256357193, "learning_rate": 0.0005, "loss": 2.1455, "step": 10200 }, { "epoch": 0.03886177995325929, "grad_norm": 0.11198532581329346, "learning_rate": 0.0005, "loss": 2.1425, "step": 10210 }, { "epoch": 0.038899842421381975, "grad_norm": 0.10918529331684113, "learning_rate": 0.0005, "loss": 2.1327, "step": 10220 }, { "epoch": 0.03893790488950465, "grad_norm": 0.13063205778598785, "learning_rate": 0.0005, "loss": 2.1301, "step": 10230 }, { "epoch": 0.03897596735762734, "grad_norm": 0.11537187546491623, "learning_rate": 0.0005, "loss": 2.1235, "step": 10240 }, { "epoch": 0.03901402982575002, "grad_norm": 0.11714793741703033, "learning_rate": 0.0005, "loss": 2.1521, "step": 10250 }, { "epoch": 0.0390520922938727, "grad_norm": 0.10736022889614105, "learning_rate": 0.0005, "loss": 2.1446, "step": 10260 }, { "epoch": 0.039090154761995385, "grad_norm": 0.12772643566131592, "learning_rate": 0.0005, "loss": 2.1525, "step": 10270 }, { "epoch": 0.03912821723011807, "grad_norm": 0.11992309242486954, "learning_rate": 0.0005, "loss": 2.1352, "step": 10280 }, { "epoch": 0.039166279698240755, "grad_norm": 0.1298178732395172, "learning_rate": 0.0005, "loss": 2.1518, "step": 10290 }, { "epoch": 0.03920434216636343, "grad_norm": 0.12157441675662994, "learning_rate": 0.0005, "loss": 2.14, "step": 10300 }, { "epoch": 0.03924240463448612, "grad_norm": 0.10330235213041306, "learning_rate": 0.0005, "loss": 2.1315, "step": 10310 }, { "epoch": 0.0392804671026088, "grad_norm": 0.11134987324476242, "learning_rate": 0.0005, "loss": 2.136, "step": 10320 }, { "epoch": 0.03931852957073149, "grad_norm": 0.11837029457092285, "learning_rate": 0.0005, "loss": 2.148, "step": 10330 }, { "epoch": 0.039356592038854166, "grad_norm": 0.11824219673871994, "learning_rate": 0.0005, "loss": 2.1267, "step": 10340 }, { "epoch": 0.03939465450697685, "grad_norm": 0.1204075962305069, "learning_rate": 0.0005, "loss": 2.1592, "step": 10350 }, { "epoch": 0.039432716975099535, "grad_norm": 0.11336227506399155, "learning_rate": 0.0005, "loss": 2.1295, "step": 10360 }, { "epoch": 0.03947077944322221, "grad_norm": 0.12388197332620621, "learning_rate": 0.0005, "loss": 2.1541, "step": 10370 }, { "epoch": 0.0395088419113449, "grad_norm": 0.11918102204799652, "learning_rate": 0.0005, "loss": 2.1511, "step": 10380 }, { "epoch": 0.03954690437946758, "grad_norm": 0.11205635219812393, "learning_rate": 0.0005, "loss": 2.1477, "step": 10390 }, { "epoch": 0.03958496684759027, "grad_norm": 0.11027996242046356, "learning_rate": 0.0005, "loss": 2.1387, "step": 10400 }, { "epoch": 0.039623029315712946, "grad_norm": 0.13072600960731506, "learning_rate": 0.0005, "loss": 2.1583, "step": 10410 }, { "epoch": 0.03966109178383563, "grad_norm": 0.11660933494567871, "learning_rate": 0.0005, "loss": 2.1491, "step": 10420 }, { "epoch": 0.039699154251958316, "grad_norm": 0.11653047800064087, "learning_rate": 0.0005, "loss": 2.1657, "step": 10430 }, { "epoch": 0.039737216720080994, "grad_norm": 0.125066876411438, "learning_rate": 0.0005, "loss": 2.147, "step": 10440 }, { "epoch": 0.03977527918820368, "grad_norm": 0.11215624958276749, "learning_rate": 0.0005, "loss": 2.163, "step": 10450 }, { "epoch": 0.03981334165632636, "grad_norm": 0.11980816721916199, "learning_rate": 0.0005, "loss": 2.161, "step": 10460 }, { "epoch": 0.03985140412444905, "grad_norm": 0.11664776504039764, "learning_rate": 0.0005, "loss": 2.1407, "step": 10470 }, { "epoch": 0.039889466592571726, "grad_norm": 0.1179792582988739, "learning_rate": 0.0005, "loss": 2.1569, "step": 10480 }, { "epoch": 0.03992752906069441, "grad_norm": 0.13249574601650238, "learning_rate": 0.0005, "loss": 2.1431, "step": 10490 }, { "epoch": 0.039965591528817096, "grad_norm": 0.1165829673409462, "learning_rate": 0.0005, "loss": 2.1543, "step": 10500 }, { "epoch": 0.04000365399693978, "grad_norm": 0.12203454971313477, "learning_rate": 0.0005, "loss": 2.1433, "step": 10510 }, { "epoch": 0.04004171646506246, "grad_norm": 0.11847560852766037, "learning_rate": 0.0005, "loss": 2.1369, "step": 10520 }, { "epoch": 0.040079778933185144, "grad_norm": 0.12019187957048416, "learning_rate": 0.0005, "loss": 2.135, "step": 10530 }, { "epoch": 0.04011784140130783, "grad_norm": 0.12712723016738892, "learning_rate": 0.0005, "loss": 2.1501, "step": 10540 }, { "epoch": 0.040155903869430506, "grad_norm": 0.17454050481319427, "learning_rate": 0.0005, "loss": 2.148, "step": 10550 }, { "epoch": 0.04019396633755319, "grad_norm": 0.12277776747941971, "learning_rate": 0.0005, "loss": 2.151, "step": 10560 }, { "epoch": 0.040232028805675876, "grad_norm": 0.12997326254844666, "learning_rate": 0.0005, "loss": 2.1324, "step": 10570 }, { "epoch": 0.04027009127379856, "grad_norm": 0.12896186113357544, "learning_rate": 0.0005, "loss": 2.1511, "step": 10580 }, { "epoch": 0.04030815374192124, "grad_norm": 0.12204229831695557, "learning_rate": 0.0005, "loss": 2.1504, "step": 10590 }, { "epoch": 0.040346216210043924, "grad_norm": 0.10776589810848236, "learning_rate": 0.0005, "loss": 2.1545, "step": 10600 }, { "epoch": 0.04038427867816661, "grad_norm": 0.11930728703737259, "learning_rate": 0.0005, "loss": 2.1483, "step": 10610 }, { "epoch": 0.04042234114628929, "grad_norm": 0.12586049735546112, "learning_rate": 0.0005, "loss": 2.1382, "step": 10620 }, { "epoch": 0.04046040361441197, "grad_norm": 0.10810121893882751, "learning_rate": 0.0005, "loss": 2.1417, "step": 10630 }, { "epoch": 0.040498466082534657, "grad_norm": 0.11577042937278748, "learning_rate": 0.0005, "loss": 2.1479, "step": 10640 }, { "epoch": 0.04053652855065734, "grad_norm": 0.11528394371271133, "learning_rate": 0.0005, "loss": 2.1589, "step": 10650 }, { "epoch": 0.04057459101878002, "grad_norm": 0.12058746814727783, "learning_rate": 0.0005, "loss": 2.1375, "step": 10660 }, { "epoch": 0.040612653486902704, "grad_norm": 0.11240281164646149, "learning_rate": 0.0005, "loss": 2.1534, "step": 10670 }, { "epoch": 0.04065071595502539, "grad_norm": 0.12102147191762924, "learning_rate": 0.0005, "loss": 2.1289, "step": 10680 }, { "epoch": 0.040688778423148074, "grad_norm": 0.12779374420642853, "learning_rate": 0.0005, "loss": 2.1454, "step": 10690 }, { "epoch": 0.04072684089127075, "grad_norm": 0.11621201783418655, "learning_rate": 0.0005, "loss": 2.1464, "step": 10700 }, { "epoch": 0.04076490335939344, "grad_norm": 0.10922065377235413, "learning_rate": 0.0005, "loss": 2.1361, "step": 10710 }, { "epoch": 0.04080296582751612, "grad_norm": 0.11401363462209702, "learning_rate": 0.0005, "loss": 2.1441, "step": 10720 }, { "epoch": 0.0408410282956388, "grad_norm": 0.1241462379693985, "learning_rate": 0.0005, "loss": 2.1668, "step": 10730 }, { "epoch": 0.040879090763761484, "grad_norm": 0.1269540786743164, "learning_rate": 0.0005, "loss": 2.1516, "step": 10740 }, { "epoch": 0.04091715323188417, "grad_norm": 0.12056253850460052, "learning_rate": 0.0005, "loss": 2.1323, "step": 10750 }, { "epoch": 0.040955215700006854, "grad_norm": 0.11011311411857605, "learning_rate": 0.0005, "loss": 2.132, "step": 10760 }, { "epoch": 0.04099327816812953, "grad_norm": 0.13204540312290192, "learning_rate": 0.0005, "loss": 2.1473, "step": 10770 }, { "epoch": 0.04103134063625222, "grad_norm": 0.12694178521633148, "learning_rate": 0.0005, "loss": 2.1444, "step": 10780 }, { "epoch": 0.0410694031043749, "grad_norm": 0.11872044205665588, "learning_rate": 0.0005, "loss": 2.153, "step": 10790 }, { "epoch": 0.04110746557249758, "grad_norm": 0.1134173646569252, "learning_rate": 0.0005, "loss": 2.1469, "step": 10800 }, { "epoch": 0.041145528040620265, "grad_norm": 0.11684293299913406, "learning_rate": 0.0005, "loss": 2.1259, "step": 10810 }, { "epoch": 0.04118359050874295, "grad_norm": 0.11171558499336243, "learning_rate": 0.0005, "loss": 2.1376, "step": 10820 }, { "epoch": 0.041221652976865635, "grad_norm": 0.13105729222297668, "learning_rate": 0.0005, "loss": 2.1356, "step": 10830 }, { "epoch": 0.04125971544498831, "grad_norm": 0.11976274102926254, "learning_rate": 0.0005, "loss": 2.1424, "step": 10840 }, { "epoch": 0.041297777913111, "grad_norm": 0.1136620044708252, "learning_rate": 0.0005, "loss": 2.1594, "step": 10850 }, { "epoch": 0.04133584038123368, "grad_norm": 0.1374150663614273, "learning_rate": 0.0005, "loss": 2.1233, "step": 10860 }, { "epoch": 0.04137390284935637, "grad_norm": 0.11635814607143402, "learning_rate": 0.0005, "loss": 2.1319, "step": 10870 }, { "epoch": 0.041411965317479045, "grad_norm": 0.11033984273672104, "learning_rate": 0.0005, "loss": 2.1409, "step": 10880 }, { "epoch": 0.04145002778560173, "grad_norm": 0.11242027580738068, "learning_rate": 0.0005, "loss": 2.1279, "step": 10890 }, { "epoch": 0.041488090253724415, "grad_norm": 0.11739082634449005, "learning_rate": 0.0005, "loss": 2.1326, "step": 10900 }, { "epoch": 0.04152615272184709, "grad_norm": 0.12341734021902084, "learning_rate": 0.0005, "loss": 2.121, "step": 10910 }, { "epoch": 0.04156421518996978, "grad_norm": 0.1177162379026413, "learning_rate": 0.0005, "loss": 2.1459, "step": 10920 }, { "epoch": 0.04160227765809246, "grad_norm": 0.11812618374824524, "learning_rate": 0.0005, "loss": 2.1509, "step": 10930 }, { "epoch": 0.04164034012621515, "grad_norm": 0.11785928159952164, "learning_rate": 0.0005, "loss": 2.1405, "step": 10940 }, { "epoch": 0.041678402594337825, "grad_norm": 0.11944951117038727, "learning_rate": 0.0005, "loss": 2.1214, "step": 10950 }, { "epoch": 0.04171646506246051, "grad_norm": 0.10570940375328064, "learning_rate": 0.0005, "loss": 2.1577, "step": 10960 }, { "epoch": 0.041754527530583195, "grad_norm": 0.12870647013187408, "learning_rate": 0.0005, "loss": 2.1332, "step": 10970 }, { "epoch": 0.04179258999870587, "grad_norm": 0.11651504039764404, "learning_rate": 0.0005, "loss": 2.1362, "step": 10980 }, { "epoch": 0.04183065246682856, "grad_norm": 0.11421600729227066, "learning_rate": 0.0005, "loss": 2.1368, "step": 10990 }, { "epoch": 0.04186871493495124, "grad_norm": 0.11833158135414124, "learning_rate": 0.0005, "loss": 2.143, "step": 11000 }, { "epoch": 0.04190677740307393, "grad_norm": 0.11282741278409958, "learning_rate": 0.0005, "loss": 2.1426, "step": 11010 }, { "epoch": 0.041944839871196606, "grad_norm": 0.11734993010759354, "learning_rate": 0.0005, "loss": 2.1487, "step": 11020 }, { "epoch": 0.04198290233931929, "grad_norm": 0.10959252715110779, "learning_rate": 0.0005, "loss": 2.1546, "step": 11030 }, { "epoch": 0.042020964807441975, "grad_norm": 0.11803598701953888, "learning_rate": 0.0005, "loss": 2.119, "step": 11040 }, { "epoch": 0.04205902727556465, "grad_norm": 0.12175311893224716, "learning_rate": 0.0005, "loss": 2.1414, "step": 11050 }, { "epoch": 0.04209708974368734, "grad_norm": 0.13541342318058014, "learning_rate": 0.0005, "loss": 2.1475, "step": 11060 }, { "epoch": 0.04213515221181002, "grad_norm": 0.11709780246019363, "learning_rate": 0.0005, "loss": 2.137, "step": 11070 }, { "epoch": 0.04217321467993271, "grad_norm": 0.11346873641014099, "learning_rate": 0.0005, "loss": 2.1683, "step": 11080 }, { "epoch": 0.042211277148055386, "grad_norm": 0.10680264979600906, "learning_rate": 0.0005, "loss": 2.1558, "step": 11090 }, { "epoch": 0.04224933961617807, "grad_norm": 0.11458170413970947, "learning_rate": 0.0005, "loss": 2.1521, "step": 11100 }, { "epoch": 0.042287402084300756, "grad_norm": 0.1096096783876419, "learning_rate": 0.0005, "loss": 2.1468, "step": 11110 }, { "epoch": 0.04232546455242344, "grad_norm": 0.11289811134338379, "learning_rate": 0.0005, "loss": 2.1347, "step": 11120 }, { "epoch": 0.04236352702054612, "grad_norm": 0.1036987230181694, "learning_rate": 0.0005, "loss": 2.1451, "step": 11130 }, { "epoch": 0.0424015894886688, "grad_norm": 0.12004408985376358, "learning_rate": 0.0005, "loss": 2.1338, "step": 11140 }, { "epoch": 0.04243965195679149, "grad_norm": 0.12811318039894104, "learning_rate": 0.0005, "loss": 2.1482, "step": 11150 }, { "epoch": 0.042477714424914166, "grad_norm": 0.11849028617143631, "learning_rate": 0.0005, "loss": 2.1452, "step": 11160 }, { "epoch": 0.04251577689303685, "grad_norm": 0.11515294760465622, "learning_rate": 0.0005, "loss": 2.1583, "step": 11170 }, { "epoch": 0.042553839361159536, "grad_norm": 0.106211818754673, "learning_rate": 0.0005, "loss": 2.1342, "step": 11180 }, { "epoch": 0.04259190182928222, "grad_norm": 0.10687409341335297, "learning_rate": 0.0005, "loss": 2.1505, "step": 11190 }, { "epoch": 0.0426299642974049, "grad_norm": 0.11422056704759598, "learning_rate": 0.0005, "loss": 2.161, "step": 11200 }, { "epoch": 0.042668026765527584, "grad_norm": 0.11594432592391968, "learning_rate": 0.0005, "loss": 2.1432, "step": 11210 }, { "epoch": 0.04270608923365027, "grad_norm": 0.11001340299844742, "learning_rate": 0.0005, "loss": 2.1438, "step": 11220 }, { "epoch": 0.042744151701772946, "grad_norm": 0.10638782382011414, "learning_rate": 0.0005, "loss": 2.1357, "step": 11230 }, { "epoch": 0.04278221416989563, "grad_norm": 0.11647048592567444, "learning_rate": 0.0005, "loss": 2.1483, "step": 11240 }, { "epoch": 0.042820276638018316, "grad_norm": 0.10914741456508636, "learning_rate": 0.0005, "loss": 2.1413, "step": 11250 }, { "epoch": 0.042858339106141, "grad_norm": 0.11346109211444855, "learning_rate": 0.0005, "loss": 2.1311, "step": 11260 }, { "epoch": 0.04289640157426368, "grad_norm": 0.11912467330694199, "learning_rate": 0.0005, "loss": 2.1451, "step": 11270 }, { "epoch": 0.042934464042386364, "grad_norm": 0.11510618031024933, "learning_rate": 0.0005, "loss": 2.1438, "step": 11280 }, { "epoch": 0.04297252651050905, "grad_norm": 0.10769151151180267, "learning_rate": 0.0005, "loss": 2.1489, "step": 11290 }, { "epoch": 0.043010588978631734, "grad_norm": 0.1087614893913269, "learning_rate": 0.0005, "loss": 2.1429, "step": 11300 }, { "epoch": 0.04304865144675441, "grad_norm": 0.1045922189950943, "learning_rate": 0.0005, "loss": 2.1475, "step": 11310 }, { "epoch": 0.043086713914877096, "grad_norm": 0.12309058010578156, "learning_rate": 0.0005, "loss": 2.1498, "step": 11320 }, { "epoch": 0.04312477638299978, "grad_norm": 0.11188312619924545, "learning_rate": 0.0005, "loss": 2.1471, "step": 11330 }, { "epoch": 0.04316283885112246, "grad_norm": 0.11572781205177307, "learning_rate": 0.0005, "loss": 2.124, "step": 11340 }, { "epoch": 0.043200901319245144, "grad_norm": 0.11633489280939102, "learning_rate": 0.0005, "loss": 2.1534, "step": 11350 }, { "epoch": 0.04323896378736783, "grad_norm": 0.11622288078069687, "learning_rate": 0.0005, "loss": 2.1466, "step": 11360 }, { "epoch": 0.043277026255490514, "grad_norm": 0.11403504759073257, "learning_rate": 0.0005, "loss": 2.15, "step": 11370 }, { "epoch": 0.04331508872361319, "grad_norm": 0.13158078491687775, "learning_rate": 0.0005, "loss": 2.1455, "step": 11380 }, { "epoch": 0.04335315119173588, "grad_norm": 0.11610095202922821, "learning_rate": 0.0005, "loss": 2.1443, "step": 11390 }, { "epoch": 0.04339121365985856, "grad_norm": 0.10601352155208588, "learning_rate": 0.0005, "loss": 2.1505, "step": 11400 }, { "epoch": 0.04342927612798124, "grad_norm": 0.1139233261346817, "learning_rate": 0.0005, "loss": 2.1464, "step": 11410 }, { "epoch": 0.043467338596103924, "grad_norm": 0.12444206327199936, "learning_rate": 0.0005, "loss": 2.1362, "step": 11420 }, { "epoch": 0.04350540106422661, "grad_norm": 0.12657175958156586, "learning_rate": 0.0005, "loss": 2.1275, "step": 11430 }, { "epoch": 0.043543463532349294, "grad_norm": 0.12856648862361908, "learning_rate": 0.0005, "loss": 2.1622, "step": 11440 }, { "epoch": 0.04358152600047197, "grad_norm": 0.12082226574420929, "learning_rate": 0.0005, "loss": 2.1481, "step": 11450 }, { "epoch": 0.04361958846859466, "grad_norm": 0.11483483761548996, "learning_rate": 0.0005, "loss": 2.1512, "step": 11460 }, { "epoch": 0.04365765093671734, "grad_norm": 0.11333905905485153, "learning_rate": 0.0005, "loss": 2.1245, "step": 11470 }, { "epoch": 0.04369571340484003, "grad_norm": 0.16719497740268707, "learning_rate": 0.0005, "loss": 2.154, "step": 11480 }, { "epoch": 0.043733775872962705, "grad_norm": 0.11585808545351028, "learning_rate": 0.0005, "loss": 2.1553, "step": 11490 }, { "epoch": 0.04377183834108539, "grad_norm": 0.13105982542037964, "learning_rate": 0.0005, "loss": 2.1239, "step": 11500 }, { "epoch": 0.043809900809208074, "grad_norm": 0.11006354540586472, "learning_rate": 0.0005, "loss": 2.145, "step": 11510 }, { "epoch": 0.04384796327733075, "grad_norm": 0.11695243418216705, "learning_rate": 0.0005, "loss": 2.1338, "step": 11520 }, { "epoch": 0.04388602574545344, "grad_norm": 0.12058282643556595, "learning_rate": 0.0005, "loss": 2.1186, "step": 11530 }, { "epoch": 0.04392408821357612, "grad_norm": 0.1141928881406784, "learning_rate": 0.0005, "loss": 2.1347, "step": 11540 }, { "epoch": 0.04396215068169881, "grad_norm": 0.10934638231992722, "learning_rate": 0.0005, "loss": 2.148, "step": 11550 }, { "epoch": 0.044000213149821485, "grad_norm": 0.11017810553312302, "learning_rate": 0.0005, "loss": 2.1612, "step": 11560 }, { "epoch": 0.04403827561794417, "grad_norm": 0.10793840140104294, "learning_rate": 0.0005, "loss": 2.1637, "step": 11570 }, { "epoch": 0.044076338086066855, "grad_norm": 0.12375883013010025, "learning_rate": 0.0005, "loss": 2.1459, "step": 11580 }, { "epoch": 0.04411440055418953, "grad_norm": 0.12532900273799896, "learning_rate": 0.0005, "loss": 2.1444, "step": 11590 }, { "epoch": 0.04415246302231222, "grad_norm": 0.11699268966913223, "learning_rate": 0.0005, "loss": 2.1442, "step": 11600 }, { "epoch": 0.0441905254904349, "grad_norm": 0.1009625494480133, "learning_rate": 0.0005, "loss": 2.1513, "step": 11610 }, { "epoch": 0.04422858795855759, "grad_norm": 0.10361494868993759, "learning_rate": 0.0005, "loss": 2.1411, "step": 11620 }, { "epoch": 0.044266650426680265, "grad_norm": 0.110933817923069, "learning_rate": 0.0005, "loss": 2.1441, "step": 11630 }, { "epoch": 0.04430471289480295, "grad_norm": 0.11680163443088531, "learning_rate": 0.0005, "loss": 2.1422, "step": 11640 }, { "epoch": 0.044342775362925635, "grad_norm": 0.11191077530384064, "learning_rate": 0.0005, "loss": 2.1499, "step": 11650 }, { "epoch": 0.04438083783104832, "grad_norm": 0.11856496334075928, "learning_rate": 0.0005, "loss": 2.1475, "step": 11660 }, { "epoch": 0.044418900299171, "grad_norm": 0.1134980171918869, "learning_rate": 0.0005, "loss": 2.1497, "step": 11670 }, { "epoch": 0.04445696276729368, "grad_norm": 0.12401348352432251, "learning_rate": 0.0005, "loss": 2.1572, "step": 11680 }, { "epoch": 0.04449502523541637, "grad_norm": 0.12504594027996063, "learning_rate": 0.0005, "loss": 2.1215, "step": 11690 }, { "epoch": 0.044533087703539045, "grad_norm": 0.11235152184963226, "learning_rate": 0.0005, "loss": 2.1298, "step": 11700 }, { "epoch": 0.04457115017166173, "grad_norm": 0.11963079869747162, "learning_rate": 0.0005, "loss": 2.1348, "step": 11710 }, { "epoch": 0.044609212639784415, "grad_norm": 0.11335135996341705, "learning_rate": 0.0005, "loss": 2.1524, "step": 11720 }, { "epoch": 0.0446472751079071, "grad_norm": 0.11719565838575363, "learning_rate": 0.0005, "loss": 2.1587, "step": 11730 }, { "epoch": 0.04468533757602978, "grad_norm": 0.11393314599990845, "learning_rate": 0.0005, "loss": 2.1436, "step": 11740 }, { "epoch": 0.04472340004415246, "grad_norm": 0.11835304647684097, "learning_rate": 0.0005, "loss": 2.1495, "step": 11750 }, { "epoch": 0.04476146251227515, "grad_norm": 0.15484121441841125, "learning_rate": 0.0005, "loss": 2.1536, "step": 11760 }, { "epoch": 0.044799524980397826, "grad_norm": 0.12383699417114258, "learning_rate": 0.0005, "loss": 2.1332, "step": 11770 }, { "epoch": 0.04483758744852051, "grad_norm": 0.12207262217998505, "learning_rate": 0.0005, "loss": 2.1442, "step": 11780 }, { "epoch": 0.044875649916643195, "grad_norm": 0.10953963547945023, "learning_rate": 0.0005, "loss": 2.15, "step": 11790 }, { "epoch": 0.04491371238476588, "grad_norm": 0.12065628916025162, "learning_rate": 0.0005, "loss": 2.1491, "step": 11800 }, { "epoch": 0.04495177485288856, "grad_norm": 0.10965772718191147, "learning_rate": 0.0005, "loss": 2.1361, "step": 11810 }, { "epoch": 0.04498983732101124, "grad_norm": 0.11649373173713684, "learning_rate": 0.0005, "loss": 2.1434, "step": 11820 }, { "epoch": 0.04502789978913393, "grad_norm": 0.10941790044307709, "learning_rate": 0.0005, "loss": 2.1277, "step": 11830 }, { "epoch": 0.04506596225725661, "grad_norm": 0.12691697478294373, "learning_rate": 0.0005, "loss": 2.1417, "step": 11840 }, { "epoch": 0.04510402472537929, "grad_norm": 0.12523461878299713, "learning_rate": 0.0005, "loss": 2.1552, "step": 11850 }, { "epoch": 0.045142087193501976, "grad_norm": 0.12670059502124786, "learning_rate": 0.0005, "loss": 2.148, "step": 11860 }, { "epoch": 0.04518014966162466, "grad_norm": 0.11297820508480072, "learning_rate": 0.0005, "loss": 2.1352, "step": 11870 }, { "epoch": 0.04521821212974734, "grad_norm": 0.12396519631147385, "learning_rate": 0.0005, "loss": 2.138, "step": 11880 }, { "epoch": 0.04525627459787002, "grad_norm": 0.10837090015411377, "learning_rate": 0.0005, "loss": 2.1435, "step": 11890 }, { "epoch": 0.04529433706599271, "grad_norm": 0.11423803120851517, "learning_rate": 0.0005, "loss": 2.1542, "step": 11900 }, { "epoch": 0.04533239953411539, "grad_norm": 0.10724925249814987, "learning_rate": 0.0005, "loss": 2.1479, "step": 11910 }, { "epoch": 0.04537046200223807, "grad_norm": 0.11402925103902817, "learning_rate": 0.0005, "loss": 2.1444, "step": 11920 }, { "epoch": 0.045408524470360756, "grad_norm": 0.11158744245767593, "learning_rate": 0.0005, "loss": 2.1497, "step": 11930 }, { "epoch": 0.04544658693848344, "grad_norm": 0.10439786314964294, "learning_rate": 0.0005, "loss": 2.1391, "step": 11940 }, { "epoch": 0.04548464940660612, "grad_norm": 0.13331472873687744, "learning_rate": 0.0005, "loss": 2.1607, "step": 11950 }, { "epoch": 0.045522711874728804, "grad_norm": 0.13179340958595276, "learning_rate": 0.0005, "loss": 2.1484, "step": 11960 }, { "epoch": 0.04556077434285149, "grad_norm": 0.11391700804233551, "learning_rate": 0.0005, "loss": 2.1687, "step": 11970 }, { "epoch": 0.04559883681097417, "grad_norm": 0.11158286780118942, "learning_rate": 0.0005, "loss": 2.1506, "step": 11980 }, { "epoch": 0.04563689927909685, "grad_norm": 0.1332971751689911, "learning_rate": 0.0005, "loss": 2.1394, "step": 11990 }, { "epoch": 0.045674961747219536, "grad_norm": 0.10865537077188492, "learning_rate": 0.0005, "loss": 2.121, "step": 12000 }, { "epoch": 0.04571302421534222, "grad_norm": 0.11893896758556366, "learning_rate": 0.0005, "loss": 2.145, "step": 12010 }, { "epoch": 0.045751086683464906, "grad_norm": 0.11757250875234604, "learning_rate": 0.0005, "loss": 2.1513, "step": 12020 }, { "epoch": 0.045789149151587584, "grad_norm": 0.11391414701938629, "learning_rate": 0.0005, "loss": 2.1392, "step": 12030 }, { "epoch": 0.04582721161971027, "grad_norm": 0.11165343970060349, "learning_rate": 0.0005, "loss": 2.1355, "step": 12040 }, { "epoch": 0.045865274087832954, "grad_norm": 0.1106116846203804, "learning_rate": 0.0005, "loss": 2.1428, "step": 12050 }, { "epoch": 0.04590333655595563, "grad_norm": 0.1184442788362503, "learning_rate": 0.0005, "loss": 2.1382, "step": 12060 }, { "epoch": 0.04594139902407832, "grad_norm": 0.11048837751150131, "learning_rate": 0.0005, "loss": 2.1488, "step": 12070 }, { "epoch": 0.045979461492201, "grad_norm": 0.11627710610628128, "learning_rate": 0.0005, "loss": 2.1347, "step": 12080 }, { "epoch": 0.046017523960323686, "grad_norm": 0.12435373663902283, "learning_rate": 0.0005, "loss": 2.1499, "step": 12090 }, { "epoch": 0.046055586428446364, "grad_norm": 0.10093658417463303, "learning_rate": 0.0005, "loss": 2.1528, "step": 12100 }, { "epoch": 0.04609364889656905, "grad_norm": 0.122145876288414, "learning_rate": 0.0005, "loss": 2.1432, "step": 12110 }, { "epoch": 0.046131711364691734, "grad_norm": 0.1312531679868698, "learning_rate": 0.0005, "loss": 2.1437, "step": 12120 }, { "epoch": 0.04616977383281441, "grad_norm": 0.1195501983165741, "learning_rate": 0.0005, "loss": 2.1596, "step": 12130 }, { "epoch": 0.0462078363009371, "grad_norm": 0.119472935795784, "learning_rate": 0.0005, "loss": 2.1504, "step": 12140 }, { "epoch": 0.04624589876905978, "grad_norm": 0.11523136496543884, "learning_rate": 0.0005, "loss": 2.1338, "step": 12150 }, { "epoch": 0.04628396123718247, "grad_norm": 0.11513196676969528, "learning_rate": 0.0005, "loss": 2.1501, "step": 12160 }, { "epoch": 0.046322023705305145, "grad_norm": 0.11697802692651749, "learning_rate": 0.0005, "loss": 2.1409, "step": 12170 }, { "epoch": 0.04636008617342783, "grad_norm": 0.12429428100585938, "learning_rate": 0.0005, "loss": 2.1481, "step": 12180 }, { "epoch": 0.046398148641550514, "grad_norm": 0.12820155918598175, "learning_rate": 0.0005, "loss": 2.1304, "step": 12190 }, { "epoch": 0.04643621110967319, "grad_norm": 0.1277361959218979, "learning_rate": 0.0005, "loss": 2.15, "step": 12200 }, { "epoch": 0.04647427357779588, "grad_norm": 0.1044423058629036, "learning_rate": 0.0005, "loss": 2.1445, "step": 12210 }, { "epoch": 0.04651233604591856, "grad_norm": 0.10871636867523193, "learning_rate": 0.0005, "loss": 2.1402, "step": 12220 }, { "epoch": 0.04655039851404125, "grad_norm": 0.11233247816562653, "learning_rate": 0.0005, "loss": 2.1336, "step": 12230 }, { "epoch": 0.046588460982163925, "grad_norm": 0.11828743666410446, "learning_rate": 0.0005, "loss": 2.1544, "step": 12240 }, { "epoch": 0.04662652345028661, "grad_norm": 0.11883151531219482, "learning_rate": 0.0005, "loss": 2.1402, "step": 12250 }, { "epoch": 0.046664585918409295, "grad_norm": 0.11778468638658524, "learning_rate": 0.0005, "loss": 2.1348, "step": 12260 }, { "epoch": 0.04670264838653198, "grad_norm": 0.12145578116178513, "learning_rate": 0.0005, "loss": 2.1378, "step": 12270 }, { "epoch": 0.04674071085465466, "grad_norm": 0.10665465891361237, "learning_rate": 0.0005, "loss": 2.1388, "step": 12280 }, { "epoch": 0.04677877332277734, "grad_norm": 0.10334984213113785, "learning_rate": 0.0005, "loss": 2.1395, "step": 12290 }, { "epoch": 0.04681683579090003, "grad_norm": 0.13126300275325775, "learning_rate": 0.0005, "loss": 2.1419, "step": 12300 }, { "epoch": 0.046854898259022705, "grad_norm": 0.17418350279331207, "learning_rate": 0.0005, "loss": 2.1357, "step": 12310 }, { "epoch": 0.04689296072714539, "grad_norm": 0.13565418124198914, "learning_rate": 0.0005, "loss": 2.137, "step": 12320 }, { "epoch": 0.046931023195268075, "grad_norm": 0.13762266933918, "learning_rate": 0.0005, "loss": 2.1403, "step": 12330 }, { "epoch": 0.04696908566339076, "grad_norm": 0.11412933468818665, "learning_rate": 0.0005, "loss": 2.1282, "step": 12340 }, { "epoch": 0.04700714813151344, "grad_norm": 0.10721313953399658, "learning_rate": 0.0005, "loss": 2.1349, "step": 12350 }, { "epoch": 0.04704521059963612, "grad_norm": 0.11282768845558167, "learning_rate": 0.0005, "loss": 2.1475, "step": 12360 }, { "epoch": 0.04708327306775881, "grad_norm": 0.1208154559135437, "learning_rate": 0.0005, "loss": 2.144, "step": 12370 }, { "epoch": 0.047121335535881485, "grad_norm": 0.14094211161136627, "learning_rate": 0.0005, "loss": 2.129, "step": 12380 }, { "epoch": 0.04715939800400417, "grad_norm": 0.12425476312637329, "learning_rate": 0.0005, "loss": 2.1653, "step": 12390 }, { "epoch": 0.047197460472126855, "grad_norm": 0.11844827979803085, "learning_rate": 0.0005, "loss": 2.1405, "step": 12400 }, { "epoch": 0.04723552294024954, "grad_norm": 0.10863589495420456, "learning_rate": 0.0005, "loss": 2.1473, "step": 12410 }, { "epoch": 0.04727358540837222, "grad_norm": 0.13282550871372223, "learning_rate": 0.0005, "loss": 2.1456, "step": 12420 }, { "epoch": 0.0473116478764949, "grad_norm": 0.10495536774396896, "learning_rate": 0.0005, "loss": 2.1603, "step": 12430 }, { "epoch": 0.04734971034461759, "grad_norm": 0.11081427335739136, "learning_rate": 0.0005, "loss": 2.1431, "step": 12440 }, { "epoch": 0.04738777281274027, "grad_norm": 0.12156649678945541, "learning_rate": 0.0005, "loss": 2.1395, "step": 12450 }, { "epoch": 0.04742583528086295, "grad_norm": 0.13550612330436707, "learning_rate": 0.0005, "loss": 2.1437, "step": 12460 }, { "epoch": 0.047463897748985635, "grad_norm": 0.11600054055452347, "learning_rate": 0.0005, "loss": 2.1281, "step": 12470 }, { "epoch": 0.04750196021710832, "grad_norm": 0.11698690801858902, "learning_rate": 0.0005, "loss": 2.141, "step": 12480 }, { "epoch": 0.047540022685231, "grad_norm": 0.1163286417722702, "learning_rate": 0.0005, "loss": 2.1462, "step": 12490 }, { "epoch": 0.04757808515335368, "grad_norm": 0.11984337866306305, "learning_rate": 0.0005, "loss": 2.1389, "step": 12500 }, { "epoch": 0.04761614762147637, "grad_norm": 0.11663845181465149, "learning_rate": 0.0005, "loss": 2.1478, "step": 12510 }, { "epoch": 0.04765421008959905, "grad_norm": 0.1164526715874672, "learning_rate": 0.0005, "loss": 2.1579, "step": 12520 }, { "epoch": 0.04769227255772173, "grad_norm": 0.1177983358502388, "learning_rate": 0.0005, "loss": 2.1462, "step": 12530 }, { "epoch": 0.047730335025844416, "grad_norm": 0.1029103472828865, "learning_rate": 0.0005, "loss": 2.1534, "step": 12540 }, { "epoch": 0.0477683974939671, "grad_norm": 0.1200849637389183, "learning_rate": 0.0005, "loss": 2.1414, "step": 12550 }, { "epoch": 0.04780645996208978, "grad_norm": 0.12218188494443893, "learning_rate": 0.0005, "loss": 2.1572, "step": 12560 }, { "epoch": 0.04784452243021246, "grad_norm": 0.11455339193344116, "learning_rate": 0.0005, "loss": 2.1397, "step": 12570 }, { "epoch": 0.04788258489833515, "grad_norm": 0.122074656188488, "learning_rate": 0.0005, "loss": 2.148, "step": 12580 }, { "epoch": 0.04792064736645783, "grad_norm": 0.12012477964162827, "learning_rate": 0.0005, "loss": 2.1421, "step": 12590 }, { "epoch": 0.04795870983458051, "grad_norm": 0.12606510519981384, "learning_rate": 0.0005, "loss": 2.1573, "step": 12600 }, { "epoch": 0.047996772302703196, "grad_norm": 0.12740558385849, "learning_rate": 0.0005, "loss": 2.1536, "step": 12610 }, { "epoch": 0.04803483477082588, "grad_norm": 0.11046471446752548, "learning_rate": 0.0005, "loss": 2.1454, "step": 12620 }, { "epoch": 0.048072897238948566, "grad_norm": 0.13119575381278992, "learning_rate": 0.0005, "loss": 2.146, "step": 12630 }, { "epoch": 0.048110959707071244, "grad_norm": 0.11584407091140747, "learning_rate": 0.0005, "loss": 2.1468, "step": 12640 }, { "epoch": 0.04814902217519393, "grad_norm": 0.12544192373752594, "learning_rate": 0.0005, "loss": 2.1605, "step": 12650 }, { "epoch": 0.04818708464331661, "grad_norm": 0.12177877873182297, "learning_rate": 0.0005, "loss": 2.1662, "step": 12660 }, { "epoch": 0.04822514711143929, "grad_norm": 0.13451693952083588, "learning_rate": 0.0005, "loss": 2.1424, "step": 12670 }, { "epoch": 0.048263209579561976, "grad_norm": 0.12698346376419067, "learning_rate": 0.0005, "loss": 2.1589, "step": 12680 }, { "epoch": 0.04830127204768466, "grad_norm": 0.11226338148117065, "learning_rate": 0.0005, "loss": 2.1276, "step": 12690 }, { "epoch": 0.048339334515807346, "grad_norm": 0.11505680531263351, "learning_rate": 0.0005, "loss": 2.1412, "step": 12700 }, { "epoch": 0.048377396983930024, "grad_norm": 0.1290256232023239, "learning_rate": 0.0005, "loss": 2.1319, "step": 12710 }, { "epoch": 0.04841545945205271, "grad_norm": 0.11181953549385071, "learning_rate": 0.0005, "loss": 2.1528, "step": 12720 }, { "epoch": 0.048453521920175394, "grad_norm": 0.11853830516338348, "learning_rate": 0.0005, "loss": 2.1342, "step": 12730 }, { "epoch": 0.04849158438829807, "grad_norm": 0.12245513498783112, "learning_rate": 0.0005, "loss": 2.1519, "step": 12740 }, { "epoch": 0.048529646856420756, "grad_norm": 0.11556795984506607, "learning_rate": 0.0005, "loss": 2.1216, "step": 12750 }, { "epoch": 0.04856770932454344, "grad_norm": 0.11135729402303696, "learning_rate": 0.0005, "loss": 2.1578, "step": 12760 }, { "epoch": 0.048605771792666126, "grad_norm": 0.13058693706989288, "learning_rate": 0.0005, "loss": 2.1583, "step": 12770 }, { "epoch": 0.048643834260788804, "grad_norm": 0.12102789431810379, "learning_rate": 0.0005, "loss": 2.1411, "step": 12780 }, { "epoch": 0.04868189672891149, "grad_norm": 0.1127203106880188, "learning_rate": 0.0005, "loss": 2.1371, "step": 12790 }, { "epoch": 0.048719959197034174, "grad_norm": 0.1162179708480835, "learning_rate": 0.0005, "loss": 2.1426, "step": 12800 }, { "epoch": 0.04875802166515686, "grad_norm": 0.12106090039014816, "learning_rate": 0.0005, "loss": 2.15, "step": 12810 }, { "epoch": 0.04879608413327954, "grad_norm": 0.12847653031349182, "learning_rate": 0.0005, "loss": 2.1364, "step": 12820 }, { "epoch": 0.04883414660140222, "grad_norm": 0.12018883228302002, "learning_rate": 0.0005, "loss": 2.1411, "step": 12830 }, { "epoch": 0.048872209069524906, "grad_norm": 0.12718555331230164, "learning_rate": 0.0005, "loss": 2.1464, "step": 12840 }, { "epoch": 0.048910271537647584, "grad_norm": 0.13112443685531616, "learning_rate": 0.0005, "loss": 2.1342, "step": 12850 }, { "epoch": 0.04894833400577027, "grad_norm": 0.12325897067785263, "learning_rate": 0.0005, "loss": 2.1438, "step": 12860 }, { "epoch": 0.048986396473892954, "grad_norm": 0.11568273603916168, "learning_rate": 0.0005, "loss": 2.1398, "step": 12870 }, { "epoch": 0.04902445894201564, "grad_norm": 0.11961609125137329, "learning_rate": 0.0005, "loss": 2.1467, "step": 12880 }, { "epoch": 0.04906252141013832, "grad_norm": 0.13741044700145721, "learning_rate": 0.0005, "loss": 2.1464, "step": 12890 }, { "epoch": 0.049100583878261, "grad_norm": 0.12998056411743164, "learning_rate": 0.0005, "loss": 2.1445, "step": 12900 }, { "epoch": 0.04913864634638369, "grad_norm": 0.1075034886598587, "learning_rate": 0.0005, "loss": 2.1394, "step": 12910 }, { "epoch": 0.049176708814506365, "grad_norm": 0.11980056762695312, "learning_rate": 0.0005, "loss": 2.1496, "step": 12920 }, { "epoch": 0.04921477128262905, "grad_norm": 0.1278366595506668, "learning_rate": 0.0005, "loss": 2.1513, "step": 12930 }, { "epoch": 0.049252833750751734, "grad_norm": 0.12148536741733551, "learning_rate": 0.0005, "loss": 2.1382, "step": 12940 }, { "epoch": 0.04929089621887442, "grad_norm": 0.13064073026180267, "learning_rate": 0.0005, "loss": 2.1463, "step": 12950 }, { "epoch": 0.0493289586869971, "grad_norm": 0.12474600970745087, "learning_rate": 0.0005, "loss": 2.1514, "step": 12960 }, { "epoch": 0.04936702115511978, "grad_norm": 0.1313926726579666, "learning_rate": 0.0005, "loss": 2.1513, "step": 12970 }, { "epoch": 0.04940508362324247, "grad_norm": 0.12423042207956314, "learning_rate": 0.0005, "loss": 2.152, "step": 12980 }, { "epoch": 0.04944314609136515, "grad_norm": 0.12478918582201004, "learning_rate": 0.0005, "loss": 2.1157, "step": 12990 }, { "epoch": 0.04948120855948783, "grad_norm": 0.11432372778654099, "learning_rate": 0.0005, "loss": 2.1426, "step": 13000 }, { "epoch": 0.049519271027610515, "grad_norm": 0.11537044495344162, "learning_rate": 0.0005, "loss": 2.1361, "step": 13010 }, { "epoch": 0.0495573334957332, "grad_norm": 0.12360623478889465, "learning_rate": 0.0005, "loss": 2.1575, "step": 13020 }, { "epoch": 0.04959539596385588, "grad_norm": 0.12187385559082031, "learning_rate": 0.0005, "loss": 2.1429, "step": 13030 }, { "epoch": 0.04963345843197856, "grad_norm": 0.1254439353942871, "learning_rate": 0.0005, "loss": 2.1423, "step": 13040 }, { "epoch": 0.04967152090010125, "grad_norm": 0.12457789480686188, "learning_rate": 0.0005, "loss": 2.1584, "step": 13050 }, { "epoch": 0.04970958336822393, "grad_norm": 0.11507994681596756, "learning_rate": 0.0005, "loss": 2.1332, "step": 13060 }, { "epoch": 0.04974764583634661, "grad_norm": 0.12360002845525742, "learning_rate": 0.0005, "loss": 2.1463, "step": 13070 }, { "epoch": 0.049785708304469295, "grad_norm": 0.10747748613357544, "learning_rate": 0.0005, "loss": 2.1417, "step": 13080 }, { "epoch": 0.04982377077259198, "grad_norm": 0.10097293555736542, "learning_rate": 0.0005, "loss": 2.15, "step": 13090 }, { "epoch": 0.04986183324071466, "grad_norm": 0.12312037497758865, "learning_rate": 0.0005, "loss": 2.1499, "step": 13100 }, { "epoch": 0.04989989570883734, "grad_norm": 0.12179164588451385, "learning_rate": 0.0005, "loss": 2.1437, "step": 13110 }, { "epoch": 0.04993795817696003, "grad_norm": 0.1149076297879219, "learning_rate": 0.0005, "loss": 2.1421, "step": 13120 }, { "epoch": 0.04997602064508271, "grad_norm": 0.11413358896970749, "learning_rate": 0.0005, "loss": 2.1406, "step": 13130 }, { "epoch": 0.05001408311320539, "grad_norm": 0.12448055297136307, "learning_rate": 0.0005, "loss": 2.139, "step": 13140 }, { "epoch": 0.050052145581328075, "grad_norm": 0.1304500848054886, "learning_rate": 0.0005, "loss": 2.1413, "step": 13150 }, { "epoch": 0.05009020804945076, "grad_norm": 0.10983631759881973, "learning_rate": 0.0005, "loss": 2.1514, "step": 13160 }, { "epoch": 0.050128270517573445, "grad_norm": 0.11582379788160324, "learning_rate": 0.0005, "loss": 2.1414, "step": 13170 }, { "epoch": 0.05016633298569612, "grad_norm": 0.11181753128767014, "learning_rate": 0.0005, "loss": 2.1386, "step": 13180 }, { "epoch": 0.05020439545381881, "grad_norm": 0.11949356645345688, "learning_rate": 0.0005, "loss": 2.141, "step": 13190 }, { "epoch": 0.05024245792194149, "grad_norm": 0.111565500497818, "learning_rate": 0.0005, "loss": 2.1299, "step": 13200 }, { "epoch": 0.05028052039006417, "grad_norm": 0.12090222537517548, "learning_rate": 0.0005, "loss": 2.1596, "step": 13210 }, { "epoch": 0.050318582858186855, "grad_norm": 0.10770760476589203, "learning_rate": 0.0005, "loss": 2.1571, "step": 13220 }, { "epoch": 0.05035664532630954, "grad_norm": 0.13612627983093262, "learning_rate": 0.0005, "loss": 2.1526, "step": 13230 }, { "epoch": 0.050394707794432225, "grad_norm": 0.1083691418170929, "learning_rate": 0.0005, "loss": 2.1422, "step": 13240 }, { "epoch": 0.0504327702625549, "grad_norm": 0.1286705583333969, "learning_rate": 0.0005, "loss": 2.1239, "step": 13250 }, { "epoch": 0.05047083273067759, "grad_norm": 0.13741260766983032, "learning_rate": 0.0005, "loss": 2.1522, "step": 13260 }, { "epoch": 0.05050889519880027, "grad_norm": 0.11412322521209717, "learning_rate": 0.0005, "loss": 2.1371, "step": 13270 }, { "epoch": 0.05054695766692295, "grad_norm": 0.11559664458036423, "learning_rate": 0.0005, "loss": 2.1504, "step": 13280 }, { "epoch": 0.050585020135045636, "grad_norm": 0.12051431089639664, "learning_rate": 0.0005, "loss": 2.1525, "step": 13290 }, { "epoch": 0.05062308260316832, "grad_norm": 0.12224344909191132, "learning_rate": 0.0005, "loss": 2.1439, "step": 13300 }, { "epoch": 0.050661145071291006, "grad_norm": 0.11170416325330734, "learning_rate": 0.0005, "loss": 2.13, "step": 13310 }, { "epoch": 0.05069920753941368, "grad_norm": 0.172870472073555, "learning_rate": 0.0005, "loss": 2.1355, "step": 13320 }, { "epoch": 0.05073727000753637, "grad_norm": 0.1193099096417427, "learning_rate": 0.0005, "loss": 2.1121, "step": 13330 }, { "epoch": 0.05077533247565905, "grad_norm": 0.1267542988061905, "learning_rate": 0.0005, "loss": 2.1603, "step": 13340 }, { "epoch": 0.05081339494378173, "grad_norm": 0.11663828045129776, "learning_rate": 0.0005, "loss": 2.1364, "step": 13350 }, { "epoch": 0.050851457411904416, "grad_norm": 0.12765999138355255, "learning_rate": 0.0005, "loss": 2.1391, "step": 13360 }, { "epoch": 0.0508895198800271, "grad_norm": 0.12195882946252823, "learning_rate": 0.0005, "loss": 2.1396, "step": 13370 }, { "epoch": 0.050927582348149786, "grad_norm": 0.12730921804904938, "learning_rate": 0.0005, "loss": 2.1446, "step": 13380 }, { "epoch": 0.050965644816272464, "grad_norm": 0.12172224372625351, "learning_rate": 0.0005, "loss": 2.1591, "step": 13390 }, { "epoch": 0.05100370728439515, "grad_norm": 0.12180347740650177, "learning_rate": 0.0005, "loss": 2.1402, "step": 13400 }, { "epoch": 0.051041769752517833, "grad_norm": 0.10897450149059296, "learning_rate": 0.0005, "loss": 2.1513, "step": 13410 }, { "epoch": 0.05107983222064052, "grad_norm": 0.12180740386247635, "learning_rate": 0.0005, "loss": 2.1391, "step": 13420 }, { "epoch": 0.051117894688763196, "grad_norm": 0.10590329766273499, "learning_rate": 0.0005, "loss": 2.1602, "step": 13430 }, { "epoch": 0.05115595715688588, "grad_norm": 0.102902851998806, "learning_rate": 0.0005, "loss": 2.1411, "step": 13440 }, { "epoch": 0.051194019625008566, "grad_norm": 0.12098443508148193, "learning_rate": 0.0005, "loss": 2.1603, "step": 13450 }, { "epoch": 0.051232082093131244, "grad_norm": 0.1269441395998001, "learning_rate": 0.0005, "loss": 2.1361, "step": 13460 }, { "epoch": 0.05127014456125393, "grad_norm": 0.11580880731344223, "learning_rate": 0.0005, "loss": 2.1464, "step": 13470 }, { "epoch": 0.051308207029376614, "grad_norm": 0.10751645267009735, "learning_rate": 0.0005, "loss": 2.1228, "step": 13480 }, { "epoch": 0.0513462694974993, "grad_norm": 0.11714409291744232, "learning_rate": 0.0005, "loss": 2.1477, "step": 13490 }, { "epoch": 0.05138433196562198, "grad_norm": 0.11440756171941757, "learning_rate": 0.0005, "loss": 2.1551, "step": 13500 }, { "epoch": 0.05142239443374466, "grad_norm": 0.11872199177742004, "learning_rate": 0.0005, "loss": 2.1459, "step": 13510 }, { "epoch": 0.051460456901867346, "grad_norm": 0.11766054481267929, "learning_rate": 0.0005, "loss": 2.1335, "step": 13520 }, { "epoch": 0.051498519369990024, "grad_norm": 0.11872246116399765, "learning_rate": 0.0005, "loss": 2.1388, "step": 13530 }, { "epoch": 0.05153658183811271, "grad_norm": 0.10821834951639175, "learning_rate": 0.0005, "loss": 2.1443, "step": 13540 }, { "epoch": 0.051574644306235394, "grad_norm": 0.11680571734905243, "learning_rate": 0.0005, "loss": 2.1416, "step": 13550 }, { "epoch": 0.05161270677435808, "grad_norm": 0.11038310825824738, "learning_rate": 0.0005, "loss": 2.1338, "step": 13560 }, { "epoch": 0.05165076924248076, "grad_norm": 0.11159025132656097, "learning_rate": 0.0005, "loss": 2.1352, "step": 13570 }, { "epoch": 0.05168883171060344, "grad_norm": 0.11030484735965729, "learning_rate": 0.0005, "loss": 2.1353, "step": 13580 }, { "epoch": 0.05172689417872613, "grad_norm": 0.11383315920829773, "learning_rate": 0.0005, "loss": 2.1515, "step": 13590 }, { "epoch": 0.05176495664684881, "grad_norm": 0.111046202480793, "learning_rate": 0.0005, "loss": 2.1337, "step": 13600 }, { "epoch": 0.05180301911497149, "grad_norm": 0.109249547123909, "learning_rate": 0.0005, "loss": 2.1433, "step": 13610 }, { "epoch": 0.051841081583094174, "grad_norm": 0.11588253825902939, "learning_rate": 0.0005, "loss": 2.146, "step": 13620 }, { "epoch": 0.05187914405121686, "grad_norm": 0.11833309382200241, "learning_rate": 0.0005, "loss": 2.1536, "step": 13630 }, { "epoch": 0.05191720651933954, "grad_norm": 0.12221593409776688, "learning_rate": 0.0005, "loss": 2.1517, "step": 13640 }, { "epoch": 0.05195526898746222, "grad_norm": 0.11224260926246643, "learning_rate": 0.0005, "loss": 2.1461, "step": 13650 }, { "epoch": 0.05199333145558491, "grad_norm": 0.12999624013900757, "learning_rate": 0.0005, "loss": 2.1485, "step": 13660 }, { "epoch": 0.05203139392370759, "grad_norm": 0.1172730103135109, "learning_rate": 0.0005, "loss": 2.1288, "step": 13670 }, { "epoch": 0.05206945639183027, "grad_norm": 0.14356952905654907, "learning_rate": 0.0005, "loss": 2.154, "step": 13680 }, { "epoch": 0.052107518859952955, "grad_norm": 0.11941909044981003, "learning_rate": 0.0005, "loss": 2.1229, "step": 13690 }, { "epoch": 0.05214558132807564, "grad_norm": 0.10828124731779099, "learning_rate": 0.0005, "loss": 2.1346, "step": 13700 }, { "epoch": 0.05218364379619832, "grad_norm": 0.11855553090572357, "learning_rate": 0.0005, "loss": 2.1368, "step": 13710 }, { "epoch": 0.052221706264321, "grad_norm": 0.1110978052020073, "learning_rate": 0.0005, "loss": 2.1383, "step": 13720 }, { "epoch": 0.05225976873244369, "grad_norm": 0.11708894371986389, "learning_rate": 0.0005, "loss": 2.1673, "step": 13730 }, { "epoch": 0.05229783120056637, "grad_norm": 0.11091696470975876, "learning_rate": 0.0005, "loss": 2.1428, "step": 13740 }, { "epoch": 0.05233589366868905, "grad_norm": 0.11971557885408401, "learning_rate": 0.0005, "loss": 2.139, "step": 13750 }, { "epoch": 0.052373956136811735, "grad_norm": 0.12013959139585495, "learning_rate": 0.0005, "loss": 2.1447, "step": 13760 }, { "epoch": 0.05241201860493442, "grad_norm": 0.12948036193847656, "learning_rate": 0.0005, "loss": 2.1404, "step": 13770 }, { "epoch": 0.052450081073057105, "grad_norm": 0.12004504352807999, "learning_rate": 0.0005, "loss": 2.1435, "step": 13780 }, { "epoch": 0.05248814354117978, "grad_norm": 0.12624917924404144, "learning_rate": 0.0005, "loss": 2.1461, "step": 13790 }, { "epoch": 0.05252620600930247, "grad_norm": 0.12713316082954407, "learning_rate": 0.0005, "loss": 2.1443, "step": 13800 }, { "epoch": 0.05256426847742515, "grad_norm": 0.11220577359199524, "learning_rate": 0.0005, "loss": 2.1509, "step": 13810 }, { "epoch": 0.05260233094554783, "grad_norm": 0.10795598477125168, "learning_rate": 0.0005, "loss": 2.1448, "step": 13820 }, { "epoch": 0.052640393413670515, "grad_norm": 0.11754649877548218, "learning_rate": 0.0005, "loss": 2.1346, "step": 13830 }, { "epoch": 0.0526784558817932, "grad_norm": 0.12715084850788116, "learning_rate": 0.0005, "loss": 2.1398, "step": 13840 }, { "epoch": 0.052716518349915885, "grad_norm": 0.11614597588777542, "learning_rate": 0.0005, "loss": 2.1383, "step": 13850 }, { "epoch": 0.05275458081803856, "grad_norm": 0.12445352226495743, "learning_rate": 0.0005, "loss": 2.1443, "step": 13860 }, { "epoch": 0.05279264328616125, "grad_norm": 0.12445578724145889, "learning_rate": 0.0005, "loss": 2.1595, "step": 13870 }, { "epoch": 0.05283070575428393, "grad_norm": 0.13612398505210876, "learning_rate": 0.0005, "loss": 2.1503, "step": 13880 }, { "epoch": 0.05286876822240661, "grad_norm": 0.10932034254074097, "learning_rate": 0.0005, "loss": 2.1326, "step": 13890 }, { "epoch": 0.052906830690529295, "grad_norm": 0.12032169103622437, "learning_rate": 0.0005, "loss": 2.1639, "step": 13900 }, { "epoch": 0.05294489315865198, "grad_norm": 0.12248440086841583, "learning_rate": 0.0005, "loss": 2.1501, "step": 13910 }, { "epoch": 0.052982955626774665, "grad_norm": 0.11166456341743469, "learning_rate": 0.0005, "loss": 2.1389, "step": 13920 }, { "epoch": 0.05302101809489734, "grad_norm": 0.11123169213533401, "learning_rate": 0.0005, "loss": 2.1455, "step": 13930 }, { "epoch": 0.05305908056302003, "grad_norm": 0.11789651960134506, "learning_rate": 0.0005, "loss": 2.1331, "step": 13940 }, { "epoch": 0.05309714303114271, "grad_norm": 0.11766628175973892, "learning_rate": 0.0005, "loss": 2.1512, "step": 13950 }, { "epoch": 0.0531352054992654, "grad_norm": 0.12354835867881775, "learning_rate": 0.0005, "loss": 2.1334, "step": 13960 }, { "epoch": 0.053173267967388076, "grad_norm": 0.14414092898368835, "learning_rate": 0.0005, "loss": 2.1475, "step": 13970 }, { "epoch": 0.05321133043551076, "grad_norm": 0.12397162616252899, "learning_rate": 0.0005, "loss": 2.1512, "step": 13980 }, { "epoch": 0.053249392903633445, "grad_norm": 0.10753097385168076, "learning_rate": 0.0005, "loss": 2.1325, "step": 13990 }, { "epoch": 0.05328745537175612, "grad_norm": 0.11333264410495758, "learning_rate": 0.0005, "loss": 2.1384, "step": 14000 }, { "epoch": 0.05332551783987881, "grad_norm": 0.11931371688842773, "learning_rate": 0.0005, "loss": 2.1369, "step": 14010 }, { "epoch": 0.05336358030800149, "grad_norm": 0.12130443006753922, "learning_rate": 0.0005, "loss": 2.148, "step": 14020 }, { "epoch": 0.05340164277612418, "grad_norm": 0.11555348336696625, "learning_rate": 0.0005, "loss": 2.1566, "step": 14030 }, { "epoch": 0.053439705244246856, "grad_norm": 0.1037520095705986, "learning_rate": 0.0005, "loss": 2.1279, "step": 14040 }, { "epoch": 0.05347776771236954, "grad_norm": 0.11452918499708176, "learning_rate": 0.0005, "loss": 2.1413, "step": 14050 }, { "epoch": 0.053515830180492226, "grad_norm": 0.13714534044265747, "learning_rate": 0.0005, "loss": 2.1298, "step": 14060 }, { "epoch": 0.053553892648614904, "grad_norm": 0.13775818049907684, "learning_rate": 0.0005, "loss": 2.1389, "step": 14070 }, { "epoch": 0.05359195511673759, "grad_norm": 0.119764044880867, "learning_rate": 0.0005, "loss": 2.1452, "step": 14080 }, { "epoch": 0.05363001758486027, "grad_norm": 0.10478755831718445, "learning_rate": 0.0005, "loss": 2.1336, "step": 14090 }, { "epoch": 0.05366808005298296, "grad_norm": 0.12574444711208344, "learning_rate": 0.0005, "loss": 2.1312, "step": 14100 }, { "epoch": 0.053706142521105636, "grad_norm": 0.12435276061296463, "learning_rate": 0.0005, "loss": 2.1483, "step": 14110 }, { "epoch": 0.05374420498922832, "grad_norm": 0.1113581657409668, "learning_rate": 0.0005, "loss": 2.1256, "step": 14120 }, { "epoch": 0.053782267457351006, "grad_norm": 0.1145823672413826, "learning_rate": 0.0005, "loss": 2.1444, "step": 14130 }, { "epoch": 0.05382032992547369, "grad_norm": 0.11234494298696518, "learning_rate": 0.0005, "loss": 2.1346, "step": 14140 }, { "epoch": 0.05385839239359637, "grad_norm": 0.11848796904087067, "learning_rate": 0.0005, "loss": 2.1435, "step": 14150 }, { "epoch": 0.053896454861719054, "grad_norm": 0.12532465159893036, "learning_rate": 0.0005, "loss": 2.1429, "step": 14160 }, { "epoch": 0.05393451732984174, "grad_norm": 0.10763829201459885, "learning_rate": 0.0005, "loss": 2.1457, "step": 14170 }, { "epoch": 0.053972579797964416, "grad_norm": 0.12030988931655884, "learning_rate": 0.0005, "loss": 2.1422, "step": 14180 }, { "epoch": 0.0540106422660871, "grad_norm": 0.1350197196006775, "learning_rate": 0.0005, "loss": 2.1429, "step": 14190 }, { "epoch": 0.054048704734209786, "grad_norm": 0.11541256308555603, "learning_rate": 0.0005, "loss": 2.1521, "step": 14200 }, { "epoch": 0.05408676720233247, "grad_norm": 0.12731333076953888, "learning_rate": 0.0005, "loss": 2.1508, "step": 14210 }, { "epoch": 0.05412482967045515, "grad_norm": 0.12442631274461746, "learning_rate": 0.0005, "loss": 2.1436, "step": 14220 }, { "epoch": 0.054162892138577834, "grad_norm": 0.12272724509239197, "learning_rate": 0.0005, "loss": 2.1413, "step": 14230 }, { "epoch": 0.05420095460670052, "grad_norm": 0.11611298471689224, "learning_rate": 0.0005, "loss": 2.139, "step": 14240 }, { "epoch": 0.0542390170748232, "grad_norm": 0.10913633555173874, "learning_rate": 0.0005, "loss": 2.1402, "step": 14250 }, { "epoch": 0.05427707954294588, "grad_norm": 0.10878828912973404, "learning_rate": 0.0005, "loss": 2.1451, "step": 14260 }, { "epoch": 0.054315142011068566, "grad_norm": 0.12029829621315002, "learning_rate": 0.0005, "loss": 2.1441, "step": 14270 }, { "epoch": 0.05435320447919125, "grad_norm": 0.11875171959400177, "learning_rate": 0.0005, "loss": 2.1393, "step": 14280 }, { "epoch": 0.05439126694731393, "grad_norm": 0.12768295407295227, "learning_rate": 0.0005, "loss": 2.161, "step": 14290 }, { "epoch": 0.054429329415436614, "grad_norm": 0.13291147351264954, "learning_rate": 0.0005, "loss": 2.1553, "step": 14300 }, { "epoch": 0.0544673918835593, "grad_norm": 0.12438298016786575, "learning_rate": 0.0005, "loss": 2.1513, "step": 14310 }, { "epoch": 0.05450545435168198, "grad_norm": 0.1304919719696045, "learning_rate": 0.0005, "loss": 2.1388, "step": 14320 }, { "epoch": 0.05454351681980466, "grad_norm": 0.1263260394334793, "learning_rate": 0.0005, "loss": 2.1407, "step": 14330 }, { "epoch": 0.05458157928792735, "grad_norm": 0.1273345649242401, "learning_rate": 0.0005, "loss": 2.1477, "step": 14340 }, { "epoch": 0.05461964175605003, "grad_norm": 0.11910910904407501, "learning_rate": 0.0005, "loss": 2.1362, "step": 14350 }, { "epoch": 0.05465770422417271, "grad_norm": 0.10915575176477432, "learning_rate": 0.0005, "loss": 2.1225, "step": 14360 }, { "epoch": 0.054695766692295394, "grad_norm": 0.11233872920274734, "learning_rate": 0.0005, "loss": 2.1608, "step": 14370 }, { "epoch": 0.05473382916041808, "grad_norm": 0.1092049777507782, "learning_rate": 0.0005, "loss": 2.1312, "step": 14380 }, { "epoch": 0.054771891628540764, "grad_norm": 0.10772830247879028, "learning_rate": 0.0005, "loss": 2.1317, "step": 14390 }, { "epoch": 0.05480995409666344, "grad_norm": 0.11499619483947754, "learning_rate": 0.0005, "loss": 2.1314, "step": 14400 }, { "epoch": 0.05484801656478613, "grad_norm": 0.12505364418029785, "learning_rate": 0.0005, "loss": 2.1215, "step": 14410 }, { "epoch": 0.05488607903290881, "grad_norm": 0.10888305306434631, "learning_rate": 0.0005, "loss": 2.1406, "step": 14420 }, { "epoch": 0.05492414150103149, "grad_norm": 0.11248484998941422, "learning_rate": 0.0005, "loss": 2.1372, "step": 14430 }, { "epoch": 0.054962203969154175, "grad_norm": 0.12412357330322266, "learning_rate": 0.0005, "loss": 2.13, "step": 14440 }, { "epoch": 0.05500026643727686, "grad_norm": 0.13320519030094147, "learning_rate": 0.0005, "loss": 2.1478, "step": 14450 }, { "epoch": 0.055038328905399544, "grad_norm": 0.11333293467760086, "learning_rate": 0.0005, "loss": 2.1486, "step": 14460 }, { "epoch": 0.05507639137352222, "grad_norm": 0.12249065935611725, "learning_rate": 0.0005, "loss": 2.1376, "step": 14470 }, { "epoch": 0.05511445384164491, "grad_norm": 0.11434555053710938, "learning_rate": 0.0005, "loss": 2.1449, "step": 14480 }, { "epoch": 0.05515251630976759, "grad_norm": 0.12702178955078125, "learning_rate": 0.0005, "loss": 2.1449, "step": 14490 }, { "epoch": 0.05519057877789027, "grad_norm": 0.10995277017354965, "learning_rate": 0.0005, "loss": 2.1305, "step": 14500 }, { "epoch": 0.055228641246012955, "grad_norm": 0.1193859875202179, "learning_rate": 0.0005, "loss": 2.1455, "step": 14510 }, { "epoch": 0.05526670371413564, "grad_norm": 0.12037155777215958, "learning_rate": 0.0005, "loss": 2.1372, "step": 14520 }, { "epoch": 0.055304766182258325, "grad_norm": 0.11841357499361038, "learning_rate": 0.0005, "loss": 2.1512, "step": 14530 }, { "epoch": 0.055342828650381, "grad_norm": 0.11028337478637695, "learning_rate": 0.0005, "loss": 2.1459, "step": 14540 }, { "epoch": 0.05538089111850369, "grad_norm": 0.11902716010808945, "learning_rate": 0.0005, "loss": 2.1481, "step": 14550 }, { "epoch": 0.05541895358662637, "grad_norm": 0.11138655245304108, "learning_rate": 0.0005, "loss": 2.133, "step": 14560 }, { "epoch": 0.05545701605474906, "grad_norm": 0.11641339957714081, "learning_rate": 0.0005, "loss": 2.1341, "step": 14570 }, { "epoch": 0.055495078522871735, "grad_norm": 0.13227738440036774, "learning_rate": 0.0005, "loss": 2.1301, "step": 14580 }, { "epoch": 0.05553314099099442, "grad_norm": 0.12470174580812454, "learning_rate": 0.0005, "loss": 2.1475, "step": 14590 }, { "epoch": 0.055571203459117105, "grad_norm": 0.10701868683099747, "learning_rate": 0.0005, "loss": 2.1429, "step": 14600 }, { "epoch": 0.05560926592723978, "grad_norm": 0.12646402418613434, "learning_rate": 0.0005, "loss": 2.157, "step": 14610 }, { "epoch": 0.05564732839536247, "grad_norm": 0.10631322115659714, "learning_rate": 0.0005, "loss": 2.1458, "step": 14620 }, { "epoch": 0.05568539086348515, "grad_norm": 0.11027562618255615, "learning_rate": 0.0005, "loss": 2.1407, "step": 14630 }, { "epoch": 0.05572345333160784, "grad_norm": 0.12378138303756714, "learning_rate": 0.0005, "loss": 2.127, "step": 14640 }, { "epoch": 0.055761515799730516, "grad_norm": 0.11404086649417877, "learning_rate": 0.0005, "loss": 2.1452, "step": 14650 }, { "epoch": 0.0557995782678532, "grad_norm": 0.11708112806081772, "learning_rate": 0.0005, "loss": 2.139, "step": 14660 }, { "epoch": 0.055837640735975885, "grad_norm": 0.12451019138097763, "learning_rate": 0.0005, "loss": 2.1457, "step": 14670 }, { "epoch": 0.05587570320409856, "grad_norm": 0.13042905926704407, "learning_rate": 0.0005, "loss": 2.1429, "step": 14680 }, { "epoch": 0.05591376567222125, "grad_norm": 0.1280081868171692, "learning_rate": 0.0005, "loss": 2.1469, "step": 14690 }, { "epoch": 0.05595182814034393, "grad_norm": 0.1199612095952034, "learning_rate": 0.0005, "loss": 2.1353, "step": 14700 }, { "epoch": 0.05598989060846662, "grad_norm": 0.11185267567634583, "learning_rate": 0.0005, "loss": 2.1352, "step": 14710 }, { "epoch": 0.056027953076589296, "grad_norm": 0.11486394703388214, "learning_rate": 0.0005, "loss": 2.144, "step": 14720 }, { "epoch": 0.05606601554471198, "grad_norm": 0.11386962980031967, "learning_rate": 0.0005, "loss": 2.1347, "step": 14730 }, { "epoch": 0.056104078012834666, "grad_norm": 0.13496342301368713, "learning_rate": 0.0005, "loss": 2.1457, "step": 14740 }, { "epoch": 0.05614214048095735, "grad_norm": 0.1216830313205719, "learning_rate": 0.0005, "loss": 2.1501, "step": 14750 }, { "epoch": 0.05618020294908003, "grad_norm": 0.1195463240146637, "learning_rate": 0.0005, "loss": 2.1347, "step": 14760 }, { "epoch": 0.05621826541720271, "grad_norm": 0.12027116119861603, "learning_rate": 0.0005, "loss": 2.1442, "step": 14770 }, { "epoch": 0.0562563278853254, "grad_norm": 0.12256968766450882, "learning_rate": 0.0005, "loss": 2.1483, "step": 14780 }, { "epoch": 0.056294390353448076, "grad_norm": 0.12090452015399933, "learning_rate": 0.0005, "loss": 2.1434, "step": 14790 }, { "epoch": 0.05633245282157076, "grad_norm": 0.12250377982854843, "learning_rate": 0.0005, "loss": 2.1404, "step": 14800 }, { "epoch": 0.056370515289693446, "grad_norm": 0.11214666813611984, "learning_rate": 0.0005, "loss": 2.1611, "step": 14810 }, { "epoch": 0.05640857775781613, "grad_norm": 0.11024061590433121, "learning_rate": 0.0005, "loss": 2.136, "step": 14820 }, { "epoch": 0.05644664022593881, "grad_norm": 0.120402991771698, "learning_rate": 0.0005, "loss": 2.1477, "step": 14830 }, { "epoch": 0.056484702694061494, "grad_norm": 0.12345562875270844, "learning_rate": 0.0005, "loss": 2.15, "step": 14840 }, { "epoch": 0.05652276516218418, "grad_norm": 0.1288871020078659, "learning_rate": 0.0005, "loss": 2.1318, "step": 14850 }, { "epoch": 0.056560827630306856, "grad_norm": 0.12485507130622864, "learning_rate": 0.0005, "loss": 2.1457, "step": 14860 }, { "epoch": 0.05659889009842954, "grad_norm": 0.11655443906784058, "learning_rate": 0.0005, "loss": 2.141, "step": 14870 }, { "epoch": 0.056636952566552226, "grad_norm": 0.136055126786232, "learning_rate": 0.0005, "loss": 2.1419, "step": 14880 }, { "epoch": 0.05667501503467491, "grad_norm": 0.11421075463294983, "learning_rate": 0.0005, "loss": 2.1413, "step": 14890 }, { "epoch": 0.05671307750279759, "grad_norm": 0.11379828304052353, "learning_rate": 0.0005, "loss": 2.1385, "step": 14900 }, { "epoch": 0.056751139970920274, "grad_norm": 0.11044083535671234, "learning_rate": 0.0005, "loss": 2.1285, "step": 14910 }, { "epoch": 0.05678920243904296, "grad_norm": 0.11647764593362808, "learning_rate": 0.0005, "loss": 2.1373, "step": 14920 }, { "epoch": 0.056827264907165644, "grad_norm": 0.1301194727420807, "learning_rate": 0.0005, "loss": 2.1416, "step": 14930 }, { "epoch": 0.05686532737528832, "grad_norm": 0.11834192276000977, "learning_rate": 0.0005, "loss": 2.1238, "step": 14940 }, { "epoch": 0.056903389843411006, "grad_norm": 0.13392376899719238, "learning_rate": 0.0005, "loss": 2.1596, "step": 14950 }, { "epoch": 0.05694145231153369, "grad_norm": 0.1137416735291481, "learning_rate": 0.0005, "loss": 2.1399, "step": 14960 }, { "epoch": 0.05697951477965637, "grad_norm": 0.12346642464399338, "learning_rate": 0.0005, "loss": 2.1433, "step": 14970 }, { "epoch": 0.057017577247779054, "grad_norm": 0.12635205686092377, "learning_rate": 0.0005, "loss": 2.1464, "step": 14980 }, { "epoch": 0.05705563971590174, "grad_norm": 0.1041107177734375, "learning_rate": 0.0005, "loss": 2.1411, "step": 14990 }, { "epoch": 0.057093702184024424, "grad_norm": 0.11996068060398102, "learning_rate": 0.0005, "loss": 2.1377, "step": 15000 }, { "epoch": 0.0571317646521471, "grad_norm": 0.10924813151359558, "learning_rate": 0.0005, "loss": 2.1493, "step": 15010 }, { "epoch": 0.05716982712026979, "grad_norm": 0.11912663280963898, "learning_rate": 0.0005, "loss": 2.1359, "step": 15020 }, { "epoch": 0.05720788958839247, "grad_norm": 0.10958463698625565, "learning_rate": 0.0005, "loss": 2.1295, "step": 15030 }, { "epoch": 0.05724595205651515, "grad_norm": 0.11127685755491257, "learning_rate": 0.0005, "loss": 2.1484, "step": 15040 }, { "epoch": 0.057284014524637834, "grad_norm": 0.11640200763940811, "learning_rate": 0.0005, "loss": 2.1431, "step": 15050 }, { "epoch": 0.05732207699276052, "grad_norm": 0.10901486128568649, "learning_rate": 0.0005, "loss": 2.1501, "step": 15060 }, { "epoch": 0.057360139460883204, "grad_norm": 0.11258064955472946, "learning_rate": 0.0005, "loss": 2.1449, "step": 15070 }, { "epoch": 0.05739820192900588, "grad_norm": 0.13269981741905212, "learning_rate": 0.0005, "loss": 2.1559, "step": 15080 }, { "epoch": 0.05743626439712857, "grad_norm": 0.11579059064388275, "learning_rate": 0.0005, "loss": 2.1497, "step": 15090 }, { "epoch": 0.05747432686525125, "grad_norm": 0.10881291329860687, "learning_rate": 0.0005, "loss": 2.1153, "step": 15100 }, { "epoch": 0.05751238933337394, "grad_norm": 0.11739391833543777, "learning_rate": 0.0005, "loss": 2.1367, "step": 15110 }, { "epoch": 0.057550451801496615, "grad_norm": 0.114154152572155, "learning_rate": 0.0005, "loss": 2.1504, "step": 15120 }, { "epoch": 0.0575885142696193, "grad_norm": 0.11408552527427673, "learning_rate": 0.0005, "loss": 2.1467, "step": 15130 }, { "epoch": 0.057626576737741984, "grad_norm": 0.12134097516536713, "learning_rate": 0.0005, "loss": 2.1582, "step": 15140 }, { "epoch": 0.05766463920586466, "grad_norm": 0.11878085881471634, "learning_rate": 0.0005, "loss": 2.1255, "step": 15150 }, { "epoch": 0.05770270167398735, "grad_norm": 0.11943016946315765, "learning_rate": 0.0005, "loss": 2.1462, "step": 15160 }, { "epoch": 0.05774076414211003, "grad_norm": 0.12231114506721497, "learning_rate": 0.0005, "loss": 2.1464, "step": 15170 }, { "epoch": 0.05777882661023272, "grad_norm": 0.1249522864818573, "learning_rate": 0.0005, "loss": 2.1272, "step": 15180 }, { "epoch": 0.057816889078355395, "grad_norm": 0.12952202558517456, "learning_rate": 0.0005, "loss": 2.1185, "step": 15190 }, { "epoch": 0.05785495154647808, "grad_norm": 0.12828490138053894, "learning_rate": 0.0005, "loss": 2.1502, "step": 15200 }, { "epoch": 0.057893014014600765, "grad_norm": 0.11592209339141846, "learning_rate": 0.0005, "loss": 2.1361, "step": 15210 }, { "epoch": 0.05793107648272344, "grad_norm": 0.11671043932437897, "learning_rate": 0.0005, "loss": 2.1482, "step": 15220 }, { "epoch": 0.05796913895084613, "grad_norm": 0.11773068457841873, "learning_rate": 0.0005, "loss": 2.1347, "step": 15230 }, { "epoch": 0.05800720141896881, "grad_norm": 0.12474120408296585, "learning_rate": 0.0005, "loss": 2.1445, "step": 15240 }, { "epoch": 0.0580452638870915, "grad_norm": 0.11964181810617447, "learning_rate": 0.0005, "loss": 2.1496, "step": 15250 }, { "epoch": 0.058083326355214175, "grad_norm": 0.12345830351114273, "learning_rate": 0.0005, "loss": 2.1486, "step": 15260 }, { "epoch": 0.05812138882333686, "grad_norm": 0.1257256418466568, "learning_rate": 0.0005, "loss": 2.1345, "step": 15270 }, { "epoch": 0.058159451291459545, "grad_norm": 0.10888057947158813, "learning_rate": 0.0005, "loss": 2.143, "step": 15280 }, { "epoch": 0.05819751375958223, "grad_norm": 0.10951346158981323, "learning_rate": 0.0005, "loss": 2.1369, "step": 15290 }, { "epoch": 0.05823557622770491, "grad_norm": 0.10857826471328735, "learning_rate": 0.0005, "loss": 2.1291, "step": 15300 }, { "epoch": 0.05827363869582759, "grad_norm": 0.11710096895694733, "learning_rate": 0.0005, "loss": 2.1423, "step": 15310 }, { "epoch": 0.05831170116395028, "grad_norm": 0.11304796487092972, "learning_rate": 0.0005, "loss": 2.1348, "step": 15320 }, { "epoch": 0.058349763632072955, "grad_norm": 0.12150050699710846, "learning_rate": 0.0005, "loss": 2.1413, "step": 15330 }, { "epoch": 0.05838782610019564, "grad_norm": 0.11822327971458435, "learning_rate": 0.0005, "loss": 2.1495, "step": 15340 }, { "epoch": 0.058425888568318325, "grad_norm": 0.1256248503923416, "learning_rate": 0.0005, "loss": 2.1444, "step": 15350 }, { "epoch": 0.05846395103644101, "grad_norm": 0.12715673446655273, "learning_rate": 0.0005, "loss": 2.1455, "step": 15360 }, { "epoch": 0.05850201350456369, "grad_norm": 0.12773144245147705, "learning_rate": 0.0005, "loss": 2.1357, "step": 15370 }, { "epoch": 0.05854007597268637, "grad_norm": 0.11277401447296143, "learning_rate": 0.0005, "loss": 2.1554, "step": 15380 }, { "epoch": 0.05857813844080906, "grad_norm": 0.10849560052156448, "learning_rate": 0.0005, "loss": 2.1479, "step": 15390 }, { "epoch": 0.058616200908931736, "grad_norm": 0.11700870841741562, "learning_rate": 0.0005, "loss": 2.1337, "step": 15400 }, { "epoch": 0.05865426337705442, "grad_norm": 0.12502211332321167, "learning_rate": 0.0005, "loss": 2.1337, "step": 15410 }, { "epoch": 0.058692325845177105, "grad_norm": 0.11653271317481995, "learning_rate": 0.0005, "loss": 2.161, "step": 15420 }, { "epoch": 0.05873038831329979, "grad_norm": 0.12796089053153992, "learning_rate": 0.0005, "loss": 2.1375, "step": 15430 }, { "epoch": 0.05876845078142247, "grad_norm": 0.12059906125068665, "learning_rate": 0.0005, "loss": 2.1537, "step": 15440 }, { "epoch": 0.05880651324954515, "grad_norm": 0.11414425820112228, "learning_rate": 0.0005, "loss": 2.1406, "step": 15450 }, { "epoch": 0.05884457571766784, "grad_norm": 0.11696535348892212, "learning_rate": 0.0005, "loss": 2.1544, "step": 15460 }, { "epoch": 0.058882638185790516, "grad_norm": 0.12169355154037476, "learning_rate": 0.0005, "loss": 2.1463, "step": 15470 }, { "epoch": 0.0589207006539132, "grad_norm": 0.13853904604911804, "learning_rate": 0.0005, "loss": 2.1433, "step": 15480 }, { "epoch": 0.058958763122035886, "grad_norm": 0.11219564080238342, "learning_rate": 0.0005, "loss": 2.1359, "step": 15490 }, { "epoch": 0.05899682559015857, "grad_norm": 0.1147218570113182, "learning_rate": 0.0005, "loss": 2.136, "step": 15500 }, { "epoch": 0.05903488805828125, "grad_norm": 0.11296788603067398, "learning_rate": 0.0005, "loss": 2.1342, "step": 15510 }, { "epoch": 0.05907295052640393, "grad_norm": 0.11690889298915863, "learning_rate": 0.0005, "loss": 2.1442, "step": 15520 }, { "epoch": 0.05911101299452662, "grad_norm": 0.12081010639667511, "learning_rate": 0.0005, "loss": 2.1577, "step": 15530 }, { "epoch": 0.0591490754626493, "grad_norm": 0.12585335969924927, "learning_rate": 0.0005, "loss": 2.1556, "step": 15540 }, { "epoch": 0.05918713793077198, "grad_norm": 0.11698474735021591, "learning_rate": 0.0005, "loss": 2.1328, "step": 15550 }, { "epoch": 0.059225200398894666, "grad_norm": 0.12431799620389938, "learning_rate": 0.0005, "loss": 2.1231, "step": 15560 }, { "epoch": 0.05926326286701735, "grad_norm": 0.1195605993270874, "learning_rate": 0.0005, "loss": 2.1375, "step": 15570 }, { "epoch": 0.05930132533514003, "grad_norm": 0.12227523326873779, "learning_rate": 0.0005, "loss": 2.1406, "step": 15580 }, { "epoch": 0.059339387803262714, "grad_norm": 0.11026965826749802, "learning_rate": 0.0005, "loss": 2.1399, "step": 15590 }, { "epoch": 0.0593774502713854, "grad_norm": 0.12768976390361786, "learning_rate": 0.0005, "loss": 2.1289, "step": 15600 }, { "epoch": 0.05941551273950808, "grad_norm": 0.1162969097495079, "learning_rate": 0.0005, "loss": 2.1606, "step": 15610 }, { "epoch": 0.05945357520763076, "grad_norm": 0.1282632201910019, "learning_rate": 0.0005, "loss": 2.1525, "step": 15620 }, { "epoch": 0.059491637675753446, "grad_norm": 0.11534926295280457, "learning_rate": 0.0005, "loss": 2.1322, "step": 15630 }, { "epoch": 0.05952970014387613, "grad_norm": 0.12293092906475067, "learning_rate": 0.0005, "loss": 2.1272, "step": 15640 }, { "epoch": 0.05956776261199881, "grad_norm": 0.13364486396312714, "learning_rate": 0.0005, "loss": 2.1434, "step": 15650 }, { "epoch": 0.059605825080121494, "grad_norm": 0.1044587567448616, "learning_rate": 0.0005, "loss": 2.1513, "step": 15660 }, { "epoch": 0.05964388754824418, "grad_norm": 0.11058751493692398, "learning_rate": 0.0005, "loss": 2.1376, "step": 15670 }, { "epoch": 0.059681950016366864, "grad_norm": 0.11776610463857651, "learning_rate": 0.0005, "loss": 2.1451, "step": 15680 }, { "epoch": 0.05972001248448954, "grad_norm": 0.11538823693990707, "learning_rate": 0.0005, "loss": 2.1451, "step": 15690 }, { "epoch": 0.059758074952612227, "grad_norm": 0.11598982661962509, "learning_rate": 0.0005, "loss": 2.1373, "step": 15700 }, { "epoch": 0.05979613742073491, "grad_norm": 0.11938263475894928, "learning_rate": 0.0005, "loss": 2.1264, "step": 15710 }, { "epoch": 0.059834199888857596, "grad_norm": 0.1095249280333519, "learning_rate": 0.0005, "loss": 2.1593, "step": 15720 }, { "epoch": 0.059872262356980274, "grad_norm": 0.10784026980400085, "learning_rate": 0.0005, "loss": 2.1382, "step": 15730 }, { "epoch": 0.05991032482510296, "grad_norm": 0.11930166929960251, "learning_rate": 0.0005, "loss": 2.1461, "step": 15740 }, { "epoch": 0.059948387293225644, "grad_norm": 0.15975899994373322, "learning_rate": 0.0005, "loss": 2.1545, "step": 15750 }, { "epoch": 0.05998644976134832, "grad_norm": 0.11798959225416183, "learning_rate": 0.0005, "loss": 2.1452, "step": 15760 }, { "epoch": 0.06002451222947101, "grad_norm": 0.12302548438310623, "learning_rate": 0.0005, "loss": 2.1419, "step": 15770 }, { "epoch": 0.06006257469759369, "grad_norm": 0.1195300966501236, "learning_rate": 0.0005, "loss": 2.151, "step": 15780 }, { "epoch": 0.06010063716571638, "grad_norm": 0.1508270651102066, "learning_rate": 0.0005, "loss": 2.1473, "step": 15790 }, { "epoch": 0.060138699633839054, "grad_norm": 0.11350975930690765, "learning_rate": 0.0005, "loss": 2.1499, "step": 15800 }, { "epoch": 0.06017676210196174, "grad_norm": 0.11031148582696915, "learning_rate": 0.0005, "loss": 2.1358, "step": 15810 }, { "epoch": 0.060214824570084424, "grad_norm": 0.12113554775714874, "learning_rate": 0.0005, "loss": 2.1473, "step": 15820 }, { "epoch": 0.0602528870382071, "grad_norm": 0.11519166082143784, "learning_rate": 0.0005, "loss": 2.1338, "step": 15830 }, { "epoch": 0.06029094950632979, "grad_norm": 0.11963526904582977, "learning_rate": 0.0005, "loss": 2.1461, "step": 15840 }, { "epoch": 0.06032901197445247, "grad_norm": 0.11226295679807663, "learning_rate": 0.0005, "loss": 2.1403, "step": 15850 }, { "epoch": 0.06036707444257516, "grad_norm": 0.11856474727392197, "learning_rate": 0.0005, "loss": 2.1482, "step": 15860 }, { "epoch": 0.060405136910697835, "grad_norm": 0.11463222652673721, "learning_rate": 0.0005, "loss": 2.1427, "step": 15870 }, { "epoch": 0.06044319937882052, "grad_norm": 0.10893730074167252, "learning_rate": 0.0005, "loss": 2.1316, "step": 15880 }, { "epoch": 0.060481261846943205, "grad_norm": 0.12570078670978546, "learning_rate": 0.0005, "loss": 2.1428, "step": 15890 }, { "epoch": 0.06051932431506589, "grad_norm": 0.12432834506034851, "learning_rate": 0.0005, "loss": 2.1388, "step": 15900 }, { "epoch": 0.06055738678318857, "grad_norm": 0.11896125227212906, "learning_rate": 0.0005, "loss": 2.1616, "step": 15910 }, { "epoch": 0.06059544925131125, "grad_norm": 0.1110902652144432, "learning_rate": 0.0005, "loss": 2.1402, "step": 15920 }, { "epoch": 0.06063351171943394, "grad_norm": 0.1337718665599823, "learning_rate": 0.0005, "loss": 2.1339, "step": 15930 }, { "epoch": 0.060671574187556615, "grad_norm": 0.11414303630590439, "learning_rate": 0.0005, "loss": 2.1414, "step": 15940 }, { "epoch": 0.0607096366556793, "grad_norm": 0.11986953020095825, "learning_rate": 0.0005, "loss": 2.1562, "step": 15950 }, { "epoch": 0.060747699123801985, "grad_norm": 0.12421073019504547, "learning_rate": 0.0005, "loss": 2.1246, "step": 15960 }, { "epoch": 0.06078576159192467, "grad_norm": 0.12681721150875092, "learning_rate": 0.0005, "loss": 2.1421, "step": 15970 }, { "epoch": 0.06082382406004735, "grad_norm": 0.41695889830589294, "learning_rate": 0.0005, "loss": 2.1375, "step": 15980 }, { "epoch": 0.06086188652817003, "grad_norm": 0.11906461417675018, "learning_rate": 0.0005, "loss": 2.1369, "step": 15990 }, { "epoch": 0.06089994899629272, "grad_norm": 0.10986065119504929, "learning_rate": 0.0005, "loss": 2.1329, "step": 16000 }, { "epoch": 0.060938011464415395, "grad_norm": 0.12073797732591629, "learning_rate": 0.0005, "loss": 2.1374, "step": 16010 }, { "epoch": 0.06097607393253808, "grad_norm": 0.12336317449808121, "learning_rate": 0.0005, "loss": 2.1395, "step": 16020 }, { "epoch": 0.061014136400660765, "grad_norm": 0.11709783971309662, "learning_rate": 0.0005, "loss": 2.1222, "step": 16030 }, { "epoch": 0.06105219886878345, "grad_norm": 0.11148954182863235, "learning_rate": 0.0005, "loss": 2.1346, "step": 16040 }, { "epoch": 0.06109026133690613, "grad_norm": 0.1247798353433609, "learning_rate": 0.0005, "loss": 2.1479, "step": 16050 }, { "epoch": 0.06112832380502881, "grad_norm": 0.11071789264678955, "learning_rate": 0.0005, "loss": 2.1561, "step": 16060 }, { "epoch": 0.0611663862731515, "grad_norm": 0.27611425518989563, "learning_rate": 0.0005, "loss": 2.1361, "step": 16070 }, { "epoch": 0.06120444874127418, "grad_norm": 0.11537760496139526, "learning_rate": 0.0005, "loss": 2.1413, "step": 16080 }, { "epoch": 0.06124251120939686, "grad_norm": 0.12730243802070618, "learning_rate": 0.0005, "loss": 2.1598, "step": 16090 }, { "epoch": 0.061280573677519545, "grad_norm": 0.12035439908504486, "learning_rate": 0.0005, "loss": 2.1343, "step": 16100 }, { "epoch": 0.06131863614564223, "grad_norm": 0.1186971440911293, "learning_rate": 0.0005, "loss": 2.1283, "step": 16110 }, { "epoch": 0.06135669861376491, "grad_norm": 0.11212721467018127, "learning_rate": 0.0005, "loss": 2.1358, "step": 16120 }, { "epoch": 0.06139476108188759, "grad_norm": 0.11165232956409454, "learning_rate": 0.0005, "loss": 2.1404, "step": 16130 }, { "epoch": 0.06143282355001028, "grad_norm": 0.12069948017597198, "learning_rate": 0.0005, "loss": 2.1239, "step": 16140 }, { "epoch": 0.06147088601813296, "grad_norm": 0.11381001025438309, "learning_rate": 0.0005, "loss": 2.1293, "step": 16150 }, { "epoch": 0.06150894848625564, "grad_norm": 0.12348002195358276, "learning_rate": 0.0005, "loss": 2.15, "step": 16160 }, { "epoch": 0.061547010954378326, "grad_norm": 0.11615636944770813, "learning_rate": 0.0005, "loss": 2.1544, "step": 16170 }, { "epoch": 0.06158507342250101, "grad_norm": 0.111156165599823, "learning_rate": 0.0005, "loss": 2.1559, "step": 16180 }, { "epoch": 0.06162313589062369, "grad_norm": 0.12605836987495422, "learning_rate": 0.0005, "loss": 2.1309, "step": 16190 }, { "epoch": 0.06166119835874637, "grad_norm": 0.12461890280246735, "learning_rate": 0.0005, "loss": 2.1373, "step": 16200 }, { "epoch": 0.06169926082686906, "grad_norm": 0.12980882823467255, "learning_rate": 0.0005, "loss": 2.1449, "step": 16210 }, { "epoch": 0.06173732329499174, "grad_norm": 0.12242074310779572, "learning_rate": 0.0005, "loss": 2.1395, "step": 16220 }, { "epoch": 0.06177538576311442, "grad_norm": 0.13245800137519836, "learning_rate": 0.0005, "loss": 2.1425, "step": 16230 }, { "epoch": 0.061813448231237106, "grad_norm": 0.11558622121810913, "learning_rate": 0.0005, "loss": 2.1433, "step": 16240 }, { "epoch": 0.06185151069935979, "grad_norm": 0.13138440251350403, "learning_rate": 0.0005, "loss": 2.1476, "step": 16250 }, { "epoch": 0.061889573167482476, "grad_norm": 0.11061680316925049, "learning_rate": 0.0005, "loss": 2.1295, "step": 16260 }, { "epoch": 0.061927635635605154, "grad_norm": 0.12240659445524216, "learning_rate": 0.0005, "loss": 2.1402, "step": 16270 }, { "epoch": 0.06196569810372784, "grad_norm": 0.12258733063936234, "learning_rate": 0.0005, "loss": 2.1499, "step": 16280 }, { "epoch": 0.06200376057185052, "grad_norm": 0.12067878991365433, "learning_rate": 0.0005, "loss": 2.1511, "step": 16290 }, { "epoch": 0.0620418230399732, "grad_norm": 0.10910321027040482, "learning_rate": 0.0005, "loss": 2.1418, "step": 16300 }, { "epoch": 0.062079885508095886, "grad_norm": 0.11242785304784775, "learning_rate": 0.0005, "loss": 2.1476, "step": 16310 }, { "epoch": 0.06211794797621857, "grad_norm": 0.11315638571977615, "learning_rate": 0.0005, "loss": 2.1353, "step": 16320 }, { "epoch": 0.062156010444341256, "grad_norm": 0.11431436985731125, "learning_rate": 0.0005, "loss": 2.1448, "step": 16330 }, { "epoch": 0.062194072912463934, "grad_norm": 0.10935595631599426, "learning_rate": 0.0005, "loss": 2.1372, "step": 16340 }, { "epoch": 0.06223213538058662, "grad_norm": 0.11540095508098602, "learning_rate": 0.0005, "loss": 2.1455, "step": 16350 }, { "epoch": 0.062270197848709304, "grad_norm": 0.1297951191663742, "learning_rate": 0.0005, "loss": 2.1361, "step": 16360 }, { "epoch": 0.06230826031683198, "grad_norm": 0.12086521089076996, "learning_rate": 0.0005, "loss": 2.1385, "step": 16370 }, { "epoch": 0.062346322784954666, "grad_norm": 0.1273258775472641, "learning_rate": 0.0005, "loss": 2.1452, "step": 16380 }, { "epoch": 0.06238438525307735, "grad_norm": 0.12046687304973602, "learning_rate": 0.0005, "loss": 2.1304, "step": 16390 }, { "epoch": 0.062422447721200036, "grad_norm": 0.11739485710859299, "learning_rate": 0.0005, "loss": 2.1497, "step": 16400 }, { "epoch": 0.062460510189322714, "grad_norm": 0.13197965919971466, "learning_rate": 0.0005, "loss": 2.144, "step": 16410 }, { "epoch": 0.0624985726574454, "grad_norm": 0.11531320214271545, "learning_rate": 0.0005, "loss": 2.1201, "step": 16420 }, { "epoch": 0.06253663512556808, "grad_norm": 0.12141018360853195, "learning_rate": 0.0005, "loss": 2.1266, "step": 16430 }, { "epoch": 0.06257469759369076, "grad_norm": 0.11645390838384628, "learning_rate": 0.0005, "loss": 2.1393, "step": 16440 }, { "epoch": 0.06261276006181345, "grad_norm": 0.11072708666324615, "learning_rate": 0.0005, "loss": 2.1333, "step": 16450 }, { "epoch": 0.06265082252993613, "grad_norm": 0.112320177257061, "learning_rate": 0.0005, "loss": 2.15, "step": 16460 }, { "epoch": 0.06268888499805882, "grad_norm": 0.12140627950429916, "learning_rate": 0.0005, "loss": 2.136, "step": 16470 }, { "epoch": 0.0627269474661815, "grad_norm": 0.11296653002500534, "learning_rate": 0.0005, "loss": 2.1434, "step": 16480 }, { "epoch": 0.06276500993430419, "grad_norm": 0.11528851091861725, "learning_rate": 0.0005, "loss": 2.1314, "step": 16490 }, { "epoch": 0.06280307240242686, "grad_norm": 0.1190958246588707, "learning_rate": 0.0005, "loss": 2.1424, "step": 16500 }, { "epoch": 0.06284113487054954, "grad_norm": 0.11648474633693695, "learning_rate": 0.0005, "loss": 2.1365, "step": 16510 }, { "epoch": 0.06287919733867223, "grad_norm": 0.1304817795753479, "learning_rate": 0.0005, "loss": 2.1214, "step": 16520 }, { "epoch": 0.06291725980679491, "grad_norm": 0.2063707560300827, "learning_rate": 0.0005, "loss": 2.1347, "step": 16530 }, { "epoch": 0.0629553222749176, "grad_norm": 0.10847392678260803, "learning_rate": 0.0005, "loss": 2.137, "step": 16540 }, { "epoch": 0.06299338474304028, "grad_norm": 0.12247609347105026, "learning_rate": 0.0005, "loss": 2.1568, "step": 16550 }, { "epoch": 0.06303144721116297, "grad_norm": 0.11401928216218948, "learning_rate": 0.0005, "loss": 2.1193, "step": 16560 }, { "epoch": 0.06306950967928564, "grad_norm": 0.12218509614467621, "learning_rate": 0.0005, "loss": 2.1565, "step": 16570 }, { "epoch": 0.06310757214740832, "grad_norm": 0.12032946199178696, "learning_rate": 0.0005, "loss": 2.1412, "step": 16580 }, { "epoch": 0.06314563461553101, "grad_norm": 0.12295140326023102, "learning_rate": 0.0005, "loss": 2.1385, "step": 16590 }, { "epoch": 0.06318369708365369, "grad_norm": 0.11174926906824112, "learning_rate": 0.0005, "loss": 2.1319, "step": 16600 }, { "epoch": 0.06322175955177638, "grad_norm": 0.12819042801856995, "learning_rate": 0.0005, "loss": 2.135, "step": 16610 }, { "epoch": 0.06325982201989906, "grad_norm": 0.11440946161746979, "learning_rate": 0.0005, "loss": 2.1374, "step": 16620 }, { "epoch": 0.06329788448802175, "grad_norm": 0.1090090423822403, "learning_rate": 0.0005, "loss": 2.1413, "step": 16630 }, { "epoch": 0.06333594695614442, "grad_norm": 0.13145922124385834, "learning_rate": 0.0005, "loss": 2.1518, "step": 16640 }, { "epoch": 0.0633740094242671, "grad_norm": 0.1270890235900879, "learning_rate": 0.0005, "loss": 2.1288, "step": 16650 }, { "epoch": 0.06341207189238979, "grad_norm": 0.11212950944900513, "learning_rate": 0.0005, "loss": 2.1328, "step": 16660 }, { "epoch": 0.06345013436051247, "grad_norm": 0.12071997672319412, "learning_rate": 0.0005, "loss": 2.1395, "step": 16670 }, { "epoch": 0.06348819682863516, "grad_norm": 0.12150036543607712, "learning_rate": 0.0005, "loss": 2.1405, "step": 16680 }, { "epoch": 0.06352625929675784, "grad_norm": 0.11558043211698532, "learning_rate": 0.0005, "loss": 2.1383, "step": 16690 }, { "epoch": 0.06356432176488053, "grad_norm": 0.11684717983007431, "learning_rate": 0.0005, "loss": 2.1335, "step": 16700 }, { "epoch": 0.0636023842330032, "grad_norm": 0.10834956914186478, "learning_rate": 0.0005, "loss": 2.1371, "step": 16710 }, { "epoch": 0.06364044670112588, "grad_norm": 0.11303365230560303, "learning_rate": 0.0005, "loss": 2.1414, "step": 16720 }, { "epoch": 0.06367850916924857, "grad_norm": 0.11881110072135925, "learning_rate": 0.0005, "loss": 2.1393, "step": 16730 }, { "epoch": 0.06371657163737125, "grad_norm": 0.1124265193939209, "learning_rate": 0.0005, "loss": 2.1393, "step": 16740 }, { "epoch": 0.06375463410549394, "grad_norm": 0.11897893995046616, "learning_rate": 0.0005, "loss": 2.1397, "step": 16750 }, { "epoch": 0.06379269657361662, "grad_norm": 0.10814017057418823, "learning_rate": 0.0005, "loss": 2.1386, "step": 16760 }, { "epoch": 0.06383075904173931, "grad_norm": 0.12043372541666031, "learning_rate": 0.0005, "loss": 2.1636, "step": 16770 }, { "epoch": 0.06386882150986199, "grad_norm": 0.108718641102314, "learning_rate": 0.0005, "loss": 2.1383, "step": 16780 }, { "epoch": 0.06390688397798466, "grad_norm": 0.10894080251455307, "learning_rate": 0.0005, "loss": 2.1453, "step": 16790 }, { "epoch": 0.06394494644610735, "grad_norm": 0.12031812220811844, "learning_rate": 0.0005, "loss": 2.1349, "step": 16800 }, { "epoch": 0.06398300891423003, "grad_norm": 0.10882657766342163, "learning_rate": 0.0005, "loss": 2.1497, "step": 16810 }, { "epoch": 0.06402107138235272, "grad_norm": 0.11654344946146011, "learning_rate": 0.0005, "loss": 2.1418, "step": 16820 }, { "epoch": 0.0640591338504754, "grad_norm": 0.1230119913816452, "learning_rate": 0.0005, "loss": 2.1364, "step": 16830 }, { "epoch": 0.06409719631859809, "grad_norm": 0.11708708852529526, "learning_rate": 0.0005, "loss": 2.1595, "step": 16840 }, { "epoch": 0.06413525878672077, "grad_norm": 0.1183198019862175, "learning_rate": 0.0005, "loss": 2.1516, "step": 16850 }, { "epoch": 0.06417332125484344, "grad_norm": 0.1250128149986267, "learning_rate": 0.0005, "loss": 2.1181, "step": 16860 }, { "epoch": 0.06421138372296613, "grad_norm": 0.12850812077522278, "learning_rate": 0.0005, "loss": 2.158, "step": 16870 }, { "epoch": 0.06424944619108881, "grad_norm": 0.12004446983337402, "learning_rate": 0.0005, "loss": 2.1426, "step": 16880 }, { "epoch": 0.0642875086592115, "grad_norm": 0.1127396896481514, "learning_rate": 0.0005, "loss": 2.1522, "step": 16890 }, { "epoch": 0.06432557112733418, "grad_norm": 0.13206587731838226, "learning_rate": 0.0005, "loss": 2.1443, "step": 16900 }, { "epoch": 0.06436363359545687, "grad_norm": 0.11783764511346817, "learning_rate": 0.0005, "loss": 2.1438, "step": 16910 }, { "epoch": 0.06440169606357955, "grad_norm": 0.12285258620977402, "learning_rate": 0.0005, "loss": 2.1362, "step": 16920 }, { "epoch": 0.06443975853170222, "grad_norm": 0.13280342519283295, "learning_rate": 0.0005, "loss": 2.1376, "step": 16930 }, { "epoch": 0.06447782099982491, "grad_norm": 0.11519664525985718, "learning_rate": 0.0005, "loss": 2.1394, "step": 16940 }, { "epoch": 0.0645158834679476, "grad_norm": 0.11494658142328262, "learning_rate": 0.0005, "loss": 2.1447, "step": 16950 }, { "epoch": 0.06455394593607028, "grad_norm": 0.10861913114786148, "learning_rate": 0.0005, "loss": 2.1417, "step": 16960 }, { "epoch": 0.06459200840419296, "grad_norm": 0.1098102480173111, "learning_rate": 0.0005, "loss": 2.1384, "step": 16970 }, { "epoch": 0.06463007087231565, "grad_norm": 0.11972511559724808, "learning_rate": 0.0005, "loss": 2.1504, "step": 16980 }, { "epoch": 0.06466813334043833, "grad_norm": 0.11844224482774734, "learning_rate": 0.0005, "loss": 2.1371, "step": 16990 }, { "epoch": 0.064706195808561, "grad_norm": 0.10886506736278534, "learning_rate": 0.0005, "loss": 2.1565, "step": 17000 }, { "epoch": 0.06474425827668369, "grad_norm": 0.11520258337259293, "learning_rate": 0.0005, "loss": 2.1439, "step": 17010 }, { "epoch": 0.06478232074480637, "grad_norm": 0.113410085439682, "learning_rate": 0.0005, "loss": 2.1337, "step": 17020 }, { "epoch": 0.06482038321292906, "grad_norm": 0.11354994773864746, "learning_rate": 0.0005, "loss": 2.1356, "step": 17030 }, { "epoch": 0.06485844568105174, "grad_norm": 0.1343780755996704, "learning_rate": 0.0005, "loss": 2.153, "step": 17040 }, { "epoch": 0.06489650814917443, "grad_norm": 0.12449301779270172, "learning_rate": 0.0005, "loss": 2.1428, "step": 17050 }, { "epoch": 0.06493457061729711, "grad_norm": 0.12187916040420532, "learning_rate": 0.0005, "loss": 2.1281, "step": 17060 }, { "epoch": 0.06497263308541978, "grad_norm": 0.11673722416162491, "learning_rate": 0.0005, "loss": 2.1473, "step": 17070 }, { "epoch": 0.06501069555354247, "grad_norm": 0.1161603257060051, "learning_rate": 0.0005, "loss": 2.1276, "step": 17080 }, { "epoch": 0.06504875802166515, "grad_norm": 0.12846381962299347, "learning_rate": 0.0005, "loss": 2.1515, "step": 17090 }, { "epoch": 0.06508682048978784, "grad_norm": 0.13948954641819, "learning_rate": 0.0005, "loss": 2.1471, "step": 17100 }, { "epoch": 0.06512488295791052, "grad_norm": 0.11778617650270462, "learning_rate": 0.0005, "loss": 2.1578, "step": 17110 }, { "epoch": 0.06516294542603321, "grad_norm": 0.11828217655420303, "learning_rate": 0.0005, "loss": 2.114, "step": 17120 }, { "epoch": 0.0652010078941559, "grad_norm": 0.1147540882229805, "learning_rate": 0.0005, "loss": 2.1453, "step": 17130 }, { "epoch": 0.06523907036227858, "grad_norm": 0.10472284257411957, "learning_rate": 0.0005, "loss": 2.1474, "step": 17140 }, { "epoch": 0.06527713283040125, "grad_norm": 0.12210440635681152, "learning_rate": 0.0005, "loss": 2.1484, "step": 17150 }, { "epoch": 0.06531519529852393, "grad_norm": 0.1380053013563156, "learning_rate": 0.0005, "loss": 2.1431, "step": 17160 }, { "epoch": 0.06535325776664662, "grad_norm": 0.12499924004077911, "learning_rate": 0.0005, "loss": 2.121, "step": 17170 }, { "epoch": 0.0653913202347693, "grad_norm": 0.11574744433164597, "learning_rate": 0.0005, "loss": 2.1446, "step": 17180 }, { "epoch": 0.06542938270289199, "grad_norm": 0.11198943108320236, "learning_rate": 0.0005, "loss": 2.1352, "step": 17190 }, { "epoch": 0.06546744517101467, "grad_norm": 0.1090979054570198, "learning_rate": 0.0005, "loss": 2.1422, "step": 17200 }, { "epoch": 0.06550550763913736, "grad_norm": 0.11089687049388885, "learning_rate": 0.0005, "loss": 2.1459, "step": 17210 }, { "epoch": 0.06554357010726003, "grad_norm": 0.1218784749507904, "learning_rate": 0.0005, "loss": 2.1423, "step": 17220 }, { "epoch": 0.06558163257538271, "grad_norm": 0.1282770186662674, "learning_rate": 0.0005, "loss": 2.1505, "step": 17230 }, { "epoch": 0.0656196950435054, "grad_norm": 0.11114176362752914, "learning_rate": 0.0005, "loss": 2.1293, "step": 17240 }, { "epoch": 0.06565775751162808, "grad_norm": 0.118684783577919, "learning_rate": 0.0005, "loss": 2.1301, "step": 17250 }, { "epoch": 0.06569581997975077, "grad_norm": 0.11742344498634338, "learning_rate": 0.0005, "loss": 2.1514, "step": 17260 }, { "epoch": 0.06573388244787345, "grad_norm": 0.1274501532316208, "learning_rate": 0.0005, "loss": 2.1473, "step": 17270 }, { "epoch": 0.06577194491599614, "grad_norm": 0.11824849992990494, "learning_rate": 0.0005, "loss": 2.1472, "step": 17280 }, { "epoch": 0.06581000738411881, "grad_norm": 0.1180088147521019, "learning_rate": 0.0005, "loss": 2.1467, "step": 17290 }, { "epoch": 0.0658480698522415, "grad_norm": 0.1161024421453476, "learning_rate": 0.0005, "loss": 2.1344, "step": 17300 }, { "epoch": 0.06588613232036418, "grad_norm": 0.13447245955467224, "learning_rate": 0.0005, "loss": 2.1462, "step": 17310 }, { "epoch": 0.06592419478848686, "grad_norm": 0.13925780355930328, "learning_rate": 0.0005, "loss": 2.1416, "step": 17320 }, { "epoch": 0.06596225725660955, "grad_norm": 0.11427681148052216, "learning_rate": 0.0005, "loss": 2.1551, "step": 17330 }, { "epoch": 0.06600031972473223, "grad_norm": 0.11538650095462799, "learning_rate": 0.0005, "loss": 2.1344, "step": 17340 }, { "epoch": 0.06603838219285492, "grad_norm": 0.11472231894731522, "learning_rate": 0.0005, "loss": 2.1414, "step": 17350 }, { "epoch": 0.06607644466097759, "grad_norm": 0.13605837523937225, "learning_rate": 0.0005, "loss": 2.1473, "step": 17360 }, { "epoch": 0.06611450712910028, "grad_norm": 0.12702429294586182, "learning_rate": 0.0005, "loss": 2.1537, "step": 17370 }, { "epoch": 0.06615256959722296, "grad_norm": 0.11571726202964783, "learning_rate": 0.0005, "loss": 2.1351, "step": 17380 }, { "epoch": 0.06619063206534564, "grad_norm": 0.11434004455804825, "learning_rate": 0.0005, "loss": 2.1508, "step": 17390 }, { "epoch": 0.06622869453346833, "grad_norm": 0.11851716041564941, "learning_rate": 0.0005, "loss": 2.1559, "step": 17400 }, { "epoch": 0.06626675700159101, "grad_norm": 0.11230608075857162, "learning_rate": 0.0005, "loss": 2.1405, "step": 17410 }, { "epoch": 0.0663048194697137, "grad_norm": 0.11201955378055573, "learning_rate": 0.0005, "loss": 2.1572, "step": 17420 }, { "epoch": 0.06634288193783637, "grad_norm": 0.13140922784805298, "learning_rate": 0.0005, "loss": 2.1215, "step": 17430 }, { "epoch": 0.06638094440595906, "grad_norm": 0.1312038004398346, "learning_rate": 0.0005, "loss": 2.1447, "step": 17440 }, { "epoch": 0.06641900687408174, "grad_norm": 0.11670250445604324, "learning_rate": 0.0005, "loss": 2.1392, "step": 17450 }, { "epoch": 0.06645706934220443, "grad_norm": 0.11685652285814285, "learning_rate": 0.0005, "loss": 2.1471, "step": 17460 }, { "epoch": 0.06649513181032711, "grad_norm": 0.12408076226711273, "learning_rate": 0.0005, "loss": 2.15, "step": 17470 }, { "epoch": 0.0665331942784498, "grad_norm": 0.13604721426963806, "learning_rate": 0.0005, "loss": 2.1377, "step": 17480 }, { "epoch": 0.06657125674657248, "grad_norm": 0.11466772109270096, "learning_rate": 0.0005, "loss": 2.1395, "step": 17490 }, { "epoch": 0.06660931921469516, "grad_norm": 0.12748092412948608, "learning_rate": 0.0005, "loss": 2.1141, "step": 17500 }, { "epoch": 0.06664738168281784, "grad_norm": 0.12154994904994965, "learning_rate": 0.0005, "loss": 2.1425, "step": 17510 }, { "epoch": 0.06668544415094052, "grad_norm": 0.11171988397836685, "learning_rate": 0.0005, "loss": 2.1596, "step": 17520 }, { "epoch": 0.0667235066190632, "grad_norm": 0.10425330698490143, "learning_rate": 0.0005, "loss": 2.1384, "step": 17530 }, { "epoch": 0.06676156908718589, "grad_norm": 0.13074353337287903, "learning_rate": 0.0005, "loss": 2.1311, "step": 17540 }, { "epoch": 0.06679963155530858, "grad_norm": 0.11248553544282913, "learning_rate": 0.0005, "loss": 2.1389, "step": 17550 }, { "epoch": 0.06683769402343126, "grad_norm": 0.11872132867574692, "learning_rate": 0.0005, "loss": 2.1603, "step": 17560 }, { "epoch": 0.06687575649155394, "grad_norm": 0.11587295681238174, "learning_rate": 0.0005, "loss": 2.142, "step": 17570 }, { "epoch": 0.06691381895967662, "grad_norm": 0.11544958502054214, "learning_rate": 0.0005, "loss": 2.1368, "step": 17580 }, { "epoch": 0.0669518814277993, "grad_norm": 0.13210080564022064, "learning_rate": 0.0005, "loss": 2.1476, "step": 17590 }, { "epoch": 0.06698994389592199, "grad_norm": 0.1230560839176178, "learning_rate": 0.0005, "loss": 2.128, "step": 17600 }, { "epoch": 0.06702800636404467, "grad_norm": 0.13568370044231415, "learning_rate": 0.0005, "loss": 2.1474, "step": 17610 }, { "epoch": 0.06706606883216736, "grad_norm": 0.12618105113506317, "learning_rate": 0.0005, "loss": 2.1464, "step": 17620 }, { "epoch": 0.06710413130029004, "grad_norm": 0.11416677385568619, "learning_rate": 0.0005, "loss": 2.1417, "step": 17630 }, { "epoch": 0.06714219376841273, "grad_norm": 0.11842382699251175, "learning_rate": 0.0005, "loss": 2.1303, "step": 17640 }, { "epoch": 0.0671802562365354, "grad_norm": 0.11482279002666473, "learning_rate": 0.0005, "loss": 2.1391, "step": 17650 }, { "epoch": 0.06721831870465808, "grad_norm": 0.11678756028413773, "learning_rate": 0.0005, "loss": 2.1487, "step": 17660 }, { "epoch": 0.06725638117278077, "grad_norm": 0.12048120051622391, "learning_rate": 0.0005, "loss": 2.1489, "step": 17670 }, { "epoch": 0.06729444364090345, "grad_norm": 0.1227121651172638, "learning_rate": 0.0005, "loss": 2.1433, "step": 17680 }, { "epoch": 0.06733250610902614, "grad_norm": 0.11371784657239914, "learning_rate": 0.0005, "loss": 2.1409, "step": 17690 }, { "epoch": 0.06737056857714882, "grad_norm": 0.10713077336549759, "learning_rate": 0.0005, "loss": 2.1324, "step": 17700 }, { "epoch": 0.0674086310452715, "grad_norm": 0.10683862119913101, "learning_rate": 0.0005, "loss": 2.142, "step": 17710 }, { "epoch": 0.06744669351339418, "grad_norm": 0.1217755526304245, "learning_rate": 0.0005, "loss": 2.1321, "step": 17720 }, { "epoch": 0.06748475598151686, "grad_norm": 0.13459083437919617, "learning_rate": 0.0005, "loss": 2.1391, "step": 17730 }, { "epoch": 0.06752281844963955, "grad_norm": 0.11992931365966797, "learning_rate": 0.0005, "loss": 2.132, "step": 17740 }, { "epoch": 0.06756088091776223, "grad_norm": 0.11505747586488724, "learning_rate": 0.0005, "loss": 2.1323, "step": 17750 }, { "epoch": 0.06759894338588492, "grad_norm": 0.11355841904878616, "learning_rate": 0.0005, "loss": 2.1442, "step": 17760 }, { "epoch": 0.0676370058540076, "grad_norm": 0.12791898846626282, "learning_rate": 0.0005, "loss": 2.1564, "step": 17770 }, { "epoch": 0.06767506832213029, "grad_norm": 0.11727561056613922, "learning_rate": 0.0005, "loss": 2.1374, "step": 17780 }, { "epoch": 0.06771313079025296, "grad_norm": 0.11516810953617096, "learning_rate": 0.0005, "loss": 2.1488, "step": 17790 }, { "epoch": 0.06775119325837564, "grad_norm": 0.11258723586797714, "learning_rate": 0.0005, "loss": 2.1414, "step": 17800 }, { "epoch": 0.06778925572649833, "grad_norm": 0.12778383493423462, "learning_rate": 0.0005, "loss": 2.1557, "step": 17810 }, { "epoch": 0.06782731819462101, "grad_norm": 0.11237140744924545, "learning_rate": 0.0005, "loss": 2.1361, "step": 17820 }, { "epoch": 0.0678653806627437, "grad_norm": 0.13233919441699982, "learning_rate": 0.0005, "loss": 2.1366, "step": 17830 }, { "epoch": 0.06790344313086638, "grad_norm": 0.1204909235239029, "learning_rate": 0.0005, "loss": 2.135, "step": 17840 }, { "epoch": 0.06794150559898907, "grad_norm": 0.11478203535079956, "learning_rate": 0.0005, "loss": 2.1403, "step": 17850 }, { "epoch": 0.06797956806711174, "grad_norm": 0.1364944875240326, "learning_rate": 0.0005, "loss": 2.1431, "step": 17860 }, { "epoch": 0.06801763053523442, "grad_norm": 0.11078420281410217, "learning_rate": 0.0005, "loss": 2.1566, "step": 17870 }, { "epoch": 0.0680556930033571, "grad_norm": 0.12364714592695236, "learning_rate": 0.0005, "loss": 2.14, "step": 17880 }, { "epoch": 0.06809375547147979, "grad_norm": 0.11979297548532486, "learning_rate": 0.0005, "loss": 2.1407, "step": 17890 }, { "epoch": 0.06813181793960248, "grad_norm": 0.11409571021795273, "learning_rate": 0.0005, "loss": 2.1343, "step": 17900 }, { "epoch": 0.06816988040772516, "grad_norm": 0.12002553045749664, "learning_rate": 0.0005, "loss": 2.1459, "step": 17910 }, { "epoch": 0.06820794287584785, "grad_norm": 0.10696928203105927, "learning_rate": 0.0005, "loss": 2.1569, "step": 17920 }, { "epoch": 0.06824600534397053, "grad_norm": 0.1150284856557846, "learning_rate": 0.0005, "loss": 2.1469, "step": 17930 }, { "epoch": 0.0682840678120932, "grad_norm": 0.1319814920425415, "learning_rate": 0.0005, "loss": 2.1385, "step": 17940 }, { "epoch": 0.06832213028021589, "grad_norm": 0.11064282804727554, "learning_rate": 0.0005, "loss": 2.1413, "step": 17950 }, { "epoch": 0.06836019274833857, "grad_norm": 0.12755045294761658, "learning_rate": 0.0005, "loss": 2.169, "step": 17960 }, { "epoch": 0.06839825521646126, "grad_norm": 0.11299976706504822, "learning_rate": 0.0005, "loss": 2.1336, "step": 17970 }, { "epoch": 0.06843631768458394, "grad_norm": 0.12607094645500183, "learning_rate": 0.0005, "loss": 2.1528, "step": 17980 }, { "epoch": 0.06847438015270663, "grad_norm": 0.1116366758942604, "learning_rate": 0.0005, "loss": 2.141, "step": 17990 }, { "epoch": 0.06851244262082931, "grad_norm": 0.11597177386283875, "learning_rate": 0.0005, "loss": 2.1469, "step": 18000 }, { "epoch": 0.06855050508895198, "grad_norm": 0.12607623636722565, "learning_rate": 0.0005, "loss": 2.1354, "step": 18010 }, { "epoch": 0.06858856755707467, "grad_norm": 0.10983790457248688, "learning_rate": 0.0005, "loss": 2.1415, "step": 18020 }, { "epoch": 0.06862663002519735, "grad_norm": 0.1086328849196434, "learning_rate": 0.0005, "loss": 2.1478, "step": 18030 }, { "epoch": 0.06866469249332004, "grad_norm": 0.1174158975481987, "learning_rate": 0.0005, "loss": 2.1326, "step": 18040 }, { "epoch": 0.06870275496144272, "grad_norm": 0.11614352464675903, "learning_rate": 0.0005, "loss": 2.1317, "step": 18050 }, { "epoch": 0.0687408174295654, "grad_norm": 0.1141098216176033, "learning_rate": 0.0005, "loss": 2.141, "step": 18060 }, { "epoch": 0.06877887989768809, "grad_norm": 0.1251964271068573, "learning_rate": 0.0005, "loss": 2.1316, "step": 18070 }, { "epoch": 0.06881694236581076, "grad_norm": 0.1291750818490982, "learning_rate": 0.0005, "loss": 2.1368, "step": 18080 }, { "epoch": 0.06885500483393345, "grad_norm": 0.11388098448514938, "learning_rate": 0.0005, "loss": 2.1386, "step": 18090 }, { "epoch": 0.06889306730205613, "grad_norm": 0.10952631384134293, "learning_rate": 0.0005, "loss": 2.1383, "step": 18100 }, { "epoch": 0.06893112977017882, "grad_norm": 0.1275061070919037, "learning_rate": 0.0005, "loss": 2.1307, "step": 18110 }, { "epoch": 0.0689691922383015, "grad_norm": 0.09898626804351807, "learning_rate": 0.0005, "loss": 2.1508, "step": 18120 }, { "epoch": 0.06900725470642419, "grad_norm": 0.11916225403547287, "learning_rate": 0.0005, "loss": 2.1204, "step": 18130 }, { "epoch": 0.06904531717454687, "grad_norm": 0.12321484833955765, "learning_rate": 0.0005, "loss": 2.1433, "step": 18140 }, { "epoch": 0.06908337964266954, "grad_norm": 0.11095452308654785, "learning_rate": 0.0005, "loss": 2.1333, "step": 18150 }, { "epoch": 0.06912144211079223, "grad_norm": 0.1228238046169281, "learning_rate": 0.0005, "loss": 2.1466, "step": 18160 }, { "epoch": 0.06915950457891491, "grad_norm": 0.1137445792555809, "learning_rate": 0.0005, "loss": 2.1415, "step": 18170 }, { "epoch": 0.0691975670470376, "grad_norm": 0.11743122339248657, "learning_rate": 0.0005, "loss": 2.1501, "step": 18180 }, { "epoch": 0.06923562951516028, "grad_norm": 0.129505917429924, "learning_rate": 0.0005, "loss": 2.1365, "step": 18190 }, { "epoch": 0.06927369198328297, "grad_norm": 0.1274467259645462, "learning_rate": 0.0005, "loss": 2.1362, "step": 18200 }, { "epoch": 0.06931175445140565, "grad_norm": 0.12610933184623718, "learning_rate": 0.0005, "loss": 2.1389, "step": 18210 }, { "epoch": 0.06934981691952832, "grad_norm": 0.12468487024307251, "learning_rate": 0.0005, "loss": 2.138, "step": 18220 }, { "epoch": 0.06938787938765101, "grad_norm": 0.11550614982843399, "learning_rate": 0.0005, "loss": 2.1256, "step": 18230 }, { "epoch": 0.06942594185577369, "grad_norm": 0.12684103846549988, "learning_rate": 0.0005, "loss": 2.1397, "step": 18240 }, { "epoch": 0.06946400432389638, "grad_norm": 0.1284370720386505, "learning_rate": 0.0005, "loss": 2.143, "step": 18250 }, { "epoch": 0.06950206679201906, "grad_norm": 0.12868432700634003, "learning_rate": 0.0005, "loss": 2.1507, "step": 18260 }, { "epoch": 0.06954012926014175, "grad_norm": 0.13332092761993408, "learning_rate": 0.0005, "loss": 2.145, "step": 18270 }, { "epoch": 0.06957819172826443, "grad_norm": 0.11784756183624268, "learning_rate": 0.0005, "loss": 2.1421, "step": 18280 }, { "epoch": 0.06961625419638712, "grad_norm": 0.12656499445438385, "learning_rate": 0.0005, "loss": 2.1407, "step": 18290 }, { "epoch": 0.06965431666450979, "grad_norm": 0.1278528869152069, "learning_rate": 0.0005, "loss": 2.1523, "step": 18300 }, { "epoch": 0.06969237913263247, "grad_norm": 0.12106233090162277, "learning_rate": 0.0005, "loss": 2.1405, "step": 18310 }, { "epoch": 0.06973044160075516, "grad_norm": 0.1094827800989151, "learning_rate": 0.0005, "loss": 2.1398, "step": 18320 }, { "epoch": 0.06976850406887784, "grad_norm": 0.12173653393983841, "learning_rate": 0.0005, "loss": 2.1378, "step": 18330 }, { "epoch": 0.06980656653700053, "grad_norm": 0.13432110846042633, "learning_rate": 0.0005, "loss": 2.1437, "step": 18340 }, { "epoch": 0.06984462900512321, "grad_norm": 0.12391926348209381, "learning_rate": 0.0005, "loss": 2.141, "step": 18350 }, { "epoch": 0.0698826914732459, "grad_norm": 0.11583663523197174, "learning_rate": 0.0005, "loss": 2.1492, "step": 18360 }, { "epoch": 0.06992075394136857, "grad_norm": 0.13333432376384735, "learning_rate": 0.0005, "loss": 2.1454, "step": 18370 }, { "epoch": 0.06995881640949125, "grad_norm": 0.1078396886587143, "learning_rate": 0.0005, "loss": 2.1468, "step": 18380 }, { "epoch": 0.06999687887761394, "grad_norm": 0.11615791916847229, "learning_rate": 0.0005, "loss": 2.1507, "step": 18390 }, { "epoch": 0.07003494134573662, "grad_norm": 0.11581426858901978, "learning_rate": 0.0005, "loss": 2.1521, "step": 18400 }, { "epoch": 0.07007300381385931, "grad_norm": 0.11066626757383347, "learning_rate": 0.0005, "loss": 2.1322, "step": 18410 }, { "epoch": 0.07011106628198199, "grad_norm": 0.12922365963459015, "learning_rate": 0.0005, "loss": 2.1436, "step": 18420 }, { "epoch": 0.07014912875010468, "grad_norm": 0.11376654356718063, "learning_rate": 0.0005, "loss": 2.1493, "step": 18430 }, { "epoch": 0.07018719121822735, "grad_norm": 0.12547221779823303, "learning_rate": 0.0005, "loss": 2.1316, "step": 18440 }, { "epoch": 0.07022525368635003, "grad_norm": 0.1183745265007019, "learning_rate": 0.0005, "loss": 2.1387, "step": 18450 }, { "epoch": 0.07026331615447272, "grad_norm": 0.11223198473453522, "learning_rate": 0.0005, "loss": 2.1489, "step": 18460 }, { "epoch": 0.0703013786225954, "grad_norm": 0.12431668490171432, "learning_rate": 0.0005, "loss": 2.1332, "step": 18470 }, { "epoch": 0.07033944109071809, "grad_norm": 0.12922313809394836, "learning_rate": 0.0005, "loss": 2.1407, "step": 18480 }, { "epoch": 0.07037750355884077, "grad_norm": 0.11536691337823868, "learning_rate": 0.0005, "loss": 2.1449, "step": 18490 }, { "epoch": 0.07041556602696346, "grad_norm": 0.1122339516878128, "learning_rate": 0.0005, "loss": 2.1494, "step": 18500 }, { "epoch": 0.07045362849508613, "grad_norm": 0.11720927059650421, "learning_rate": 0.0005, "loss": 2.1396, "step": 18510 }, { "epoch": 0.07049169096320881, "grad_norm": 0.10899330675601959, "learning_rate": 0.0005, "loss": 2.1419, "step": 18520 }, { "epoch": 0.0705297534313315, "grad_norm": 0.11351705342531204, "learning_rate": 0.0005, "loss": 2.1358, "step": 18530 }, { "epoch": 0.07056781589945418, "grad_norm": 0.11961089074611664, "learning_rate": 0.0005, "loss": 2.1438, "step": 18540 }, { "epoch": 0.07060587836757687, "grad_norm": 0.11589095741510391, "learning_rate": 0.0005, "loss": 2.1277, "step": 18550 }, { "epoch": 0.07064394083569955, "grad_norm": 0.13567408919334412, "learning_rate": 0.0005, "loss": 2.1352, "step": 18560 }, { "epoch": 0.07068200330382224, "grad_norm": 0.23228104412555695, "learning_rate": 0.0005, "loss": 2.146, "step": 18570 }, { "epoch": 0.07072006577194491, "grad_norm": 0.1197076290845871, "learning_rate": 0.0005, "loss": 2.1523, "step": 18580 }, { "epoch": 0.0707581282400676, "grad_norm": 0.11958331614732742, "learning_rate": 0.0005, "loss": 2.1448, "step": 18590 }, { "epoch": 0.07079619070819028, "grad_norm": 0.12761662900447845, "learning_rate": 0.0005, "loss": 2.1387, "step": 18600 }, { "epoch": 0.07083425317631296, "grad_norm": 0.12039892375469208, "learning_rate": 0.0005, "loss": 2.1337, "step": 18610 }, { "epoch": 0.07087231564443565, "grad_norm": 0.12191681563854218, "learning_rate": 0.0005, "loss": 2.1454, "step": 18620 }, { "epoch": 0.07091037811255833, "grad_norm": 0.12030960619449615, "learning_rate": 0.0005, "loss": 2.1381, "step": 18630 }, { "epoch": 0.07094844058068102, "grad_norm": 0.11639144271612167, "learning_rate": 0.0005, "loss": 2.1562, "step": 18640 }, { "epoch": 0.0709865030488037, "grad_norm": 0.12752319872379303, "learning_rate": 0.0005, "loss": 2.1334, "step": 18650 }, { "epoch": 0.07102456551692637, "grad_norm": 0.1367523968219757, "learning_rate": 0.0005, "loss": 2.1381, "step": 18660 }, { "epoch": 0.07106262798504906, "grad_norm": 0.12350116670131683, "learning_rate": 0.0005, "loss": 2.1361, "step": 18670 }, { "epoch": 0.07110069045317174, "grad_norm": 0.1435011476278305, "learning_rate": 0.0005, "loss": 2.1504, "step": 18680 }, { "epoch": 0.07113875292129443, "grad_norm": 0.13752315938472748, "learning_rate": 0.0005, "loss": 2.1299, "step": 18690 }, { "epoch": 0.07117681538941711, "grad_norm": 0.1276029795408249, "learning_rate": 0.0005, "loss": 2.1475, "step": 18700 }, { "epoch": 0.0712148778575398, "grad_norm": 0.11291246861219406, "learning_rate": 0.0005, "loss": 2.1463, "step": 18710 }, { "epoch": 0.07125294032566248, "grad_norm": 0.11970442533493042, "learning_rate": 0.0005, "loss": 2.1446, "step": 18720 }, { "epoch": 0.07129100279378515, "grad_norm": 0.11741726100444794, "learning_rate": 0.0005, "loss": 2.1578, "step": 18730 }, { "epoch": 0.07132906526190784, "grad_norm": 0.12280002236366272, "learning_rate": 0.0005, "loss": 2.1446, "step": 18740 }, { "epoch": 0.07136712773003052, "grad_norm": 0.1194797158241272, "learning_rate": 0.0005, "loss": 2.1444, "step": 18750 }, { "epoch": 0.07140519019815321, "grad_norm": 0.12653280794620514, "learning_rate": 0.0005, "loss": 2.1454, "step": 18760 }, { "epoch": 0.0714432526662759, "grad_norm": 0.10515428334474564, "learning_rate": 0.0005, "loss": 2.1315, "step": 18770 }, { "epoch": 0.07148131513439858, "grad_norm": 0.12359602004289627, "learning_rate": 0.0005, "loss": 2.1506, "step": 18780 }, { "epoch": 0.07151937760252126, "grad_norm": 0.1203257292509079, "learning_rate": 0.0005, "loss": 2.1468, "step": 18790 }, { "epoch": 0.07155744007064394, "grad_norm": 0.124353788793087, "learning_rate": 0.0005, "loss": 2.1356, "step": 18800 }, { "epoch": 0.07159550253876662, "grad_norm": 0.13274051249027252, "learning_rate": 0.0005, "loss": 2.1262, "step": 18810 }, { "epoch": 0.0716335650068893, "grad_norm": 0.12063852697610855, "learning_rate": 0.0005, "loss": 2.139, "step": 18820 }, { "epoch": 0.07167162747501199, "grad_norm": 0.13945847749710083, "learning_rate": 0.0005, "loss": 2.1295, "step": 18830 }, { "epoch": 0.07170968994313467, "grad_norm": 0.12185059487819672, "learning_rate": 0.0005, "loss": 2.1279, "step": 18840 }, { "epoch": 0.07174775241125736, "grad_norm": 0.10996271669864655, "learning_rate": 0.0005, "loss": 2.1422, "step": 18850 }, { "epoch": 0.07178581487938004, "grad_norm": 0.11733502149581909, "learning_rate": 0.0005, "loss": 2.1456, "step": 18860 }, { "epoch": 0.07182387734750272, "grad_norm": 0.12420696020126343, "learning_rate": 0.0005, "loss": 2.1484, "step": 18870 }, { "epoch": 0.0718619398156254, "grad_norm": 0.11696764081716537, "learning_rate": 0.0005, "loss": 2.1364, "step": 18880 }, { "epoch": 0.07190000228374809, "grad_norm": 0.11581572145223618, "learning_rate": 0.0005, "loss": 2.1374, "step": 18890 }, { "epoch": 0.07193806475187077, "grad_norm": 0.1266104280948639, "learning_rate": 0.0005, "loss": 2.141, "step": 18900 }, { "epoch": 0.07197612721999345, "grad_norm": 0.11462007462978363, "learning_rate": 0.0005, "loss": 2.1453, "step": 18910 }, { "epoch": 0.07201418968811614, "grad_norm": 0.1148967519402504, "learning_rate": 0.0005, "loss": 2.1452, "step": 18920 }, { "epoch": 0.07205225215623882, "grad_norm": 0.11684548854827881, "learning_rate": 0.0005, "loss": 2.1338, "step": 18930 }, { "epoch": 0.0720903146243615, "grad_norm": 0.12068881839513779, "learning_rate": 0.0005, "loss": 2.1478, "step": 18940 }, { "epoch": 0.07212837709248418, "grad_norm": 0.13326232135295868, "learning_rate": 0.0005, "loss": 2.1428, "step": 18950 }, { "epoch": 0.07216643956060687, "grad_norm": 0.10820142179727554, "learning_rate": 0.0005, "loss": 2.1527, "step": 18960 }, { "epoch": 0.07220450202872955, "grad_norm": 0.12245963513851166, "learning_rate": 0.0005, "loss": 2.1344, "step": 18970 }, { "epoch": 0.07224256449685224, "grad_norm": 0.1312924176454544, "learning_rate": 0.0005, "loss": 2.1556, "step": 18980 }, { "epoch": 0.07228062696497492, "grad_norm": 0.12191825360059738, "learning_rate": 0.0005, "loss": 2.1468, "step": 18990 }, { "epoch": 0.0723186894330976, "grad_norm": 0.11584730446338654, "learning_rate": 0.0005, "loss": 2.128, "step": 19000 }, { "epoch": 0.07235675190122028, "grad_norm": 0.11659805476665497, "learning_rate": 0.0005, "loss": 2.1425, "step": 19010 }, { "epoch": 0.07239481436934296, "grad_norm": 0.11305439472198486, "learning_rate": 0.0005, "loss": 2.131, "step": 19020 }, { "epoch": 0.07243287683746565, "grad_norm": 0.11925303190946579, "learning_rate": 0.0005, "loss": 2.1417, "step": 19030 }, { "epoch": 0.07247093930558833, "grad_norm": 0.13060851395130157, "learning_rate": 0.0005, "loss": 2.1415, "step": 19040 }, { "epoch": 0.07250900177371102, "grad_norm": 0.11164027452468872, "learning_rate": 0.0005, "loss": 2.1467, "step": 19050 }, { "epoch": 0.0725470642418337, "grad_norm": 0.12202701717615128, "learning_rate": 0.0005, "loss": 2.1324, "step": 19060 }, { "epoch": 0.07258512670995639, "grad_norm": 0.1152314841747284, "learning_rate": 0.0005, "loss": 2.1215, "step": 19070 }, { "epoch": 0.07262318917807907, "grad_norm": 0.11402899026870728, "learning_rate": 0.0005, "loss": 2.1495, "step": 19080 }, { "epoch": 0.07266125164620174, "grad_norm": 0.1244824081659317, "learning_rate": 0.0005, "loss": 2.1312, "step": 19090 }, { "epoch": 0.07269931411432443, "grad_norm": 0.13850195705890656, "learning_rate": 0.0005, "loss": 2.1403, "step": 19100 }, { "epoch": 0.07273737658244711, "grad_norm": 0.11881489306688309, "learning_rate": 0.0005, "loss": 2.137, "step": 19110 }, { "epoch": 0.0727754390505698, "grad_norm": 0.11745548248291016, "learning_rate": 0.0005, "loss": 2.1381, "step": 19120 }, { "epoch": 0.07281350151869248, "grad_norm": 0.12252794206142426, "learning_rate": 0.0005, "loss": 2.1214, "step": 19130 }, { "epoch": 0.07285156398681517, "grad_norm": 0.12518726289272308, "learning_rate": 0.0005, "loss": 2.1499, "step": 19140 }, { "epoch": 0.07288962645493785, "grad_norm": 0.11924619227647781, "learning_rate": 0.0005, "loss": 2.1347, "step": 19150 }, { "epoch": 0.07292768892306052, "grad_norm": 0.11636649817228317, "learning_rate": 0.0005, "loss": 2.1313, "step": 19160 }, { "epoch": 0.0729657513911832, "grad_norm": 0.11934640258550644, "learning_rate": 0.0005, "loss": 2.1438, "step": 19170 }, { "epoch": 0.07300381385930589, "grad_norm": 0.1170443445444107, "learning_rate": 0.0005, "loss": 2.1283, "step": 19180 }, { "epoch": 0.07304187632742858, "grad_norm": 0.11632074415683746, "learning_rate": 0.0005, "loss": 2.1312, "step": 19190 }, { "epoch": 0.07307993879555126, "grad_norm": 0.12896569073200226, "learning_rate": 0.0005, "loss": 2.1435, "step": 19200 }, { "epoch": 0.07311800126367395, "grad_norm": 0.1291140466928482, "learning_rate": 0.0005, "loss": 2.151, "step": 19210 }, { "epoch": 0.07315606373179663, "grad_norm": 0.11567611247301102, "learning_rate": 0.0005, "loss": 2.1459, "step": 19220 }, { "epoch": 0.0731941261999193, "grad_norm": 0.11281578242778778, "learning_rate": 0.0005, "loss": 2.1437, "step": 19230 }, { "epoch": 0.07323218866804199, "grad_norm": 0.16732710599899292, "learning_rate": 0.0005, "loss": 2.1554, "step": 19240 }, { "epoch": 0.07327025113616467, "grad_norm": 0.13350912928581238, "learning_rate": 0.0005, "loss": 2.1364, "step": 19250 }, { "epoch": 0.07330831360428736, "grad_norm": 0.12566125392913818, "learning_rate": 0.0005, "loss": 2.1452, "step": 19260 }, { "epoch": 0.07334637607241004, "grad_norm": 0.12431460618972778, "learning_rate": 0.0005, "loss": 2.1265, "step": 19270 }, { "epoch": 0.07338443854053273, "grad_norm": 0.14875809848308563, "learning_rate": 0.0005, "loss": 2.1502, "step": 19280 }, { "epoch": 0.07342250100865541, "grad_norm": 0.11105544120073318, "learning_rate": 0.0005, "loss": 2.1549, "step": 19290 }, { "epoch": 0.07346056347677808, "grad_norm": 0.11542293429374695, "learning_rate": 0.0005, "loss": 2.1491, "step": 19300 }, { "epoch": 0.07349862594490077, "grad_norm": 0.10311754047870636, "learning_rate": 0.0005, "loss": 2.1368, "step": 19310 }, { "epoch": 0.07353668841302345, "grad_norm": 0.10644451528787613, "learning_rate": 0.0005, "loss": 2.1292, "step": 19320 }, { "epoch": 0.07357475088114614, "grad_norm": 0.13121920824050903, "learning_rate": 0.0005, "loss": 2.123, "step": 19330 }, { "epoch": 0.07361281334926882, "grad_norm": 0.12500645220279694, "learning_rate": 0.0005, "loss": 2.1369, "step": 19340 }, { "epoch": 0.0736508758173915, "grad_norm": 0.11441967636346817, "learning_rate": 0.0005, "loss": 2.1306, "step": 19350 }, { "epoch": 0.07368893828551419, "grad_norm": 0.12399695813655853, "learning_rate": 0.0005, "loss": 2.1348, "step": 19360 }, { "epoch": 0.07372700075363686, "grad_norm": 0.10966715216636658, "learning_rate": 0.0005, "loss": 2.1258, "step": 19370 }, { "epoch": 0.07376506322175955, "grad_norm": 0.12043928354978561, "learning_rate": 0.0005, "loss": 2.1436, "step": 19380 }, { "epoch": 0.07380312568988223, "grad_norm": 0.11464910954236984, "learning_rate": 0.0005, "loss": 2.1428, "step": 19390 }, { "epoch": 0.07384118815800492, "grad_norm": 0.11865679919719696, "learning_rate": 0.0005, "loss": 2.149, "step": 19400 }, { "epoch": 0.0738792506261276, "grad_norm": 0.12201295047998428, "learning_rate": 0.0005, "loss": 2.1527, "step": 19410 }, { "epoch": 0.07391731309425029, "grad_norm": 0.11938714981079102, "learning_rate": 0.0005, "loss": 2.1405, "step": 19420 }, { "epoch": 0.07395537556237297, "grad_norm": 0.1209445670247078, "learning_rate": 0.0005, "loss": 2.1497, "step": 19430 }, { "epoch": 0.07399343803049566, "grad_norm": 0.11020023375749588, "learning_rate": 0.0005, "loss": 2.1428, "step": 19440 }, { "epoch": 0.07403150049861833, "grad_norm": 0.12486252933740616, "learning_rate": 0.0005, "loss": 2.1439, "step": 19450 }, { "epoch": 0.07406956296674101, "grad_norm": 0.12022566050291061, "learning_rate": 0.0005, "loss": 2.1292, "step": 19460 }, { "epoch": 0.0741076254348637, "grad_norm": 0.11662788689136505, "learning_rate": 0.0005, "loss": 2.1371, "step": 19470 }, { "epoch": 0.07414568790298638, "grad_norm": 0.14027582108974457, "learning_rate": 0.0005, "loss": 2.1442, "step": 19480 }, { "epoch": 0.07418375037110907, "grad_norm": 0.11259403824806213, "learning_rate": 0.0005, "loss": 2.129, "step": 19490 }, { "epoch": 0.07422181283923175, "grad_norm": 0.12038365751504898, "learning_rate": 0.0005, "loss": 2.1433, "step": 19500 }, { "epoch": 0.07425987530735444, "grad_norm": 0.12720441818237305, "learning_rate": 0.0005, "loss": 2.1288, "step": 19510 }, { "epoch": 0.07429793777547711, "grad_norm": 0.11576971411705017, "learning_rate": 0.0005, "loss": 2.1374, "step": 19520 }, { "epoch": 0.07433600024359979, "grad_norm": 0.1155039444565773, "learning_rate": 0.0005, "loss": 2.1253, "step": 19530 }, { "epoch": 0.07437406271172248, "grad_norm": 0.12826068699359894, "learning_rate": 0.0005, "loss": 2.1451, "step": 19540 }, { "epoch": 0.07441212517984516, "grad_norm": 0.11534158885478973, "learning_rate": 0.0005, "loss": 2.1365, "step": 19550 }, { "epoch": 0.07445018764796785, "grad_norm": 0.11677207797765732, "learning_rate": 0.0005, "loss": 2.1415, "step": 19560 }, { "epoch": 0.07448825011609053, "grad_norm": 0.12580077350139618, "learning_rate": 0.0005, "loss": 2.125, "step": 19570 }, { "epoch": 0.07452631258421322, "grad_norm": 0.12557542324066162, "learning_rate": 0.0005, "loss": 2.1357, "step": 19580 }, { "epoch": 0.07456437505233589, "grad_norm": 0.1145382896065712, "learning_rate": 0.0005, "loss": 2.1437, "step": 19590 }, { "epoch": 0.07460243752045857, "grad_norm": 0.1154215931892395, "learning_rate": 0.0005, "loss": 2.1494, "step": 19600 }, { "epoch": 0.07464049998858126, "grad_norm": 0.11266138404607773, "learning_rate": 0.0005, "loss": 2.1514, "step": 19610 }, { "epoch": 0.07467856245670394, "grad_norm": 0.11759719252586365, "learning_rate": 0.0005, "loss": 2.1194, "step": 19620 }, { "epoch": 0.07471662492482663, "grad_norm": 0.12315239012241364, "learning_rate": 0.0005, "loss": 2.1343, "step": 19630 }, { "epoch": 0.07475468739294931, "grad_norm": 0.1262696534395218, "learning_rate": 0.0005, "loss": 2.1638, "step": 19640 }, { "epoch": 0.074792749861072, "grad_norm": 0.12901771068572998, "learning_rate": 0.0005, "loss": 2.1587, "step": 19650 }, { "epoch": 0.07483081232919467, "grad_norm": 0.12097518891096115, "learning_rate": 0.0005, "loss": 2.1526, "step": 19660 }, { "epoch": 0.07486887479731735, "grad_norm": 0.11667900532484055, "learning_rate": 0.0005, "loss": 2.1256, "step": 19670 }, { "epoch": 0.07490693726544004, "grad_norm": 0.12947173416614532, "learning_rate": 0.0005, "loss": 2.1405, "step": 19680 }, { "epoch": 0.07494499973356272, "grad_norm": 0.12104139477014542, "learning_rate": 0.0005, "loss": 2.1446, "step": 19690 }, { "epoch": 0.07498306220168541, "grad_norm": 0.12678302824497223, "learning_rate": 0.0005, "loss": 2.1381, "step": 19700 }, { "epoch": 0.07502112466980809, "grad_norm": 0.12390743941068649, "learning_rate": 0.0005, "loss": 2.1452, "step": 19710 }, { "epoch": 0.07505918713793078, "grad_norm": 0.13258126378059387, "learning_rate": 0.0005, "loss": 2.1461, "step": 19720 }, { "epoch": 0.07509724960605345, "grad_norm": 0.1314745992422104, "learning_rate": 0.0005, "loss": 2.1365, "step": 19730 }, { "epoch": 0.07513531207417613, "grad_norm": 0.12059265375137329, "learning_rate": 0.0005, "loss": 2.1429, "step": 19740 }, { "epoch": 0.07517337454229882, "grad_norm": 0.12166786938905716, "learning_rate": 0.0005, "loss": 2.1161, "step": 19750 }, { "epoch": 0.0752114370104215, "grad_norm": 0.12821173667907715, "learning_rate": 0.0005, "loss": 2.1538, "step": 19760 }, { "epoch": 0.07524949947854419, "grad_norm": 0.12376900017261505, "learning_rate": 0.0005, "loss": 2.144, "step": 19770 }, { "epoch": 0.07528756194666687, "grad_norm": 0.1299164593219757, "learning_rate": 0.0005, "loss": 2.1282, "step": 19780 }, { "epoch": 0.07532562441478956, "grad_norm": 0.1086520105600357, "learning_rate": 0.0005, "loss": 2.1195, "step": 19790 }, { "epoch": 0.07536368688291224, "grad_norm": 0.11544159054756165, "learning_rate": 0.0005, "loss": 2.1503, "step": 19800 }, { "epoch": 0.07540174935103491, "grad_norm": 0.12263761460781097, "learning_rate": 0.0005, "loss": 2.1581, "step": 19810 }, { "epoch": 0.0754398118191576, "grad_norm": 0.17075596749782562, "learning_rate": 0.0005, "loss": 2.1277, "step": 19820 }, { "epoch": 0.07547787428728028, "grad_norm": 0.1148160845041275, "learning_rate": 0.0005, "loss": 2.1187, "step": 19830 }, { "epoch": 0.07551593675540297, "grad_norm": 0.11638422310352325, "learning_rate": 0.0005, "loss": 2.1448, "step": 19840 }, { "epoch": 0.07555399922352565, "grad_norm": 0.10947652906179428, "learning_rate": 0.0005, "loss": 2.1478, "step": 19850 }, { "epoch": 0.07559206169164834, "grad_norm": 0.11749492585659027, "learning_rate": 0.0005, "loss": 2.1437, "step": 19860 }, { "epoch": 0.07563012415977102, "grad_norm": 0.12871646881103516, "learning_rate": 0.0005, "loss": 2.1427, "step": 19870 }, { "epoch": 0.0756681866278937, "grad_norm": 0.119835264980793, "learning_rate": 0.0005, "loss": 2.1375, "step": 19880 }, { "epoch": 0.07570624909601638, "grad_norm": 0.12297997623682022, "learning_rate": 0.0005, "loss": 2.1422, "step": 19890 }, { "epoch": 0.07574431156413906, "grad_norm": 0.11132816970348358, "learning_rate": 0.0005, "loss": 2.143, "step": 19900 }, { "epoch": 0.07578237403226175, "grad_norm": 0.12448112666606903, "learning_rate": 0.0005, "loss": 2.1444, "step": 19910 }, { "epoch": 0.07582043650038443, "grad_norm": 0.1350453794002533, "learning_rate": 0.0005, "loss": 2.156, "step": 19920 }, { "epoch": 0.07585849896850712, "grad_norm": 0.11363179236650467, "learning_rate": 0.0005, "loss": 2.1383, "step": 19930 }, { "epoch": 0.0758965614366298, "grad_norm": 0.23336747288703918, "learning_rate": 0.0005, "loss": 2.1301, "step": 19940 }, { "epoch": 0.07593462390475247, "grad_norm": 0.13528256118297577, "learning_rate": 0.0005, "loss": 2.1404, "step": 19950 }, { "epoch": 0.07597268637287516, "grad_norm": 0.11738921701908112, "learning_rate": 0.0005, "loss": 2.1426, "step": 19960 }, { "epoch": 0.07601074884099784, "grad_norm": 0.11897687613964081, "learning_rate": 0.0005, "loss": 2.1465, "step": 19970 }, { "epoch": 0.07604881130912053, "grad_norm": 0.12669093906879425, "learning_rate": 0.0005, "loss": 2.1329, "step": 19980 }, { "epoch": 0.07608687377724321, "grad_norm": 0.11751807481050491, "learning_rate": 0.0005, "loss": 2.1317, "step": 19990 }, { "epoch": 0.0761249362453659, "grad_norm": 0.13250021636486053, "learning_rate": 0.0005, "loss": 2.1254, "step": 20000 }, { "epoch": 0.07616299871348858, "grad_norm": 0.12080192565917969, "learning_rate": 0.0005, "loss": 2.1334, "step": 20010 }, { "epoch": 0.07620106118161125, "grad_norm": 0.13605637848377228, "learning_rate": 0.0005, "loss": 2.1417, "step": 20020 }, { "epoch": 0.07623912364973394, "grad_norm": 0.13749563694000244, "learning_rate": 0.0005, "loss": 2.1557, "step": 20030 }, { "epoch": 0.07627718611785662, "grad_norm": 0.12290322780609131, "learning_rate": 0.0005, "loss": 2.1404, "step": 20040 }, { "epoch": 0.07631524858597931, "grad_norm": 0.12153260409832001, "learning_rate": 0.0005, "loss": 2.148, "step": 20050 }, { "epoch": 0.076353311054102, "grad_norm": 0.11187773942947388, "learning_rate": 0.0005, "loss": 2.1516, "step": 20060 }, { "epoch": 0.07639137352222468, "grad_norm": 0.12065640836954117, "learning_rate": 0.0005, "loss": 2.133, "step": 20070 }, { "epoch": 0.07642943599034736, "grad_norm": 0.13967113196849823, "learning_rate": 0.0005, "loss": 2.1369, "step": 20080 }, { "epoch": 0.07646749845847003, "grad_norm": 0.12397325038909912, "learning_rate": 0.0005, "loss": 2.1395, "step": 20090 }, { "epoch": 0.07650556092659272, "grad_norm": 0.1470394879579544, "learning_rate": 0.0005, "loss": 2.1449, "step": 20100 }, { "epoch": 0.0765436233947154, "grad_norm": 0.11773227900266647, "learning_rate": 0.0005, "loss": 2.1407, "step": 20110 }, { "epoch": 0.07658168586283809, "grad_norm": 0.1158328652381897, "learning_rate": 0.0005, "loss": 2.1343, "step": 20120 }, { "epoch": 0.07661974833096077, "grad_norm": 0.1100144013762474, "learning_rate": 0.0005, "loss": 2.1441, "step": 20130 }, { "epoch": 0.07665781079908346, "grad_norm": 0.113619863986969, "learning_rate": 0.0005, "loss": 2.1456, "step": 20140 }, { "epoch": 0.07669587326720614, "grad_norm": 0.11518946290016174, "learning_rate": 0.0005, "loss": 2.1295, "step": 20150 }, { "epoch": 0.07673393573532881, "grad_norm": 0.1303129643201828, "learning_rate": 0.0005, "loss": 2.1496, "step": 20160 }, { "epoch": 0.0767719982034515, "grad_norm": 0.10941874235868454, "learning_rate": 0.0005, "loss": 2.117, "step": 20170 }, { "epoch": 0.07681006067157418, "grad_norm": 0.12623172998428345, "learning_rate": 0.0005, "loss": 2.1313, "step": 20180 }, { "epoch": 0.07684812313969687, "grad_norm": 0.1392190307378769, "learning_rate": 0.0005, "loss": 2.1438, "step": 20190 }, { "epoch": 0.07688618560781955, "grad_norm": 0.12022106349468231, "learning_rate": 0.0005, "loss": 2.1476, "step": 20200 }, { "epoch": 0.07692424807594224, "grad_norm": 0.12242559343576431, "learning_rate": 0.0005, "loss": 2.1439, "step": 20210 }, { "epoch": 0.07696231054406492, "grad_norm": 0.1221131682395935, "learning_rate": 0.0005, "loss": 2.1493, "step": 20220 }, { "epoch": 0.07700037301218761, "grad_norm": 0.14182278513908386, "learning_rate": 0.0005, "loss": 2.1629, "step": 20230 }, { "epoch": 0.07703843548031028, "grad_norm": 0.11639798432588577, "learning_rate": 0.0005, "loss": 2.1366, "step": 20240 }, { "epoch": 0.07707649794843296, "grad_norm": 0.1260325163602829, "learning_rate": 0.0005, "loss": 2.1323, "step": 20250 }, { "epoch": 0.07711456041655565, "grad_norm": 0.1177816316485405, "learning_rate": 0.0005, "loss": 2.1332, "step": 20260 }, { "epoch": 0.07715262288467833, "grad_norm": 0.12593698501586914, "learning_rate": 0.0005, "loss": 2.1402, "step": 20270 }, { "epoch": 0.07719068535280102, "grad_norm": 0.12568175792694092, "learning_rate": 0.0005, "loss": 2.1379, "step": 20280 }, { "epoch": 0.0772287478209237, "grad_norm": 0.12054798752069473, "learning_rate": 0.0005, "loss": 2.1264, "step": 20290 }, { "epoch": 0.07726681028904639, "grad_norm": 0.1292949765920639, "learning_rate": 0.0005, "loss": 2.1257, "step": 20300 }, { "epoch": 0.07730487275716906, "grad_norm": 0.11451704055070877, "learning_rate": 0.0005, "loss": 2.1143, "step": 20310 }, { "epoch": 0.07734293522529175, "grad_norm": 0.12161093950271606, "learning_rate": 0.0005, "loss": 2.1402, "step": 20320 }, { "epoch": 0.07738099769341443, "grad_norm": 0.12055949866771698, "learning_rate": 0.0005, "loss": 2.1457, "step": 20330 }, { "epoch": 0.07741906016153711, "grad_norm": 0.133913055062294, "learning_rate": 0.0005, "loss": 2.1354, "step": 20340 }, { "epoch": 0.0774571226296598, "grad_norm": 0.12069426476955414, "learning_rate": 0.0005, "loss": 2.1216, "step": 20350 }, { "epoch": 0.07749518509778248, "grad_norm": 0.11371367424726486, "learning_rate": 0.0005, "loss": 2.1416, "step": 20360 }, { "epoch": 0.07753324756590517, "grad_norm": 0.13321714103221893, "learning_rate": 0.0005, "loss": 2.13, "step": 20370 }, { "epoch": 0.07757131003402784, "grad_norm": 0.11763419955968857, "learning_rate": 0.0005, "loss": 2.149, "step": 20380 }, { "epoch": 0.07760937250215053, "grad_norm": 0.1404077112674713, "learning_rate": 0.0005, "loss": 2.1392, "step": 20390 }, { "epoch": 0.07764743497027321, "grad_norm": 0.11764439940452576, "learning_rate": 0.0005, "loss": 2.1436, "step": 20400 }, { "epoch": 0.0776854974383959, "grad_norm": 0.14752964675426483, "learning_rate": 0.0005, "loss": 2.1255, "step": 20410 }, { "epoch": 0.07772355990651858, "grad_norm": 0.1245315670967102, "learning_rate": 0.0005, "loss": 2.1383, "step": 20420 }, { "epoch": 0.07776162237464126, "grad_norm": 0.1200486496090889, "learning_rate": 0.0005, "loss": 2.1355, "step": 20430 }, { "epoch": 0.07779968484276395, "grad_norm": 0.1259072721004486, "learning_rate": 0.0005, "loss": 2.1631, "step": 20440 }, { "epoch": 0.07783774731088662, "grad_norm": 0.11854323744773865, "learning_rate": 0.0005, "loss": 2.1282, "step": 20450 }, { "epoch": 0.0778758097790093, "grad_norm": 0.14033794403076172, "learning_rate": 0.0005, "loss": 2.1521, "step": 20460 }, { "epoch": 0.07791387224713199, "grad_norm": 0.11894816905260086, "learning_rate": 0.0005, "loss": 2.1466, "step": 20470 }, { "epoch": 0.07795193471525468, "grad_norm": 0.11670060455799103, "learning_rate": 0.0005, "loss": 2.1298, "step": 20480 }, { "epoch": 0.07798999718337736, "grad_norm": 0.1166292354464531, "learning_rate": 0.0005, "loss": 2.1395, "step": 20490 }, { "epoch": 0.07802805965150005, "grad_norm": 0.12800370156764984, "learning_rate": 0.0005, "loss": 2.1474, "step": 20500 }, { "epoch": 0.07806612211962273, "grad_norm": 0.11468150466680527, "learning_rate": 0.0005, "loss": 2.1349, "step": 20510 }, { "epoch": 0.0781041845877454, "grad_norm": 0.12797722220420837, "learning_rate": 0.0005, "loss": 2.1434, "step": 20520 }, { "epoch": 0.07814224705586809, "grad_norm": 0.1424039900302887, "learning_rate": 0.0005, "loss": 2.1278, "step": 20530 }, { "epoch": 0.07818030952399077, "grad_norm": 0.11964821815490723, "learning_rate": 0.0005, "loss": 2.1425, "step": 20540 }, { "epoch": 0.07821837199211346, "grad_norm": 0.13776551187038422, "learning_rate": 0.0005, "loss": 2.146, "step": 20550 }, { "epoch": 0.07825643446023614, "grad_norm": 0.13417179882526398, "learning_rate": 0.0005, "loss": 2.1381, "step": 20560 }, { "epoch": 0.07829449692835883, "grad_norm": 0.13099367916584015, "learning_rate": 0.0005, "loss": 2.1519, "step": 20570 }, { "epoch": 0.07833255939648151, "grad_norm": 0.1310262531042099, "learning_rate": 0.0005, "loss": 2.1336, "step": 20580 }, { "epoch": 0.0783706218646042, "grad_norm": 0.10515132546424866, "learning_rate": 0.0005, "loss": 2.1367, "step": 20590 }, { "epoch": 0.07840868433272687, "grad_norm": 0.13627183437347412, "learning_rate": 0.0005, "loss": 2.1488, "step": 20600 }, { "epoch": 0.07844674680084955, "grad_norm": 0.12111663818359375, "learning_rate": 0.0005, "loss": 2.1482, "step": 20610 }, { "epoch": 0.07848480926897224, "grad_norm": 0.11795415729284286, "learning_rate": 0.0005, "loss": 2.1394, "step": 20620 }, { "epoch": 0.07852287173709492, "grad_norm": 0.11769380420446396, "learning_rate": 0.0005, "loss": 2.1698, "step": 20630 }, { "epoch": 0.0785609342052176, "grad_norm": 0.12819325923919678, "learning_rate": 0.0005, "loss": 2.1428, "step": 20640 }, { "epoch": 0.07859899667334029, "grad_norm": 0.135583758354187, "learning_rate": 0.0005, "loss": 2.1341, "step": 20650 }, { "epoch": 0.07863705914146298, "grad_norm": 0.13728412985801697, "learning_rate": 0.0005, "loss": 2.1338, "step": 20660 }, { "epoch": 0.07867512160958565, "grad_norm": 0.11297822743654251, "learning_rate": 0.0005, "loss": 2.143, "step": 20670 }, { "epoch": 0.07871318407770833, "grad_norm": 0.13838008046150208, "learning_rate": 0.0005, "loss": 2.1469, "step": 20680 }, { "epoch": 0.07875124654583102, "grad_norm": 0.11964000761508942, "learning_rate": 0.0005, "loss": 2.1286, "step": 20690 }, { "epoch": 0.0787893090139537, "grad_norm": 0.11306457221508026, "learning_rate": 0.0005, "loss": 2.1383, "step": 20700 }, { "epoch": 0.07882737148207639, "grad_norm": 0.1265837848186493, "learning_rate": 0.0005, "loss": 2.1495, "step": 20710 }, { "epoch": 0.07886543395019907, "grad_norm": 0.13833211362361908, "learning_rate": 0.0005, "loss": 2.1489, "step": 20720 }, { "epoch": 0.07890349641832176, "grad_norm": 0.12850770354270935, "learning_rate": 0.0005, "loss": 2.1381, "step": 20730 }, { "epoch": 0.07894155888644443, "grad_norm": 0.125825434923172, "learning_rate": 0.0005, "loss": 2.1485, "step": 20740 }, { "epoch": 0.07897962135456711, "grad_norm": 0.11365757882595062, "learning_rate": 0.0005, "loss": 2.1355, "step": 20750 }, { "epoch": 0.0790176838226898, "grad_norm": 0.11496775597333908, "learning_rate": 0.0005, "loss": 2.141, "step": 20760 }, { "epoch": 0.07905574629081248, "grad_norm": 0.11879292875528336, "learning_rate": 0.0005, "loss": 2.1573, "step": 20770 }, { "epoch": 0.07909380875893517, "grad_norm": 0.4306030571460724, "learning_rate": 0.0005, "loss": 2.1394, "step": 20780 }, { "epoch": 0.07913187122705785, "grad_norm": 0.12225326895713806, "learning_rate": 0.0005, "loss": 2.1462, "step": 20790 }, { "epoch": 0.07916993369518054, "grad_norm": 0.10665999352931976, "learning_rate": 0.0005, "loss": 2.1351, "step": 20800 }, { "epoch": 0.07920799616330321, "grad_norm": 0.11364707350730896, "learning_rate": 0.0005, "loss": 2.1286, "step": 20810 }, { "epoch": 0.07924605863142589, "grad_norm": 0.11900816112756729, "learning_rate": 0.0005, "loss": 2.1308, "step": 20820 }, { "epoch": 0.07928412109954858, "grad_norm": 0.1321338266134262, "learning_rate": 0.0005, "loss": 2.1249, "step": 20830 }, { "epoch": 0.07932218356767126, "grad_norm": 0.11729200184345245, "learning_rate": 0.0005, "loss": 2.1452, "step": 20840 }, { "epoch": 0.07936024603579395, "grad_norm": 0.1253681480884552, "learning_rate": 0.0005, "loss": 2.1412, "step": 20850 }, { "epoch": 0.07939830850391663, "grad_norm": 0.10999830067157745, "learning_rate": 0.0005, "loss": 2.1467, "step": 20860 }, { "epoch": 0.07943637097203932, "grad_norm": 0.1298547387123108, "learning_rate": 0.0005, "loss": 2.1387, "step": 20870 }, { "epoch": 0.07947443344016199, "grad_norm": 0.11368982493877411, "learning_rate": 0.0005, "loss": 2.1346, "step": 20880 }, { "epoch": 0.07951249590828467, "grad_norm": 0.10747841745615005, "learning_rate": 0.0005, "loss": 2.1528, "step": 20890 }, { "epoch": 0.07955055837640736, "grad_norm": 0.12383821606636047, "learning_rate": 0.0005, "loss": 2.1518, "step": 20900 }, { "epoch": 0.07958862084453004, "grad_norm": 0.1289435178041458, "learning_rate": 0.0005, "loss": 2.1321, "step": 20910 }, { "epoch": 0.07962668331265273, "grad_norm": 0.1302112489938736, "learning_rate": 0.0005, "loss": 2.1442, "step": 20920 }, { "epoch": 0.07966474578077541, "grad_norm": 0.11910293251276016, "learning_rate": 0.0005, "loss": 2.1392, "step": 20930 }, { "epoch": 0.0797028082488981, "grad_norm": 0.12185361236333847, "learning_rate": 0.0005, "loss": 2.1481, "step": 20940 }, { "epoch": 0.07974087071702077, "grad_norm": 0.12269507348537445, "learning_rate": 0.0005, "loss": 2.1388, "step": 20950 }, { "epoch": 0.07977893318514345, "grad_norm": 0.12856151163578033, "learning_rate": 0.0005, "loss": 2.1424, "step": 20960 }, { "epoch": 0.07981699565326614, "grad_norm": 0.11458587646484375, "learning_rate": 0.0005, "loss": 2.1272, "step": 20970 }, { "epoch": 0.07985505812138882, "grad_norm": 0.11583131551742554, "learning_rate": 0.0005, "loss": 2.1291, "step": 20980 }, { "epoch": 0.07989312058951151, "grad_norm": 0.121613048017025, "learning_rate": 0.0005, "loss": 2.1466, "step": 20990 }, { "epoch": 0.07993118305763419, "grad_norm": 0.11847091466188431, "learning_rate": 0.0005, "loss": 2.1403, "step": 21000 }, { "epoch": 0.07996924552575688, "grad_norm": 0.13026578724384308, "learning_rate": 0.0005, "loss": 2.1493, "step": 21010 }, { "epoch": 0.08000730799387956, "grad_norm": 0.10834717005491257, "learning_rate": 0.0005, "loss": 2.1306, "step": 21020 }, { "epoch": 0.08004537046200223, "grad_norm": 0.12242411822080612, "learning_rate": 0.0005, "loss": 2.1312, "step": 21030 }, { "epoch": 0.08008343293012492, "grad_norm": 0.11778593808412552, "learning_rate": 0.0005, "loss": 2.1484, "step": 21040 }, { "epoch": 0.0801214953982476, "grad_norm": 0.11754197627305984, "learning_rate": 0.0005, "loss": 2.1465, "step": 21050 }, { "epoch": 0.08015955786637029, "grad_norm": 0.11059972643852234, "learning_rate": 0.0005, "loss": 2.1415, "step": 21060 }, { "epoch": 0.08019762033449297, "grad_norm": 0.12480046600103378, "learning_rate": 0.0005, "loss": 2.1269, "step": 21070 }, { "epoch": 0.08023568280261566, "grad_norm": 0.10805027186870575, "learning_rate": 0.0005, "loss": 2.137, "step": 21080 }, { "epoch": 0.08027374527073834, "grad_norm": 0.12280254811048508, "learning_rate": 0.0005, "loss": 2.1391, "step": 21090 }, { "epoch": 0.08031180773886101, "grad_norm": 0.12043575197458267, "learning_rate": 0.0005, "loss": 2.1338, "step": 21100 }, { "epoch": 0.0803498702069837, "grad_norm": 0.11959745734930038, "learning_rate": 0.0005, "loss": 2.1307, "step": 21110 }, { "epoch": 0.08038793267510638, "grad_norm": 0.11902160197496414, "learning_rate": 0.0005, "loss": 2.1368, "step": 21120 }, { "epoch": 0.08042599514322907, "grad_norm": 0.12124022096395493, "learning_rate": 0.0005, "loss": 2.1381, "step": 21130 }, { "epoch": 0.08046405761135175, "grad_norm": 0.12325721979141235, "learning_rate": 0.0005, "loss": 2.1452, "step": 21140 }, { "epoch": 0.08050212007947444, "grad_norm": 0.12744925916194916, "learning_rate": 0.0005, "loss": 2.1409, "step": 21150 }, { "epoch": 0.08054018254759712, "grad_norm": 0.12019761651754379, "learning_rate": 0.0005, "loss": 2.1462, "step": 21160 }, { "epoch": 0.0805782450157198, "grad_norm": 0.1297874003648758, "learning_rate": 0.0005, "loss": 2.1299, "step": 21170 }, { "epoch": 0.08061630748384248, "grad_norm": 0.11909880489110947, "learning_rate": 0.0005, "loss": 2.1451, "step": 21180 }, { "epoch": 0.08065436995196516, "grad_norm": 0.11740684509277344, "learning_rate": 0.0005, "loss": 2.1328, "step": 21190 }, { "epoch": 0.08069243242008785, "grad_norm": 0.11909528821706772, "learning_rate": 0.0005, "loss": 2.1486, "step": 21200 }, { "epoch": 0.08073049488821053, "grad_norm": 0.12825709581375122, "learning_rate": 0.0005, "loss": 2.146, "step": 21210 }, { "epoch": 0.08076855735633322, "grad_norm": 0.12015367299318314, "learning_rate": 0.0005, "loss": 2.1368, "step": 21220 }, { "epoch": 0.0808066198244559, "grad_norm": 0.12286186218261719, "learning_rate": 0.0005, "loss": 2.1518, "step": 21230 }, { "epoch": 0.08084468229257857, "grad_norm": 0.13856559991836548, "learning_rate": 0.0005, "loss": 2.1384, "step": 21240 }, { "epoch": 0.08088274476070126, "grad_norm": 0.12698425352573395, "learning_rate": 0.0005, "loss": 2.1392, "step": 21250 }, { "epoch": 0.08092080722882394, "grad_norm": 0.12557373940944672, "learning_rate": 0.0005, "loss": 2.1343, "step": 21260 }, { "epoch": 0.08095886969694663, "grad_norm": 0.10690121352672577, "learning_rate": 0.0005, "loss": 2.1462, "step": 21270 }, { "epoch": 0.08099693216506931, "grad_norm": 0.10676196217536926, "learning_rate": 0.0005, "loss": 2.1276, "step": 21280 }, { "epoch": 0.081034994633192, "grad_norm": 0.13777486979961395, "learning_rate": 0.0005, "loss": 2.1399, "step": 21290 }, { "epoch": 0.08107305710131468, "grad_norm": 0.1320984661579132, "learning_rate": 0.0005, "loss": 2.138, "step": 21300 }, { "epoch": 0.08111111956943735, "grad_norm": 0.11867671459913254, "learning_rate": 0.0005, "loss": 2.1424, "step": 21310 }, { "epoch": 0.08114918203756004, "grad_norm": 0.12420900166034698, "learning_rate": 0.0005, "loss": 2.1453, "step": 21320 }, { "epoch": 0.08118724450568272, "grad_norm": 0.12168706208467484, "learning_rate": 0.0005, "loss": 2.1273, "step": 21330 }, { "epoch": 0.08122530697380541, "grad_norm": 0.11081932485103607, "learning_rate": 0.0005, "loss": 2.1371, "step": 21340 }, { "epoch": 0.0812633694419281, "grad_norm": 0.1164914146065712, "learning_rate": 0.0005, "loss": 2.1421, "step": 21350 }, { "epoch": 0.08130143191005078, "grad_norm": 0.11893417686223984, "learning_rate": 0.0005, "loss": 2.1468, "step": 21360 }, { "epoch": 0.08133949437817346, "grad_norm": 0.14425797760486603, "learning_rate": 0.0005, "loss": 2.144, "step": 21370 }, { "epoch": 0.08137755684629615, "grad_norm": 0.12183461338281631, "learning_rate": 0.0005, "loss": 2.1391, "step": 21380 }, { "epoch": 0.08141561931441882, "grad_norm": 0.12465500086545944, "learning_rate": 0.0005, "loss": 2.1462, "step": 21390 }, { "epoch": 0.0814536817825415, "grad_norm": 0.14262641966342926, "learning_rate": 0.0005, "loss": 2.1458, "step": 21400 }, { "epoch": 0.08149174425066419, "grad_norm": 0.12002576887607574, "learning_rate": 0.0005, "loss": 2.1357, "step": 21410 }, { "epoch": 0.08152980671878687, "grad_norm": 0.12708404660224915, "learning_rate": 0.0005, "loss": 2.1402, "step": 21420 }, { "epoch": 0.08156786918690956, "grad_norm": 0.131977841258049, "learning_rate": 0.0005, "loss": 2.1457, "step": 21430 }, { "epoch": 0.08160593165503224, "grad_norm": 0.1188463419675827, "learning_rate": 0.0005, "loss": 2.1442, "step": 21440 }, { "epoch": 0.08164399412315493, "grad_norm": 0.12172247469425201, "learning_rate": 0.0005, "loss": 2.152, "step": 21450 }, { "epoch": 0.0816820565912776, "grad_norm": 0.1287251114845276, "learning_rate": 0.0005, "loss": 2.1334, "step": 21460 }, { "epoch": 0.08172011905940028, "grad_norm": 0.12985315918922424, "learning_rate": 0.0005, "loss": 2.1349, "step": 21470 }, { "epoch": 0.08175818152752297, "grad_norm": 0.136166512966156, "learning_rate": 0.0005, "loss": 2.1602, "step": 21480 }, { "epoch": 0.08179624399564565, "grad_norm": 0.12115434557199478, "learning_rate": 0.0005, "loss": 2.1456, "step": 21490 }, { "epoch": 0.08183430646376834, "grad_norm": 0.11187921464443207, "learning_rate": 0.0005, "loss": 2.1413, "step": 21500 }, { "epoch": 0.08187236893189102, "grad_norm": 0.13115552067756653, "learning_rate": 0.0005, "loss": 2.1371, "step": 21510 }, { "epoch": 0.08191043140001371, "grad_norm": 0.12254226207733154, "learning_rate": 0.0005, "loss": 2.1365, "step": 21520 }, { "epoch": 0.08194849386813638, "grad_norm": 0.11350936442613602, "learning_rate": 0.0005, "loss": 2.1428, "step": 21530 }, { "epoch": 0.08198655633625906, "grad_norm": 0.13715927302837372, "learning_rate": 0.0005, "loss": 2.1264, "step": 21540 }, { "epoch": 0.08202461880438175, "grad_norm": 0.11191634833812714, "learning_rate": 0.0005, "loss": 2.1252, "step": 21550 }, { "epoch": 0.08206268127250443, "grad_norm": 0.12232932448387146, "learning_rate": 0.0005, "loss": 2.1449, "step": 21560 }, { "epoch": 0.08210074374062712, "grad_norm": 0.10877176374197006, "learning_rate": 0.0005, "loss": 2.1313, "step": 21570 }, { "epoch": 0.0821388062087498, "grad_norm": 0.11842214316129684, "learning_rate": 0.0005, "loss": 2.1302, "step": 21580 }, { "epoch": 0.08217686867687249, "grad_norm": 0.1253902018070221, "learning_rate": 0.0005, "loss": 2.1417, "step": 21590 }, { "epoch": 0.08221493114499516, "grad_norm": 0.12462165206670761, "learning_rate": 0.0005, "loss": 2.1442, "step": 21600 }, { "epoch": 0.08225299361311784, "grad_norm": 0.11757472157478333, "learning_rate": 0.0005, "loss": 2.1403, "step": 21610 }, { "epoch": 0.08229105608124053, "grad_norm": 0.12682631611824036, "learning_rate": 0.0005, "loss": 2.1333, "step": 21620 }, { "epoch": 0.08232911854936321, "grad_norm": 0.12222106754779816, "learning_rate": 0.0005, "loss": 2.1496, "step": 21630 }, { "epoch": 0.0823671810174859, "grad_norm": 0.12464617937803268, "learning_rate": 0.0005, "loss": 2.1381, "step": 21640 }, { "epoch": 0.08240524348560858, "grad_norm": 0.11862632632255554, "learning_rate": 0.0005, "loss": 2.1295, "step": 21650 }, { "epoch": 0.08244330595373127, "grad_norm": 0.1316101849079132, "learning_rate": 0.0005, "loss": 2.1333, "step": 21660 }, { "epoch": 0.08248136842185394, "grad_norm": 0.12959465384483337, "learning_rate": 0.0005, "loss": 2.1342, "step": 21670 }, { "epoch": 0.08251943088997662, "grad_norm": 0.1375904530286789, "learning_rate": 0.0005, "loss": 2.1326, "step": 21680 }, { "epoch": 0.08255749335809931, "grad_norm": 0.11778242141008377, "learning_rate": 0.0005, "loss": 2.146, "step": 21690 }, { "epoch": 0.082595555826222, "grad_norm": 0.13062040507793427, "learning_rate": 0.0005, "loss": 2.1527, "step": 21700 }, { "epoch": 0.08263361829434468, "grad_norm": 0.11083482950925827, "learning_rate": 0.0005, "loss": 2.1429, "step": 21710 }, { "epoch": 0.08267168076246736, "grad_norm": 0.11338507384061813, "learning_rate": 0.0005, "loss": 2.1438, "step": 21720 }, { "epoch": 0.08270974323059005, "grad_norm": 0.12225829064846039, "learning_rate": 0.0005, "loss": 2.1371, "step": 21730 }, { "epoch": 0.08274780569871273, "grad_norm": 0.13471394777297974, "learning_rate": 0.0005, "loss": 2.1512, "step": 21740 }, { "epoch": 0.0827858681668354, "grad_norm": 0.12748363614082336, "learning_rate": 0.0005, "loss": 2.1467, "step": 21750 }, { "epoch": 0.08282393063495809, "grad_norm": 0.15540693700313568, "learning_rate": 0.0005, "loss": 2.1334, "step": 21760 }, { "epoch": 0.08286199310308077, "grad_norm": 0.11434046924114227, "learning_rate": 0.0005, "loss": 2.1336, "step": 21770 }, { "epoch": 0.08290005557120346, "grad_norm": 0.12636405229568481, "learning_rate": 0.0005, "loss": 2.1524, "step": 21780 }, { "epoch": 0.08293811803932614, "grad_norm": 0.1177835464477539, "learning_rate": 0.0005, "loss": 2.1333, "step": 21790 }, { "epoch": 0.08297618050744883, "grad_norm": 0.11512091010808945, "learning_rate": 0.0005, "loss": 2.1348, "step": 21800 }, { "epoch": 0.08301424297557151, "grad_norm": 0.10817056149244308, "learning_rate": 0.0005, "loss": 2.1392, "step": 21810 }, { "epoch": 0.08305230544369419, "grad_norm": 0.1211874708533287, "learning_rate": 0.0005, "loss": 2.1328, "step": 21820 }, { "epoch": 0.08309036791181687, "grad_norm": 0.12446257472038269, "learning_rate": 0.0005, "loss": 2.1461, "step": 21830 }, { "epoch": 0.08312843037993956, "grad_norm": 0.11935817450284958, "learning_rate": 0.0005, "loss": 2.1409, "step": 21840 }, { "epoch": 0.08316649284806224, "grad_norm": 0.12793755531311035, "learning_rate": 0.0005, "loss": 2.1257, "step": 21850 }, { "epoch": 0.08320455531618492, "grad_norm": 0.12205001711845398, "learning_rate": 0.0005, "loss": 2.1416, "step": 21860 }, { "epoch": 0.08324261778430761, "grad_norm": 0.11728016287088394, "learning_rate": 0.0005, "loss": 2.1487, "step": 21870 }, { "epoch": 0.0832806802524303, "grad_norm": 0.12212223559617996, "learning_rate": 0.0005, "loss": 2.1446, "step": 21880 }, { "epoch": 0.08331874272055297, "grad_norm": 0.12426520138978958, "learning_rate": 0.0005, "loss": 2.1465, "step": 21890 }, { "epoch": 0.08335680518867565, "grad_norm": 0.1372053176164627, "learning_rate": 0.0005, "loss": 2.1311, "step": 21900 }, { "epoch": 0.08339486765679834, "grad_norm": 0.1247352659702301, "learning_rate": 0.0005, "loss": 2.1416, "step": 21910 }, { "epoch": 0.08343293012492102, "grad_norm": 0.11587857455015182, "learning_rate": 0.0005, "loss": 2.136, "step": 21920 }, { "epoch": 0.0834709925930437, "grad_norm": 0.11178795248270035, "learning_rate": 0.0005, "loss": 2.1435, "step": 21930 }, { "epoch": 0.08350905506116639, "grad_norm": 0.12461380660533905, "learning_rate": 0.0005, "loss": 2.1522, "step": 21940 }, { "epoch": 0.08354711752928907, "grad_norm": 0.10774627327919006, "learning_rate": 0.0005, "loss": 2.1438, "step": 21950 }, { "epoch": 0.08358517999741175, "grad_norm": 0.12161616235971451, "learning_rate": 0.0005, "loss": 2.1341, "step": 21960 }, { "epoch": 0.08362324246553443, "grad_norm": 0.12391353398561478, "learning_rate": 0.0005, "loss": 2.1363, "step": 21970 }, { "epoch": 0.08366130493365712, "grad_norm": 0.1828557401895523, "learning_rate": 0.0005, "loss": 2.1303, "step": 21980 }, { "epoch": 0.0836993674017798, "grad_norm": 0.11445656418800354, "learning_rate": 0.0005, "loss": 2.1481, "step": 21990 }, { "epoch": 0.08373742986990249, "grad_norm": 0.12685362994670868, "learning_rate": 0.0005, "loss": 2.1441, "step": 22000 }, { "epoch": 0.08377549233802517, "grad_norm": 0.14515061676502228, "learning_rate": 0.0005, "loss": 2.1318, "step": 22010 }, { "epoch": 0.08381355480614786, "grad_norm": 0.11717475205659866, "learning_rate": 0.0005, "loss": 2.1369, "step": 22020 }, { "epoch": 0.08385161727427053, "grad_norm": 0.1403116136789322, "learning_rate": 0.0005, "loss": 2.1442, "step": 22030 }, { "epoch": 0.08388967974239321, "grad_norm": 0.11670785397291183, "learning_rate": 0.0005, "loss": 2.1373, "step": 22040 }, { "epoch": 0.0839277422105159, "grad_norm": 0.13050960004329681, "learning_rate": 0.0005, "loss": 2.1462, "step": 22050 }, { "epoch": 0.08396580467863858, "grad_norm": 0.11913710087537766, "learning_rate": 0.0005, "loss": 2.1408, "step": 22060 }, { "epoch": 0.08400386714676127, "grad_norm": 0.12010898441076279, "learning_rate": 0.0005, "loss": 2.149, "step": 22070 }, { "epoch": 0.08404192961488395, "grad_norm": 0.122723788022995, "learning_rate": 0.0005, "loss": 2.1456, "step": 22080 }, { "epoch": 0.08407999208300664, "grad_norm": 0.12780143320560455, "learning_rate": 0.0005, "loss": 2.1359, "step": 22090 }, { "epoch": 0.0841180545511293, "grad_norm": 0.11837394535541534, "learning_rate": 0.0005, "loss": 2.1553, "step": 22100 }, { "epoch": 0.08415611701925199, "grad_norm": 0.1171930804848671, "learning_rate": 0.0005, "loss": 2.1418, "step": 22110 }, { "epoch": 0.08419417948737468, "grad_norm": 0.12089846283197403, "learning_rate": 0.0005, "loss": 2.1548, "step": 22120 }, { "epoch": 0.08423224195549736, "grad_norm": 0.11761198937892914, "learning_rate": 0.0005, "loss": 2.133, "step": 22130 }, { "epoch": 0.08427030442362005, "grad_norm": 0.11975211650133133, "learning_rate": 0.0005, "loss": 2.1326, "step": 22140 }, { "epoch": 0.08430836689174273, "grad_norm": 0.12231068313121796, "learning_rate": 0.0005, "loss": 2.1405, "step": 22150 }, { "epoch": 0.08434642935986542, "grad_norm": 0.12235705554485321, "learning_rate": 0.0005, "loss": 2.1439, "step": 22160 }, { "epoch": 0.0843844918279881, "grad_norm": 0.12323001772165298, "learning_rate": 0.0005, "loss": 2.1469, "step": 22170 }, { "epoch": 0.08442255429611077, "grad_norm": 0.14007116854190826, "learning_rate": 0.0005, "loss": 2.1476, "step": 22180 }, { "epoch": 0.08446061676423346, "grad_norm": 0.11277811229228973, "learning_rate": 0.0005, "loss": 2.1378, "step": 22190 }, { "epoch": 0.08449867923235614, "grad_norm": 0.12346283346414566, "learning_rate": 0.0005, "loss": 2.1359, "step": 22200 }, { "epoch": 0.08453674170047883, "grad_norm": 0.1161845400929451, "learning_rate": 0.0005, "loss": 2.1422, "step": 22210 }, { "epoch": 0.08457480416860151, "grad_norm": 0.11737547814846039, "learning_rate": 0.0005, "loss": 2.1305, "step": 22220 }, { "epoch": 0.0846128666367242, "grad_norm": 0.1171225979924202, "learning_rate": 0.0005, "loss": 2.1319, "step": 22230 }, { "epoch": 0.08465092910484688, "grad_norm": 0.11654467135667801, "learning_rate": 0.0005, "loss": 2.1409, "step": 22240 }, { "epoch": 0.08468899157296955, "grad_norm": 0.11299663782119751, "learning_rate": 0.0005, "loss": 2.1303, "step": 22250 }, { "epoch": 0.08472705404109224, "grad_norm": 0.11551041901111603, "learning_rate": 0.0005, "loss": 2.1443, "step": 22260 }, { "epoch": 0.08476511650921492, "grad_norm": 0.11503621935844421, "learning_rate": 0.0005, "loss": 2.1582, "step": 22270 }, { "epoch": 0.0848031789773376, "grad_norm": 0.12097814679145813, "learning_rate": 0.0005, "loss": 2.1508, "step": 22280 }, { "epoch": 0.08484124144546029, "grad_norm": 0.12010312080383301, "learning_rate": 0.0005, "loss": 2.1479, "step": 22290 }, { "epoch": 0.08487930391358298, "grad_norm": 0.12147096544504166, "learning_rate": 0.0005, "loss": 2.1339, "step": 22300 }, { "epoch": 0.08491736638170566, "grad_norm": 0.12864457070827484, "learning_rate": 0.0005, "loss": 2.135, "step": 22310 }, { "epoch": 0.08495542884982833, "grad_norm": 0.12055821716785431, "learning_rate": 0.0005, "loss": 2.1285, "step": 22320 }, { "epoch": 0.08499349131795102, "grad_norm": 0.11782816797494888, "learning_rate": 0.0005, "loss": 2.1283, "step": 22330 }, { "epoch": 0.0850315537860737, "grad_norm": 0.1284317523241043, "learning_rate": 0.0005, "loss": 2.1497, "step": 22340 }, { "epoch": 0.08506961625419639, "grad_norm": 0.1229197159409523, "learning_rate": 0.0005, "loss": 2.1426, "step": 22350 }, { "epoch": 0.08510767872231907, "grad_norm": 0.11341395974159241, "learning_rate": 0.0005, "loss": 2.1591, "step": 22360 }, { "epoch": 0.08514574119044176, "grad_norm": 0.13056255877017975, "learning_rate": 0.0005, "loss": 2.1327, "step": 22370 }, { "epoch": 0.08518380365856444, "grad_norm": 0.13087743520736694, "learning_rate": 0.0005, "loss": 2.1489, "step": 22380 }, { "epoch": 0.08522186612668711, "grad_norm": 0.12757542729377747, "learning_rate": 0.0005, "loss": 2.1356, "step": 22390 }, { "epoch": 0.0852599285948098, "grad_norm": 0.14698006212711334, "learning_rate": 0.0005, "loss": 2.1287, "step": 22400 }, { "epoch": 0.08529799106293248, "grad_norm": 0.11643604189157486, "learning_rate": 0.0005, "loss": 2.1394, "step": 22410 }, { "epoch": 0.08533605353105517, "grad_norm": 0.12725846469402313, "learning_rate": 0.0005, "loss": 2.1289, "step": 22420 }, { "epoch": 0.08537411599917785, "grad_norm": 0.14252601563930511, "learning_rate": 0.0005, "loss": 2.1531, "step": 22430 }, { "epoch": 0.08541217846730054, "grad_norm": 0.11785967648029327, "learning_rate": 0.0005, "loss": 2.1487, "step": 22440 }, { "epoch": 0.08545024093542322, "grad_norm": 0.11564429104328156, "learning_rate": 0.0005, "loss": 2.1399, "step": 22450 }, { "epoch": 0.08548830340354589, "grad_norm": 0.13538044691085815, "learning_rate": 0.0005, "loss": 2.1361, "step": 22460 }, { "epoch": 0.08552636587166858, "grad_norm": 0.11665502935647964, "learning_rate": 0.0005, "loss": 2.1389, "step": 22470 }, { "epoch": 0.08556442833979126, "grad_norm": 0.12820222973823547, "learning_rate": 0.0005, "loss": 2.1382, "step": 22480 }, { "epoch": 0.08560249080791395, "grad_norm": 0.1172187402844429, "learning_rate": 0.0005, "loss": 2.146, "step": 22490 }, { "epoch": 0.08564055327603663, "grad_norm": 0.15104034543037415, "learning_rate": 0.0005, "loss": 2.1468, "step": 22500 }, { "epoch": 0.08567861574415932, "grad_norm": 0.13248179852962494, "learning_rate": 0.0005, "loss": 2.1465, "step": 22510 }, { "epoch": 0.085716678212282, "grad_norm": 0.14016737043857574, "learning_rate": 0.0005, "loss": 2.1321, "step": 22520 }, { "epoch": 0.08575474068040469, "grad_norm": 0.1385992020368576, "learning_rate": 0.0005, "loss": 2.1304, "step": 22530 }, { "epoch": 0.08579280314852736, "grad_norm": 0.12560173869132996, "learning_rate": 0.0005, "loss": 2.1529, "step": 22540 }, { "epoch": 0.08583086561665004, "grad_norm": 0.12612128257751465, "learning_rate": 0.0005, "loss": 2.1324, "step": 22550 }, { "epoch": 0.08586892808477273, "grad_norm": 0.13057899475097656, "learning_rate": 0.0005, "loss": 2.1478, "step": 22560 }, { "epoch": 0.08590699055289541, "grad_norm": 0.1250109225511551, "learning_rate": 0.0005, "loss": 2.1489, "step": 22570 }, { "epoch": 0.0859450530210181, "grad_norm": 0.12412979453802109, "learning_rate": 0.0005, "loss": 2.147, "step": 22580 }, { "epoch": 0.08598311548914078, "grad_norm": 0.13171933591365814, "learning_rate": 0.0005, "loss": 2.1411, "step": 22590 }, { "epoch": 0.08602117795726347, "grad_norm": 0.11354347318410873, "learning_rate": 0.0005, "loss": 2.1384, "step": 22600 }, { "epoch": 0.08605924042538614, "grad_norm": 0.1296190619468689, "learning_rate": 0.0005, "loss": 2.1353, "step": 22610 }, { "epoch": 0.08609730289350882, "grad_norm": 0.12379322201013565, "learning_rate": 0.0005, "loss": 2.1498, "step": 22620 }, { "epoch": 0.08613536536163151, "grad_norm": 0.11893151700496674, "learning_rate": 0.0005, "loss": 2.1413, "step": 22630 }, { "epoch": 0.08617342782975419, "grad_norm": 0.11662375926971436, "learning_rate": 0.0005, "loss": 2.1234, "step": 22640 }, { "epoch": 0.08621149029787688, "grad_norm": 0.12412435561418533, "learning_rate": 0.0005, "loss": 2.1458, "step": 22650 }, { "epoch": 0.08624955276599956, "grad_norm": 0.12220267951488495, "learning_rate": 0.0005, "loss": 2.1402, "step": 22660 }, { "epoch": 0.08628761523412225, "grad_norm": 0.11996627599000931, "learning_rate": 0.0005, "loss": 2.1522, "step": 22670 }, { "epoch": 0.08632567770224492, "grad_norm": 0.13411886990070343, "learning_rate": 0.0005, "loss": 2.1463, "step": 22680 }, { "epoch": 0.0863637401703676, "grad_norm": 0.20231394469738007, "learning_rate": 0.0005, "loss": 2.1243, "step": 22690 }, { "epoch": 0.08640180263849029, "grad_norm": 0.11263010650873184, "learning_rate": 0.0005, "loss": 2.147, "step": 22700 }, { "epoch": 0.08643986510661297, "grad_norm": 0.11748948693275452, "learning_rate": 0.0005, "loss": 2.1445, "step": 22710 }, { "epoch": 0.08647792757473566, "grad_norm": 0.11988667398691177, "learning_rate": 0.0005, "loss": 2.153, "step": 22720 }, { "epoch": 0.08651599004285834, "grad_norm": 0.11542542278766632, "learning_rate": 0.0005, "loss": 2.1281, "step": 22730 }, { "epoch": 0.08655405251098103, "grad_norm": 0.11557333171367645, "learning_rate": 0.0005, "loss": 2.138, "step": 22740 }, { "epoch": 0.0865921149791037, "grad_norm": 0.13377471268177032, "learning_rate": 0.0005, "loss": 2.1306, "step": 22750 }, { "epoch": 0.08663017744722638, "grad_norm": 0.13291336596012115, "learning_rate": 0.0005, "loss": 2.1527, "step": 22760 }, { "epoch": 0.08666823991534907, "grad_norm": 0.13353805243968964, "learning_rate": 0.0005, "loss": 2.145, "step": 22770 }, { "epoch": 0.08670630238347175, "grad_norm": 0.11050818860530853, "learning_rate": 0.0005, "loss": 2.1372, "step": 22780 }, { "epoch": 0.08674436485159444, "grad_norm": 0.10883437842130661, "learning_rate": 0.0005, "loss": 2.1329, "step": 22790 }, { "epoch": 0.08678242731971712, "grad_norm": 0.12287956476211548, "learning_rate": 0.0005, "loss": 2.1584, "step": 22800 }, { "epoch": 0.08682048978783981, "grad_norm": 0.11929097026586533, "learning_rate": 0.0005, "loss": 2.1261, "step": 22810 }, { "epoch": 0.08685855225596248, "grad_norm": 0.11532725393772125, "learning_rate": 0.0005, "loss": 2.1525, "step": 22820 }, { "epoch": 0.08689661472408516, "grad_norm": 0.12584735453128815, "learning_rate": 0.0005, "loss": 2.147, "step": 22830 }, { "epoch": 0.08693467719220785, "grad_norm": 0.13039319217205048, "learning_rate": 0.0005, "loss": 2.1388, "step": 22840 }, { "epoch": 0.08697273966033053, "grad_norm": 0.11363296210765839, "learning_rate": 0.0005, "loss": 2.1484, "step": 22850 }, { "epoch": 0.08701080212845322, "grad_norm": 0.11446147412061691, "learning_rate": 0.0005, "loss": 2.1324, "step": 22860 }, { "epoch": 0.0870488645965759, "grad_norm": 0.13208778202533722, "learning_rate": 0.0005, "loss": 2.1387, "step": 22870 }, { "epoch": 0.08708692706469859, "grad_norm": 0.11517845839262009, "learning_rate": 0.0005, "loss": 2.1387, "step": 22880 }, { "epoch": 0.08712498953282127, "grad_norm": 0.11906059831380844, "learning_rate": 0.0005, "loss": 2.1388, "step": 22890 }, { "epoch": 0.08716305200094394, "grad_norm": 0.1303129941225052, "learning_rate": 0.0005, "loss": 2.1382, "step": 22900 }, { "epoch": 0.08720111446906663, "grad_norm": 0.11496607959270477, "learning_rate": 0.0005, "loss": 2.1438, "step": 22910 }, { "epoch": 0.08723917693718931, "grad_norm": 0.11427242308855057, "learning_rate": 0.0005, "loss": 2.1384, "step": 22920 }, { "epoch": 0.087277239405312, "grad_norm": 0.11897694319486618, "learning_rate": 0.0005, "loss": 2.1363, "step": 22930 }, { "epoch": 0.08731530187343468, "grad_norm": 0.11437114328145981, "learning_rate": 0.0005, "loss": 2.1419, "step": 22940 }, { "epoch": 0.08735336434155737, "grad_norm": 0.12996020913124084, "learning_rate": 0.0005, "loss": 2.1606, "step": 22950 }, { "epoch": 0.08739142680968005, "grad_norm": 0.11425703763961792, "learning_rate": 0.0005, "loss": 2.1427, "step": 22960 }, { "epoch": 0.08742948927780272, "grad_norm": 0.12618638575077057, "learning_rate": 0.0005, "loss": 2.1387, "step": 22970 }, { "epoch": 0.08746755174592541, "grad_norm": 0.1312779039144516, "learning_rate": 0.0005, "loss": 2.1455, "step": 22980 }, { "epoch": 0.0875056142140481, "grad_norm": 0.11608152091503143, "learning_rate": 0.0005, "loss": 2.1425, "step": 22990 }, { "epoch": 0.08754367668217078, "grad_norm": 0.1256219446659088, "learning_rate": 0.0005, "loss": 2.1196, "step": 23000 }, { "epoch": 0.08758173915029346, "grad_norm": 0.1317347139120102, "learning_rate": 0.0005, "loss": 2.1361, "step": 23010 }, { "epoch": 0.08761980161841615, "grad_norm": 0.11706885695457458, "learning_rate": 0.0005, "loss": 2.1309, "step": 23020 }, { "epoch": 0.08765786408653883, "grad_norm": 0.10700934380292892, "learning_rate": 0.0005, "loss": 2.1316, "step": 23030 }, { "epoch": 0.0876959265546615, "grad_norm": 0.11388903111219406, "learning_rate": 0.0005, "loss": 2.1276, "step": 23040 }, { "epoch": 0.08773398902278419, "grad_norm": 0.12671348452568054, "learning_rate": 0.0005, "loss": 2.1405, "step": 23050 }, { "epoch": 0.08777205149090687, "grad_norm": 0.12644319236278534, "learning_rate": 0.0005, "loss": 2.141, "step": 23060 }, { "epoch": 0.08781011395902956, "grad_norm": 0.1404653638601303, "learning_rate": 0.0005, "loss": 2.1459, "step": 23070 }, { "epoch": 0.08784817642715224, "grad_norm": 0.12288325279951096, "learning_rate": 0.0005, "loss": 2.1462, "step": 23080 }, { "epoch": 0.08788623889527493, "grad_norm": 0.12028903514146805, "learning_rate": 0.0005, "loss": 2.1426, "step": 23090 }, { "epoch": 0.08792430136339761, "grad_norm": 0.12061980366706848, "learning_rate": 0.0005, "loss": 2.129, "step": 23100 }, { "epoch": 0.08796236383152028, "grad_norm": 0.13747268915176392, "learning_rate": 0.0005, "loss": 2.1489, "step": 23110 }, { "epoch": 0.08800042629964297, "grad_norm": 0.1254771500825882, "learning_rate": 0.0005, "loss": 2.1381, "step": 23120 }, { "epoch": 0.08803848876776565, "grad_norm": 0.11595059186220169, "learning_rate": 0.0005, "loss": 2.1451, "step": 23130 }, { "epoch": 0.08807655123588834, "grad_norm": 0.11654561758041382, "learning_rate": 0.0005, "loss": 2.1539, "step": 23140 }, { "epoch": 0.08811461370401102, "grad_norm": 0.12951432168483734, "learning_rate": 0.0005, "loss": 2.1431, "step": 23150 }, { "epoch": 0.08815267617213371, "grad_norm": 0.11898689717054367, "learning_rate": 0.0005, "loss": 2.1384, "step": 23160 }, { "epoch": 0.0881907386402564, "grad_norm": 0.11868039518594742, "learning_rate": 0.0005, "loss": 2.1258, "step": 23170 }, { "epoch": 0.08822880110837907, "grad_norm": 0.1292949616909027, "learning_rate": 0.0005, "loss": 2.1498, "step": 23180 }, { "epoch": 0.08826686357650175, "grad_norm": 0.11264042556285858, "learning_rate": 0.0005, "loss": 2.1363, "step": 23190 }, { "epoch": 0.08830492604462443, "grad_norm": 0.1304251104593277, "learning_rate": 0.0005, "loss": 2.141, "step": 23200 }, { "epoch": 0.08834298851274712, "grad_norm": 0.11987806111574173, "learning_rate": 0.0005, "loss": 2.1465, "step": 23210 }, { "epoch": 0.0883810509808698, "grad_norm": 0.12419069558382034, "learning_rate": 0.0005, "loss": 2.1255, "step": 23220 }, { "epoch": 0.08841911344899249, "grad_norm": 0.12453112006187439, "learning_rate": 0.0005, "loss": 2.1334, "step": 23230 }, { "epoch": 0.08845717591711517, "grad_norm": 0.1267368644475937, "learning_rate": 0.0005, "loss": 2.1464, "step": 23240 }, { "epoch": 0.08849523838523785, "grad_norm": 0.11241846531629562, "learning_rate": 0.0005, "loss": 2.139, "step": 23250 }, { "epoch": 0.08853330085336053, "grad_norm": 0.12173543125391006, "learning_rate": 0.0005, "loss": 2.1582, "step": 23260 }, { "epoch": 0.08857136332148322, "grad_norm": 0.13116681575775146, "learning_rate": 0.0005, "loss": 2.1512, "step": 23270 }, { "epoch": 0.0886094257896059, "grad_norm": 0.11620502173900604, "learning_rate": 0.0005, "loss": 2.1467, "step": 23280 }, { "epoch": 0.08864748825772858, "grad_norm": 0.11876282095909119, "learning_rate": 0.0005, "loss": 2.1406, "step": 23290 }, { "epoch": 0.08868555072585127, "grad_norm": 0.11996292322874069, "learning_rate": 0.0005, "loss": 2.1313, "step": 23300 }, { "epoch": 0.08872361319397395, "grad_norm": 0.1141175851225853, "learning_rate": 0.0005, "loss": 2.1473, "step": 23310 }, { "epoch": 0.08876167566209664, "grad_norm": 0.12724260985851288, "learning_rate": 0.0005, "loss": 2.1357, "step": 23320 }, { "epoch": 0.08879973813021931, "grad_norm": 0.11278630793094635, "learning_rate": 0.0005, "loss": 2.1397, "step": 23330 }, { "epoch": 0.088837800598342, "grad_norm": 0.12381735444068909, "learning_rate": 0.0005, "loss": 2.1423, "step": 23340 }, { "epoch": 0.08887586306646468, "grad_norm": 0.12097107619047165, "learning_rate": 0.0005, "loss": 2.1266, "step": 23350 }, { "epoch": 0.08891392553458737, "grad_norm": 0.12126494944095612, "learning_rate": 0.0005, "loss": 2.1425, "step": 23360 }, { "epoch": 0.08895198800271005, "grad_norm": 0.12865309417247772, "learning_rate": 0.0005, "loss": 2.145, "step": 23370 }, { "epoch": 0.08899005047083274, "grad_norm": 0.13124258816242218, "learning_rate": 0.0005, "loss": 2.1265, "step": 23380 }, { "epoch": 0.08902811293895542, "grad_norm": 0.12501779198646545, "learning_rate": 0.0005, "loss": 2.1471, "step": 23390 }, { "epoch": 0.08906617540707809, "grad_norm": 0.1225801333785057, "learning_rate": 0.0005, "loss": 2.1483, "step": 23400 }, { "epoch": 0.08910423787520078, "grad_norm": 0.1309002786874771, "learning_rate": 0.0005, "loss": 2.1405, "step": 23410 }, { "epoch": 0.08914230034332346, "grad_norm": 0.11393202096223831, "learning_rate": 0.0005, "loss": 2.1563, "step": 23420 }, { "epoch": 0.08918036281144615, "grad_norm": 0.12912751734256744, "learning_rate": 0.0005, "loss": 2.1305, "step": 23430 }, { "epoch": 0.08921842527956883, "grad_norm": 0.1320345550775528, "learning_rate": 0.0005, "loss": 2.1529, "step": 23440 }, { "epoch": 0.08925648774769152, "grad_norm": 0.1260627806186676, "learning_rate": 0.0005, "loss": 2.1307, "step": 23450 }, { "epoch": 0.0892945502158142, "grad_norm": 0.12123764306306839, "learning_rate": 0.0005, "loss": 2.1454, "step": 23460 }, { "epoch": 0.08933261268393687, "grad_norm": 0.11679759621620178, "learning_rate": 0.0005, "loss": 2.1324, "step": 23470 }, { "epoch": 0.08937067515205956, "grad_norm": 0.11947986483573914, "learning_rate": 0.0005, "loss": 2.1174, "step": 23480 }, { "epoch": 0.08940873762018224, "grad_norm": 0.11585034430027008, "learning_rate": 0.0005, "loss": 2.1441, "step": 23490 }, { "epoch": 0.08944680008830493, "grad_norm": 0.1223374456167221, "learning_rate": 0.0005, "loss": 2.1327, "step": 23500 }, { "epoch": 0.08948486255642761, "grad_norm": 0.13731849193572998, "learning_rate": 0.0005, "loss": 2.1439, "step": 23510 }, { "epoch": 0.0895229250245503, "grad_norm": 0.12628504633903503, "learning_rate": 0.0005, "loss": 2.1337, "step": 23520 }, { "epoch": 0.08956098749267298, "grad_norm": 0.13378030061721802, "learning_rate": 0.0005, "loss": 2.1419, "step": 23530 }, { "epoch": 0.08959904996079565, "grad_norm": 0.11454793810844421, "learning_rate": 0.0005, "loss": 2.1372, "step": 23540 }, { "epoch": 0.08963711242891834, "grad_norm": 0.11796699464321136, "learning_rate": 0.0005, "loss": 2.1156, "step": 23550 }, { "epoch": 0.08967517489704102, "grad_norm": 0.1294197291135788, "learning_rate": 0.0005, "loss": 2.1383, "step": 23560 }, { "epoch": 0.0897132373651637, "grad_norm": 0.10944836586713791, "learning_rate": 0.0005, "loss": 2.129, "step": 23570 }, { "epoch": 0.08975129983328639, "grad_norm": 0.14641229808330536, "learning_rate": 0.0005, "loss": 2.1391, "step": 23580 }, { "epoch": 0.08978936230140908, "grad_norm": 0.12429311126470566, "learning_rate": 0.0005, "loss": 2.1571, "step": 23590 }, { "epoch": 0.08982742476953176, "grad_norm": 0.1437855064868927, "learning_rate": 0.0005, "loss": 2.1312, "step": 23600 }, { "epoch": 0.08986548723765443, "grad_norm": 0.1226692646741867, "learning_rate": 0.0005, "loss": 2.1462, "step": 23610 }, { "epoch": 0.08990354970577712, "grad_norm": 0.13100013136863708, "learning_rate": 0.0005, "loss": 2.1262, "step": 23620 }, { "epoch": 0.0899416121738998, "grad_norm": 0.11815108358860016, "learning_rate": 0.0005, "loss": 2.1523, "step": 23630 }, { "epoch": 0.08997967464202249, "grad_norm": 0.12475232779979706, "learning_rate": 0.0005, "loss": 2.1352, "step": 23640 }, { "epoch": 0.09001773711014517, "grad_norm": 0.11662489920854568, "learning_rate": 0.0005, "loss": 2.1401, "step": 23650 }, { "epoch": 0.09005579957826786, "grad_norm": 0.12282786518335342, "learning_rate": 0.0005, "loss": 2.144, "step": 23660 }, { "epoch": 0.09009386204639054, "grad_norm": 0.11744583398103714, "learning_rate": 0.0005, "loss": 2.1365, "step": 23670 }, { "epoch": 0.09013192451451323, "grad_norm": 0.14883597195148468, "learning_rate": 0.0005, "loss": 2.1352, "step": 23680 }, { "epoch": 0.0901699869826359, "grad_norm": 0.12501350045204163, "learning_rate": 0.0005, "loss": 2.1323, "step": 23690 }, { "epoch": 0.09020804945075858, "grad_norm": 0.1260724812746048, "learning_rate": 0.0005, "loss": 2.1295, "step": 23700 }, { "epoch": 0.09024611191888127, "grad_norm": 0.11755555868148804, "learning_rate": 0.0005, "loss": 2.1396, "step": 23710 }, { "epoch": 0.09028417438700395, "grad_norm": 0.11762434244155884, "learning_rate": 0.0005, "loss": 2.1554, "step": 23720 }, { "epoch": 0.09032223685512664, "grad_norm": 0.12634523212909698, "learning_rate": 0.0005, "loss": 2.1459, "step": 23730 }, { "epoch": 0.09036029932324932, "grad_norm": 0.11729571223258972, "learning_rate": 0.0005, "loss": 2.1245, "step": 23740 }, { "epoch": 0.090398361791372, "grad_norm": 0.11078878492116928, "learning_rate": 0.0005, "loss": 2.1496, "step": 23750 }, { "epoch": 0.09043642425949468, "grad_norm": 0.13865669071674347, "learning_rate": 0.0005, "loss": 2.1384, "step": 23760 }, { "epoch": 0.09047448672761736, "grad_norm": 0.11994817107915878, "learning_rate": 0.0005, "loss": 2.1455, "step": 23770 }, { "epoch": 0.09051254919574005, "grad_norm": 0.11820890754461288, "learning_rate": 0.0005, "loss": 2.1449, "step": 23780 }, { "epoch": 0.09055061166386273, "grad_norm": 0.11758625507354736, "learning_rate": 0.0005, "loss": 2.1344, "step": 23790 }, { "epoch": 0.09058867413198542, "grad_norm": 0.12771601974964142, "learning_rate": 0.0005, "loss": 2.1498, "step": 23800 }, { "epoch": 0.0906267366001081, "grad_norm": 0.13300511240959167, "learning_rate": 0.0005, "loss": 2.1451, "step": 23810 }, { "epoch": 0.09066479906823079, "grad_norm": 0.13406731188297272, "learning_rate": 0.0005, "loss": 2.1282, "step": 23820 }, { "epoch": 0.09070286153635346, "grad_norm": 0.12391498684883118, "learning_rate": 0.0005, "loss": 2.1432, "step": 23830 }, { "epoch": 0.09074092400447614, "grad_norm": 0.11452176421880722, "learning_rate": 0.0005, "loss": 2.1341, "step": 23840 }, { "epoch": 0.09077898647259883, "grad_norm": 0.12261798977851868, "learning_rate": 0.0005, "loss": 2.1291, "step": 23850 }, { "epoch": 0.09081704894072151, "grad_norm": 0.1292411983013153, "learning_rate": 0.0005, "loss": 2.1378, "step": 23860 }, { "epoch": 0.0908551114088442, "grad_norm": 0.11482907086610794, "learning_rate": 0.0005, "loss": 2.134, "step": 23870 }, { "epoch": 0.09089317387696688, "grad_norm": 0.13183918595314026, "learning_rate": 0.0005, "loss": 2.1285, "step": 23880 }, { "epoch": 0.09093123634508957, "grad_norm": 0.1228523850440979, "learning_rate": 0.0005, "loss": 2.1392, "step": 23890 }, { "epoch": 0.09096929881321224, "grad_norm": 0.12467924505472183, "learning_rate": 0.0005, "loss": 2.1459, "step": 23900 }, { "epoch": 0.09100736128133492, "grad_norm": 0.10971734672784805, "learning_rate": 0.0005, "loss": 2.1475, "step": 23910 }, { "epoch": 0.09104542374945761, "grad_norm": 0.1282947063446045, "learning_rate": 0.0005, "loss": 2.1365, "step": 23920 }, { "epoch": 0.09108348621758029, "grad_norm": 0.14029517769813538, "learning_rate": 0.0005, "loss": 2.1311, "step": 23930 }, { "epoch": 0.09112154868570298, "grad_norm": 0.1280580312013626, "learning_rate": 0.0005, "loss": 2.1379, "step": 23940 }, { "epoch": 0.09115961115382566, "grad_norm": 0.13255015015602112, "learning_rate": 0.0005, "loss": 2.1531, "step": 23950 }, { "epoch": 0.09119767362194835, "grad_norm": 0.12445107102394104, "learning_rate": 0.0005, "loss": 2.1328, "step": 23960 }, { "epoch": 0.09123573609007102, "grad_norm": 0.127155140042305, "learning_rate": 0.0005, "loss": 2.1522, "step": 23970 }, { "epoch": 0.0912737985581937, "grad_norm": 0.12632615864276886, "learning_rate": 0.0005, "loss": 2.1236, "step": 23980 }, { "epoch": 0.09131186102631639, "grad_norm": 0.11836609989404678, "learning_rate": 0.0005, "loss": 2.1327, "step": 23990 }, { "epoch": 0.09134992349443907, "grad_norm": 0.13590745627880096, "learning_rate": 0.0005, "loss": 2.1215, "step": 24000 }, { "epoch": 0.09138798596256176, "grad_norm": 0.1197049468755722, "learning_rate": 0.0005, "loss": 2.1449, "step": 24010 }, { "epoch": 0.09142604843068444, "grad_norm": 0.13206225633621216, "learning_rate": 0.0005, "loss": 2.1245, "step": 24020 }, { "epoch": 0.09146411089880713, "grad_norm": 0.13378150761127472, "learning_rate": 0.0005, "loss": 2.1384, "step": 24030 }, { "epoch": 0.09150217336692981, "grad_norm": 0.11511759459972382, "learning_rate": 0.0005, "loss": 2.1321, "step": 24040 }, { "epoch": 0.09154023583505248, "grad_norm": 0.1247391626238823, "learning_rate": 0.0005, "loss": 2.1407, "step": 24050 }, { "epoch": 0.09157829830317517, "grad_norm": 0.12974436581134796, "learning_rate": 0.0005, "loss": 2.1538, "step": 24060 }, { "epoch": 0.09161636077129785, "grad_norm": 0.11919248849153519, "learning_rate": 0.0005, "loss": 2.1279, "step": 24070 }, { "epoch": 0.09165442323942054, "grad_norm": 0.1240987479686737, "learning_rate": 0.0005, "loss": 2.1488, "step": 24080 }, { "epoch": 0.09169248570754322, "grad_norm": 0.12819018959999084, "learning_rate": 0.0005, "loss": 2.1432, "step": 24090 }, { "epoch": 0.09173054817566591, "grad_norm": 0.11645195633172989, "learning_rate": 0.0005, "loss": 2.1337, "step": 24100 }, { "epoch": 0.09176861064378859, "grad_norm": 0.1362362951040268, "learning_rate": 0.0005, "loss": 2.1343, "step": 24110 }, { "epoch": 0.09180667311191126, "grad_norm": 0.12752680480480194, "learning_rate": 0.0005, "loss": 2.1327, "step": 24120 }, { "epoch": 0.09184473558003395, "grad_norm": 0.11361732333898544, "learning_rate": 0.0005, "loss": 2.1315, "step": 24130 }, { "epoch": 0.09188279804815663, "grad_norm": 0.11131453514099121, "learning_rate": 0.0005, "loss": 2.1377, "step": 24140 }, { "epoch": 0.09192086051627932, "grad_norm": 0.11890824884176254, "learning_rate": 0.0005, "loss": 2.1399, "step": 24150 }, { "epoch": 0.091958922984402, "grad_norm": 0.11522199213504791, "learning_rate": 0.0005, "loss": 2.1267, "step": 24160 }, { "epoch": 0.09199698545252469, "grad_norm": 0.12634597718715668, "learning_rate": 0.0005, "loss": 2.1542, "step": 24170 }, { "epoch": 0.09203504792064737, "grad_norm": 0.11525263637304306, "learning_rate": 0.0005, "loss": 2.13, "step": 24180 }, { "epoch": 0.09207311038877004, "grad_norm": 0.11317164450883865, "learning_rate": 0.0005, "loss": 2.1453, "step": 24190 }, { "epoch": 0.09211117285689273, "grad_norm": 0.10907386243343353, "learning_rate": 0.0005, "loss": 2.1523, "step": 24200 }, { "epoch": 0.09214923532501541, "grad_norm": 0.11778762191534042, "learning_rate": 0.0005, "loss": 2.1326, "step": 24210 }, { "epoch": 0.0921872977931381, "grad_norm": 0.11565057933330536, "learning_rate": 0.0005, "loss": 2.1252, "step": 24220 }, { "epoch": 0.09222536026126078, "grad_norm": 0.13700050115585327, "learning_rate": 0.0005, "loss": 2.1314, "step": 24230 }, { "epoch": 0.09226342272938347, "grad_norm": 0.13002556562423706, "learning_rate": 0.0005, "loss": 2.161, "step": 24240 }, { "epoch": 0.09230148519750615, "grad_norm": 0.11756189167499542, "learning_rate": 0.0005, "loss": 2.1418, "step": 24250 }, { "epoch": 0.09233954766562882, "grad_norm": 0.12187279015779495, "learning_rate": 0.0005, "loss": 2.1477, "step": 24260 }, { "epoch": 0.09237761013375151, "grad_norm": 0.11067736148834229, "learning_rate": 0.0005, "loss": 2.1444, "step": 24270 }, { "epoch": 0.0924156726018742, "grad_norm": 0.1086835041642189, "learning_rate": 0.0005, "loss": 2.1388, "step": 24280 }, { "epoch": 0.09245373506999688, "grad_norm": 0.11878637224435806, "learning_rate": 0.0005, "loss": 2.1361, "step": 24290 }, { "epoch": 0.09249179753811956, "grad_norm": 0.13691183924674988, "learning_rate": 0.0005, "loss": 2.1414, "step": 24300 }, { "epoch": 0.09252986000624225, "grad_norm": 0.13181743025779724, "learning_rate": 0.0005, "loss": 2.1525, "step": 24310 }, { "epoch": 0.09256792247436493, "grad_norm": 0.13542625308036804, "learning_rate": 0.0005, "loss": 2.1298, "step": 24320 }, { "epoch": 0.0926059849424876, "grad_norm": 0.12063945829868317, "learning_rate": 0.0005, "loss": 2.1429, "step": 24330 }, { "epoch": 0.09264404741061029, "grad_norm": 0.12136218696832657, "learning_rate": 0.0005, "loss": 2.1335, "step": 24340 }, { "epoch": 0.09268210987873297, "grad_norm": 0.10949312150478363, "learning_rate": 0.0005, "loss": 2.1149, "step": 24350 }, { "epoch": 0.09272017234685566, "grad_norm": 0.1149957925081253, "learning_rate": 0.0005, "loss": 2.1177, "step": 24360 }, { "epoch": 0.09275823481497834, "grad_norm": 0.1147845983505249, "learning_rate": 0.0005, "loss": 2.1533, "step": 24370 }, { "epoch": 0.09279629728310103, "grad_norm": 0.1364920735359192, "learning_rate": 0.0005, "loss": 2.1399, "step": 24380 }, { "epoch": 0.09283435975122371, "grad_norm": 0.13210929930210114, "learning_rate": 0.0005, "loss": 2.1324, "step": 24390 }, { "epoch": 0.09287242221934638, "grad_norm": 0.13170039653778076, "learning_rate": 0.0005, "loss": 2.1267, "step": 24400 }, { "epoch": 0.09291048468746907, "grad_norm": 0.12380106002092361, "learning_rate": 0.0005, "loss": 2.1353, "step": 24410 }, { "epoch": 0.09294854715559175, "grad_norm": 0.11916085332632065, "learning_rate": 0.0005, "loss": 2.1366, "step": 24420 }, { "epoch": 0.09298660962371444, "grad_norm": 0.11754573881626129, "learning_rate": 0.0005, "loss": 2.1358, "step": 24430 }, { "epoch": 0.09302467209183712, "grad_norm": 0.11410285532474518, "learning_rate": 0.0005, "loss": 2.1508, "step": 24440 }, { "epoch": 0.09306273455995981, "grad_norm": 0.12948638200759888, "learning_rate": 0.0005, "loss": 2.1292, "step": 24450 }, { "epoch": 0.0931007970280825, "grad_norm": 0.14242137968540192, "learning_rate": 0.0005, "loss": 2.1313, "step": 24460 }, { "epoch": 0.09313885949620518, "grad_norm": 0.11610152572393417, "learning_rate": 0.0005, "loss": 2.1506, "step": 24470 }, { "epoch": 0.09317692196432785, "grad_norm": 0.11516385525465012, "learning_rate": 0.0005, "loss": 2.1417, "step": 24480 }, { "epoch": 0.09321498443245053, "grad_norm": 0.11933036893606186, "learning_rate": 0.0005, "loss": 2.1296, "step": 24490 }, { "epoch": 0.09325304690057322, "grad_norm": 0.11288411915302277, "learning_rate": 0.0005, "loss": 2.1246, "step": 24500 }, { "epoch": 0.0932911093686959, "grad_norm": 0.11823565512895584, "learning_rate": 0.0005, "loss": 2.1532, "step": 24510 }, { "epoch": 0.09332917183681859, "grad_norm": 0.14338666200637817, "learning_rate": 0.0005, "loss": 2.1394, "step": 24520 }, { "epoch": 0.09336723430494127, "grad_norm": 0.12625204026699066, "learning_rate": 0.0005, "loss": 2.1521, "step": 24530 }, { "epoch": 0.09340529677306396, "grad_norm": 0.13908584415912628, "learning_rate": 0.0005, "loss": 2.1493, "step": 24540 }, { "epoch": 0.09344335924118663, "grad_norm": 0.11579623818397522, "learning_rate": 0.0005, "loss": 2.151, "step": 24550 }, { "epoch": 0.09348142170930931, "grad_norm": 0.1155904158949852, "learning_rate": 0.0005, "loss": 2.1367, "step": 24560 }, { "epoch": 0.093519484177432, "grad_norm": 0.11854032427072525, "learning_rate": 0.0005, "loss": 2.1416, "step": 24570 }, { "epoch": 0.09355754664555468, "grad_norm": 0.1252460926771164, "learning_rate": 0.0005, "loss": 2.1317, "step": 24580 }, { "epoch": 0.09359560911367737, "grad_norm": 0.1279488205909729, "learning_rate": 0.0005, "loss": 2.1306, "step": 24590 }, { "epoch": 0.09363367158180005, "grad_norm": 0.12807022035121918, "learning_rate": 0.0005, "loss": 2.1129, "step": 24600 }, { "epoch": 0.09367173404992274, "grad_norm": 0.1232905313372612, "learning_rate": 0.0005, "loss": 2.1284, "step": 24610 }, { "epoch": 0.09370979651804541, "grad_norm": 0.13406997919082642, "learning_rate": 0.0005, "loss": 2.1348, "step": 24620 }, { "epoch": 0.0937478589861681, "grad_norm": 0.11872179806232452, "learning_rate": 0.0005, "loss": 2.147, "step": 24630 }, { "epoch": 0.09378592145429078, "grad_norm": 0.12115013599395752, "learning_rate": 0.0005, "loss": 2.148, "step": 24640 }, { "epoch": 0.09382398392241346, "grad_norm": 0.12303373962640762, "learning_rate": 0.0005, "loss": 2.1491, "step": 24650 }, { "epoch": 0.09386204639053615, "grad_norm": 0.11456926167011261, "learning_rate": 0.0005, "loss": 2.1415, "step": 24660 }, { "epoch": 0.09390010885865883, "grad_norm": 0.13177600502967834, "learning_rate": 0.0005, "loss": 2.1573, "step": 24670 }, { "epoch": 0.09393817132678152, "grad_norm": 0.1192450150847435, "learning_rate": 0.0005, "loss": 2.1454, "step": 24680 }, { "epoch": 0.09397623379490419, "grad_norm": 0.11517942696809769, "learning_rate": 0.0005, "loss": 2.1443, "step": 24690 }, { "epoch": 0.09401429626302688, "grad_norm": 0.1183677464723587, "learning_rate": 0.0005, "loss": 2.1166, "step": 24700 }, { "epoch": 0.09405235873114956, "grad_norm": 0.12436609715223312, "learning_rate": 0.0005, "loss": 2.1326, "step": 24710 }, { "epoch": 0.09409042119927225, "grad_norm": 0.12491034716367722, "learning_rate": 0.0005, "loss": 2.1366, "step": 24720 }, { "epoch": 0.09412848366739493, "grad_norm": 0.1131734699010849, "learning_rate": 0.0005, "loss": 2.1497, "step": 24730 }, { "epoch": 0.09416654613551761, "grad_norm": 0.11752540618181229, "learning_rate": 0.0005, "loss": 2.1395, "step": 24740 }, { "epoch": 0.0942046086036403, "grad_norm": 0.12445096671581268, "learning_rate": 0.0005, "loss": 2.1331, "step": 24750 }, { "epoch": 0.09424267107176297, "grad_norm": 0.13811765611171722, "learning_rate": 0.0005, "loss": 2.1406, "step": 24760 }, { "epoch": 0.09428073353988566, "grad_norm": 0.11550748348236084, "learning_rate": 0.0005, "loss": 2.1407, "step": 24770 }, { "epoch": 0.09431879600800834, "grad_norm": 0.12589634954929352, "learning_rate": 0.0005, "loss": 2.1504, "step": 24780 }, { "epoch": 0.09435685847613103, "grad_norm": 0.12445308268070221, "learning_rate": 0.0005, "loss": 2.1301, "step": 24790 }, { "epoch": 0.09439492094425371, "grad_norm": 0.12869003415107727, "learning_rate": 0.0005, "loss": 2.1474, "step": 24800 }, { "epoch": 0.0944329834123764, "grad_norm": 0.12242034077644348, "learning_rate": 0.0005, "loss": 2.1337, "step": 24810 }, { "epoch": 0.09447104588049908, "grad_norm": 0.12127193808555603, "learning_rate": 0.0005, "loss": 2.1364, "step": 24820 }, { "epoch": 0.09450910834862176, "grad_norm": 0.12238039821386337, "learning_rate": 0.0005, "loss": 2.1387, "step": 24830 }, { "epoch": 0.09454717081674444, "grad_norm": 0.12273263186216354, "learning_rate": 0.0005, "loss": 2.1466, "step": 24840 }, { "epoch": 0.09458523328486712, "grad_norm": 0.12089051306247711, "learning_rate": 0.0005, "loss": 2.1448, "step": 24850 }, { "epoch": 0.0946232957529898, "grad_norm": 0.12131716310977936, "learning_rate": 0.0005, "loss": 2.1404, "step": 24860 }, { "epoch": 0.09466135822111249, "grad_norm": 0.13538216054439545, "learning_rate": 0.0005, "loss": 2.1277, "step": 24870 }, { "epoch": 0.09469942068923518, "grad_norm": 0.1291877180337906, "learning_rate": 0.0005, "loss": 2.1392, "step": 24880 }, { "epoch": 0.09473748315735786, "grad_norm": 0.11714226007461548, "learning_rate": 0.0005, "loss": 2.1397, "step": 24890 }, { "epoch": 0.09477554562548055, "grad_norm": 0.12920613586902618, "learning_rate": 0.0005, "loss": 2.1522, "step": 24900 }, { "epoch": 0.09481360809360322, "grad_norm": 0.1269528567790985, "learning_rate": 0.0005, "loss": 2.1484, "step": 24910 }, { "epoch": 0.0948516705617259, "grad_norm": 0.11459819227457047, "learning_rate": 0.0005, "loss": 2.1427, "step": 24920 }, { "epoch": 0.09488973302984859, "grad_norm": 0.1146237924695015, "learning_rate": 0.0005, "loss": 2.1509, "step": 24930 }, { "epoch": 0.09492779549797127, "grad_norm": 0.12081960588693619, "learning_rate": 0.0005, "loss": 2.1445, "step": 24940 }, { "epoch": 0.09496585796609396, "grad_norm": 0.13120268285274506, "learning_rate": 0.0005, "loss": 2.14, "step": 24950 }, { "epoch": 0.09500392043421664, "grad_norm": 0.12134882062673569, "learning_rate": 0.0005, "loss": 2.1403, "step": 24960 }, { "epoch": 0.09504198290233933, "grad_norm": 0.11924862116575241, "learning_rate": 0.0005, "loss": 2.1473, "step": 24970 }, { "epoch": 0.095080045370462, "grad_norm": 0.13948355615139008, "learning_rate": 0.0005, "loss": 2.1439, "step": 24980 }, { "epoch": 0.09511810783858468, "grad_norm": 0.11267533153295517, "learning_rate": 0.0005, "loss": 2.1418, "step": 24990 }, { "epoch": 0.09515617030670737, "grad_norm": 0.1245555654168129, "learning_rate": 0.0005, "loss": 2.13, "step": 25000 }, { "epoch": 0.09519423277483005, "grad_norm": 0.12775032222270966, "learning_rate": 0.0005, "loss": 2.1442, "step": 25010 }, { "epoch": 0.09523229524295274, "grad_norm": 0.12445028126239777, "learning_rate": 0.0005, "loss": 2.1371, "step": 25020 }, { "epoch": 0.09527035771107542, "grad_norm": 0.11729653179645538, "learning_rate": 0.0005, "loss": 2.1282, "step": 25030 }, { "epoch": 0.0953084201791981, "grad_norm": 0.11761856824159622, "learning_rate": 0.0005, "loss": 2.1268, "step": 25040 }, { "epoch": 0.09534648264732078, "grad_norm": 0.11484331637620926, "learning_rate": 0.0005, "loss": 2.1218, "step": 25050 }, { "epoch": 0.09538454511544346, "grad_norm": 0.12111509591341019, "learning_rate": 0.0005, "loss": 2.1478, "step": 25060 }, { "epoch": 0.09542260758356615, "grad_norm": 0.11838772892951965, "learning_rate": 0.0005, "loss": 2.1475, "step": 25070 }, { "epoch": 0.09546067005168883, "grad_norm": 0.12760284543037415, "learning_rate": 0.0005, "loss": 2.1406, "step": 25080 }, { "epoch": 0.09549873251981152, "grad_norm": 0.13428868353366852, "learning_rate": 0.0005, "loss": 2.1187, "step": 25090 }, { "epoch": 0.0955367949879342, "grad_norm": 0.13183973729610443, "learning_rate": 0.0005, "loss": 2.1406, "step": 25100 }, { "epoch": 0.09557485745605689, "grad_norm": 0.12350989133119583, "learning_rate": 0.0005, "loss": 2.125, "step": 25110 }, { "epoch": 0.09561291992417956, "grad_norm": 0.13064606487751007, "learning_rate": 0.0005, "loss": 2.1352, "step": 25120 }, { "epoch": 0.09565098239230224, "grad_norm": 0.11381707340478897, "learning_rate": 0.0005, "loss": 2.1169, "step": 25130 }, { "epoch": 0.09568904486042493, "grad_norm": 0.1271132081747055, "learning_rate": 0.0005, "loss": 2.1391, "step": 25140 }, { "epoch": 0.09572710732854761, "grad_norm": 0.12241170555353165, "learning_rate": 0.0005, "loss": 2.1395, "step": 25150 }, { "epoch": 0.0957651697966703, "grad_norm": 0.27211326360702515, "learning_rate": 0.0005, "loss": 2.1386, "step": 25160 }, { "epoch": 0.09580323226479298, "grad_norm": 0.13097335398197174, "learning_rate": 0.0005, "loss": 2.1415, "step": 25170 }, { "epoch": 0.09584129473291567, "grad_norm": 0.12096305191516876, "learning_rate": 0.0005, "loss": 2.1382, "step": 25180 }, { "epoch": 0.09587935720103835, "grad_norm": 0.12811264395713806, "learning_rate": 0.0005, "loss": 2.1196, "step": 25190 }, { "epoch": 0.09591741966916102, "grad_norm": 0.13960425555706024, "learning_rate": 0.0005, "loss": 2.1337, "step": 25200 }, { "epoch": 0.0959554821372837, "grad_norm": 0.1224188506603241, "learning_rate": 0.0005, "loss": 2.1505, "step": 25210 }, { "epoch": 0.09599354460540639, "grad_norm": 0.12553152441978455, "learning_rate": 0.0005, "loss": 2.1488, "step": 25220 }, { "epoch": 0.09603160707352908, "grad_norm": 0.12428688257932663, "learning_rate": 0.0005, "loss": 2.1418, "step": 25230 }, { "epoch": 0.09606966954165176, "grad_norm": 0.12976130843162537, "learning_rate": 0.0005, "loss": 2.1526, "step": 25240 }, { "epoch": 0.09610773200977445, "grad_norm": 0.13057775795459747, "learning_rate": 0.0005, "loss": 2.1499, "step": 25250 }, { "epoch": 0.09614579447789713, "grad_norm": 0.13561537861824036, "learning_rate": 0.0005, "loss": 2.1357, "step": 25260 }, { "epoch": 0.0961838569460198, "grad_norm": 0.12290742993354797, "learning_rate": 0.0005, "loss": 2.1602, "step": 25270 }, { "epoch": 0.09622191941414249, "grad_norm": 0.1177307739853859, "learning_rate": 0.0005, "loss": 2.1393, "step": 25280 }, { "epoch": 0.09625998188226517, "grad_norm": 0.13508401811122894, "learning_rate": 0.0005, "loss": 2.1346, "step": 25290 }, { "epoch": 0.09629804435038786, "grad_norm": 0.11391041427850723, "learning_rate": 0.0005, "loss": 2.1278, "step": 25300 }, { "epoch": 0.09633610681851054, "grad_norm": 0.11419051140546799, "learning_rate": 0.0005, "loss": 2.1322, "step": 25310 }, { "epoch": 0.09637416928663323, "grad_norm": 0.12973779439926147, "learning_rate": 0.0005, "loss": 2.1279, "step": 25320 }, { "epoch": 0.09641223175475591, "grad_norm": 0.13356907665729523, "learning_rate": 0.0005, "loss": 2.1434, "step": 25330 }, { "epoch": 0.09645029422287858, "grad_norm": 0.12621325254440308, "learning_rate": 0.0005, "loss": 2.1418, "step": 25340 }, { "epoch": 0.09648835669100127, "grad_norm": 0.12215954065322876, "learning_rate": 0.0005, "loss": 2.1403, "step": 25350 }, { "epoch": 0.09652641915912395, "grad_norm": 0.1351398378610611, "learning_rate": 0.0005, "loss": 2.1308, "step": 25360 }, { "epoch": 0.09656448162724664, "grad_norm": 0.1220025047659874, "learning_rate": 0.0005, "loss": 2.1361, "step": 25370 }, { "epoch": 0.09660254409536932, "grad_norm": 0.12556889653205872, "learning_rate": 0.0005, "loss": 2.1375, "step": 25380 }, { "epoch": 0.09664060656349201, "grad_norm": 0.13660888373851776, "learning_rate": 0.0005, "loss": 2.1248, "step": 25390 }, { "epoch": 0.09667866903161469, "grad_norm": 0.12122069299221039, "learning_rate": 0.0005, "loss": 2.1407, "step": 25400 }, { "epoch": 0.09671673149973736, "grad_norm": 0.12084165960550308, "learning_rate": 0.0005, "loss": 2.1331, "step": 25410 }, { "epoch": 0.09675479396786005, "grad_norm": 0.12457843124866486, "learning_rate": 0.0005, "loss": 2.1279, "step": 25420 }, { "epoch": 0.09679285643598273, "grad_norm": 0.13408710062503815, "learning_rate": 0.0005, "loss": 2.1304, "step": 25430 }, { "epoch": 0.09683091890410542, "grad_norm": 0.1271134912967682, "learning_rate": 0.0005, "loss": 2.1522, "step": 25440 }, { "epoch": 0.0968689813722281, "grad_norm": 0.11684239655733109, "learning_rate": 0.0005, "loss": 2.1297, "step": 25450 }, { "epoch": 0.09690704384035079, "grad_norm": 0.1285119354724884, "learning_rate": 0.0005, "loss": 2.1406, "step": 25460 }, { "epoch": 0.09694510630847347, "grad_norm": 0.11452360451221466, "learning_rate": 0.0005, "loss": 2.1404, "step": 25470 }, { "epoch": 0.09698316877659614, "grad_norm": 0.1238560900092125, "learning_rate": 0.0005, "loss": 2.1445, "step": 25480 }, { "epoch": 0.09702123124471883, "grad_norm": 0.11912228912115097, "learning_rate": 0.0005, "loss": 2.1357, "step": 25490 }, { "epoch": 0.09705929371284151, "grad_norm": 0.114809051156044, "learning_rate": 0.0005, "loss": 2.1451, "step": 25500 }, { "epoch": 0.0970973561809642, "grad_norm": 0.12524668872356415, "learning_rate": 0.0005, "loss": 2.1463, "step": 25510 }, { "epoch": 0.09713541864908688, "grad_norm": 0.11931375414133072, "learning_rate": 0.0005, "loss": 2.1312, "step": 25520 }, { "epoch": 0.09717348111720957, "grad_norm": 0.12906037271022797, "learning_rate": 0.0005, "loss": 2.1348, "step": 25530 }, { "epoch": 0.09721154358533225, "grad_norm": 0.11984331905841827, "learning_rate": 0.0005, "loss": 2.1347, "step": 25540 }, { "epoch": 0.09724960605345492, "grad_norm": 0.11141162365674973, "learning_rate": 0.0005, "loss": 2.1379, "step": 25550 }, { "epoch": 0.09728766852157761, "grad_norm": 0.1167030781507492, "learning_rate": 0.0005, "loss": 2.1302, "step": 25560 }, { "epoch": 0.0973257309897003, "grad_norm": 0.12758852541446686, "learning_rate": 0.0005, "loss": 2.1414, "step": 25570 }, { "epoch": 0.09736379345782298, "grad_norm": 0.12121187150478363, "learning_rate": 0.0005, "loss": 2.1359, "step": 25580 }, { "epoch": 0.09740185592594566, "grad_norm": 0.12168534845113754, "learning_rate": 0.0005, "loss": 2.1411, "step": 25590 }, { "epoch": 0.09743991839406835, "grad_norm": 0.12349078804254532, "learning_rate": 0.0005, "loss": 2.1362, "step": 25600 }, { "epoch": 0.09747798086219103, "grad_norm": 0.13133080303668976, "learning_rate": 0.0005, "loss": 2.1466, "step": 25610 }, { "epoch": 0.09751604333031372, "grad_norm": 0.12626859545707703, "learning_rate": 0.0005, "loss": 2.1345, "step": 25620 }, { "epoch": 0.09755410579843639, "grad_norm": 0.12265080213546753, "learning_rate": 0.0005, "loss": 2.1334, "step": 25630 }, { "epoch": 0.09759216826655907, "grad_norm": 0.12489853799343109, "learning_rate": 0.0005, "loss": 2.1387, "step": 25640 }, { "epoch": 0.09763023073468176, "grad_norm": 0.1116076335310936, "learning_rate": 0.0005, "loss": 2.1441, "step": 25650 }, { "epoch": 0.09766829320280444, "grad_norm": 0.12283624708652496, "learning_rate": 0.0005, "loss": 2.1355, "step": 25660 }, { "epoch": 0.09770635567092713, "grad_norm": 0.12440288066864014, "learning_rate": 0.0005, "loss": 2.1439, "step": 25670 }, { "epoch": 0.09774441813904981, "grad_norm": 0.1259060502052307, "learning_rate": 0.0005, "loss": 2.1402, "step": 25680 }, { "epoch": 0.0977824806071725, "grad_norm": 0.12068597972393036, "learning_rate": 0.0005, "loss": 2.1376, "step": 25690 }, { "epoch": 0.09782054307529517, "grad_norm": 0.10977241396903992, "learning_rate": 0.0005, "loss": 2.1247, "step": 25700 }, { "epoch": 0.09785860554341785, "grad_norm": 0.11703129857778549, "learning_rate": 0.0005, "loss": 2.1508, "step": 25710 }, { "epoch": 0.09789666801154054, "grad_norm": 0.11471304297447205, "learning_rate": 0.0005, "loss": 2.1465, "step": 25720 }, { "epoch": 0.09793473047966322, "grad_norm": 0.12228024005889893, "learning_rate": 0.0005, "loss": 2.1554, "step": 25730 }, { "epoch": 0.09797279294778591, "grad_norm": 0.12173251062631607, "learning_rate": 0.0005, "loss": 2.1234, "step": 25740 }, { "epoch": 0.0980108554159086, "grad_norm": 0.14087113738059998, "learning_rate": 0.0005, "loss": 2.136, "step": 25750 }, { "epoch": 0.09804891788403128, "grad_norm": 0.1175365000963211, "learning_rate": 0.0005, "loss": 2.1292, "step": 25760 }, { "epoch": 0.09808698035215395, "grad_norm": 0.12194423377513885, "learning_rate": 0.0005, "loss": 2.1306, "step": 25770 }, { "epoch": 0.09812504282027663, "grad_norm": 0.10810630023479462, "learning_rate": 0.0005, "loss": 2.1405, "step": 25780 }, { "epoch": 0.09816310528839932, "grad_norm": 0.11678377538919449, "learning_rate": 0.0005, "loss": 2.1412, "step": 25790 }, { "epoch": 0.098201167756522, "grad_norm": 0.11521114408969879, "learning_rate": 0.0005, "loss": 2.1342, "step": 25800 }, { "epoch": 0.09823923022464469, "grad_norm": 0.12144433706998825, "learning_rate": 0.0005, "loss": 2.1321, "step": 25810 }, { "epoch": 0.09827729269276737, "grad_norm": 0.13035742938518524, "learning_rate": 0.0005, "loss": 2.1283, "step": 25820 }, { "epoch": 0.09831535516089006, "grad_norm": 0.12414418160915375, "learning_rate": 0.0005, "loss": 2.1365, "step": 25830 }, { "epoch": 0.09835341762901273, "grad_norm": 0.12440216541290283, "learning_rate": 0.0005, "loss": 2.1443, "step": 25840 }, { "epoch": 0.09839148009713541, "grad_norm": 0.1393919289112091, "learning_rate": 0.0005, "loss": 2.1278, "step": 25850 }, { "epoch": 0.0984295425652581, "grad_norm": 0.13919106125831604, "learning_rate": 0.0005, "loss": 2.1354, "step": 25860 }, { "epoch": 0.09846760503338078, "grad_norm": 0.1080850213766098, "learning_rate": 0.0005, "loss": 2.1442, "step": 25870 }, { "epoch": 0.09850566750150347, "grad_norm": 0.12063482403755188, "learning_rate": 0.0005, "loss": 2.1377, "step": 25880 }, { "epoch": 0.09854372996962615, "grad_norm": 0.11347249150276184, "learning_rate": 0.0005, "loss": 2.1213, "step": 25890 }, { "epoch": 0.09858179243774884, "grad_norm": 0.11986780911684036, "learning_rate": 0.0005, "loss": 2.1384, "step": 25900 }, { "epoch": 0.09861985490587151, "grad_norm": 0.12909086048603058, "learning_rate": 0.0005, "loss": 2.1369, "step": 25910 }, { "epoch": 0.0986579173739942, "grad_norm": 0.12397724390029907, "learning_rate": 0.0005, "loss": 2.1589, "step": 25920 }, { "epoch": 0.09869597984211688, "grad_norm": 0.12564894556999207, "learning_rate": 0.0005, "loss": 2.153, "step": 25930 }, { "epoch": 0.09873404231023956, "grad_norm": 0.1232154592871666, "learning_rate": 0.0005, "loss": 2.1387, "step": 25940 }, { "epoch": 0.09877210477836225, "grad_norm": 0.13897015154361725, "learning_rate": 0.0005, "loss": 2.1317, "step": 25950 }, { "epoch": 0.09881016724648493, "grad_norm": 0.12695081532001495, "learning_rate": 0.0005, "loss": 2.1495, "step": 25960 }, { "epoch": 0.09884822971460762, "grad_norm": 0.13669979572296143, "learning_rate": 0.0005, "loss": 2.15, "step": 25970 }, { "epoch": 0.0988862921827303, "grad_norm": 0.1393309384584427, "learning_rate": 0.0005, "loss": 2.1318, "step": 25980 }, { "epoch": 0.09892435465085297, "grad_norm": 0.12158825248479843, "learning_rate": 0.0005, "loss": 2.1289, "step": 25990 }, { "epoch": 0.09896241711897566, "grad_norm": 0.12393995374441147, "learning_rate": 0.0005, "loss": 2.1328, "step": 26000 }, { "epoch": 0.09900047958709834, "grad_norm": 0.12239037454128265, "learning_rate": 0.0005, "loss": 2.1494, "step": 26010 }, { "epoch": 0.09903854205522103, "grad_norm": 0.12146341055631638, "learning_rate": 0.0005, "loss": 2.131, "step": 26020 }, { "epoch": 0.09907660452334371, "grad_norm": 0.11653164029121399, "learning_rate": 0.0005, "loss": 2.1467, "step": 26030 }, { "epoch": 0.0991146669914664, "grad_norm": 0.12449685484170914, "learning_rate": 0.0005, "loss": 2.1258, "step": 26040 }, { "epoch": 0.09915272945958908, "grad_norm": 0.11595933139324188, "learning_rate": 0.0005, "loss": 2.1371, "step": 26050 }, { "epoch": 0.09919079192771176, "grad_norm": 0.1267801821231842, "learning_rate": 0.0005, "loss": 2.134, "step": 26060 }, { "epoch": 0.09922885439583444, "grad_norm": 0.11027488112449646, "learning_rate": 0.0005, "loss": 2.1252, "step": 26070 }, { "epoch": 0.09926691686395712, "grad_norm": 0.15915116667747498, "learning_rate": 0.0005, "loss": 2.1235, "step": 26080 }, { "epoch": 0.09930497933207981, "grad_norm": 0.12621380388736725, "learning_rate": 0.0005, "loss": 2.1461, "step": 26090 }, { "epoch": 0.0993430418002025, "grad_norm": 0.14087072014808655, "learning_rate": 0.0005, "loss": 2.135, "step": 26100 }, { "epoch": 0.09938110426832518, "grad_norm": 0.12439528107643127, "learning_rate": 0.0005, "loss": 2.1452, "step": 26110 }, { "epoch": 0.09941916673644786, "grad_norm": 0.12714435160160065, "learning_rate": 0.0005, "loss": 2.1401, "step": 26120 }, { "epoch": 0.09945722920457054, "grad_norm": 0.12384948879480362, "learning_rate": 0.0005, "loss": 2.1422, "step": 26130 }, { "epoch": 0.09949529167269322, "grad_norm": 0.15680019557476044, "learning_rate": 0.0005, "loss": 2.1275, "step": 26140 }, { "epoch": 0.0995333541408159, "grad_norm": 0.14214445650577545, "learning_rate": 0.0005, "loss": 2.1278, "step": 26150 }, { "epoch": 0.09957141660893859, "grad_norm": 0.12956596910953522, "learning_rate": 0.0005, "loss": 2.1163, "step": 26160 }, { "epoch": 0.09960947907706127, "grad_norm": 0.11495132744312286, "learning_rate": 0.0005, "loss": 2.1498, "step": 26170 }, { "epoch": 0.09964754154518396, "grad_norm": 0.11616240441799164, "learning_rate": 0.0005, "loss": 2.1385, "step": 26180 }, { "epoch": 0.09968560401330664, "grad_norm": 0.11863888055086136, "learning_rate": 0.0005, "loss": 2.1295, "step": 26190 }, { "epoch": 0.09972366648142932, "grad_norm": 0.1459449827671051, "learning_rate": 0.0005, "loss": 2.1444, "step": 26200 }, { "epoch": 0.099761728949552, "grad_norm": 0.11841373145580292, "learning_rate": 0.0005, "loss": 2.137, "step": 26210 }, { "epoch": 0.09979979141767469, "grad_norm": 0.11825912445783615, "learning_rate": 0.0005, "loss": 2.1472, "step": 26220 }, { "epoch": 0.09983785388579737, "grad_norm": 0.1280236691236496, "learning_rate": 0.0005, "loss": 2.1567, "step": 26230 }, { "epoch": 0.09987591635392006, "grad_norm": 0.11771514266729355, "learning_rate": 0.0005, "loss": 2.1401, "step": 26240 }, { "epoch": 0.09991397882204274, "grad_norm": 0.1152941957116127, "learning_rate": 0.0005, "loss": 2.128, "step": 26250 }, { "epoch": 0.09995204129016542, "grad_norm": 0.11362841725349426, "learning_rate": 0.0005, "loss": 2.1392, "step": 26260 }, { "epoch": 0.0999901037582881, "grad_norm": 0.13889946043491364, "learning_rate": 0.0005, "loss": 2.1198, "step": 26270 }, { "epoch": 0.10002816622641078, "grad_norm": 0.11212174594402313, "learning_rate": 0.0005, "loss": 2.1512, "step": 26280 }, { "epoch": 0.10006622869453347, "grad_norm": 0.12263673543930054, "learning_rate": 0.0005, "loss": 2.1286, "step": 26290 }, { "epoch": 0.10010429116265615, "grad_norm": 0.12970253825187683, "learning_rate": 0.0005, "loss": 2.1368, "step": 26300 }, { "epoch": 0.10014235363077884, "grad_norm": 0.12810315191745758, "learning_rate": 0.0005, "loss": 2.1351, "step": 26310 }, { "epoch": 0.10018041609890152, "grad_norm": 0.1226223036646843, "learning_rate": 0.0005, "loss": 2.1327, "step": 26320 }, { "epoch": 0.1002184785670242, "grad_norm": 0.1228988990187645, "learning_rate": 0.0005, "loss": 2.1288, "step": 26330 }, { "epoch": 0.10025654103514689, "grad_norm": 0.13820667564868927, "learning_rate": 0.0005, "loss": 2.1506, "step": 26340 }, { "epoch": 0.10029460350326956, "grad_norm": 0.1304493099451065, "learning_rate": 0.0005, "loss": 2.1542, "step": 26350 }, { "epoch": 0.10033266597139225, "grad_norm": 0.12311537563800812, "learning_rate": 0.0005, "loss": 2.137, "step": 26360 }, { "epoch": 0.10037072843951493, "grad_norm": 0.11408624798059464, "learning_rate": 0.0005, "loss": 2.1292, "step": 26370 }, { "epoch": 0.10040879090763762, "grad_norm": 0.16143427789211273, "learning_rate": 0.0005, "loss": 2.133, "step": 26380 }, { "epoch": 0.1004468533757603, "grad_norm": 0.12645834684371948, "learning_rate": 0.0005, "loss": 2.134, "step": 26390 }, { "epoch": 0.10048491584388299, "grad_norm": 0.1337132304906845, "learning_rate": 0.0005, "loss": 2.133, "step": 26400 }, { "epoch": 0.10052297831200567, "grad_norm": 0.1292242407798767, "learning_rate": 0.0005, "loss": 2.1269, "step": 26410 }, { "epoch": 0.10056104078012834, "grad_norm": 0.11790705472230911, "learning_rate": 0.0005, "loss": 2.1456, "step": 26420 }, { "epoch": 0.10059910324825103, "grad_norm": 0.11704066395759583, "learning_rate": 0.0005, "loss": 2.1543, "step": 26430 }, { "epoch": 0.10063716571637371, "grad_norm": 0.11400522291660309, "learning_rate": 0.0005, "loss": 2.143, "step": 26440 }, { "epoch": 0.1006752281844964, "grad_norm": 0.12126346677541733, "learning_rate": 0.0005, "loss": 2.1352, "step": 26450 }, { "epoch": 0.10071329065261908, "grad_norm": 0.11732926219701767, "learning_rate": 0.0005, "loss": 2.1342, "step": 26460 }, { "epoch": 0.10075135312074177, "grad_norm": 0.12718385457992554, "learning_rate": 0.0005, "loss": 2.1274, "step": 26470 }, { "epoch": 0.10078941558886445, "grad_norm": 0.11345270276069641, "learning_rate": 0.0005, "loss": 2.1412, "step": 26480 }, { "epoch": 0.10082747805698712, "grad_norm": 0.13248255848884583, "learning_rate": 0.0005, "loss": 2.1301, "step": 26490 }, { "epoch": 0.1008655405251098, "grad_norm": 0.12024351954460144, "learning_rate": 0.0005, "loss": 2.1341, "step": 26500 }, { "epoch": 0.10090360299323249, "grad_norm": 0.13912230730056763, "learning_rate": 0.0005, "loss": 2.1328, "step": 26510 }, { "epoch": 0.10094166546135518, "grad_norm": 0.11878001689910889, "learning_rate": 0.0005, "loss": 2.1296, "step": 26520 }, { "epoch": 0.10097972792947786, "grad_norm": 0.12029380351305008, "learning_rate": 0.0005, "loss": 2.1308, "step": 26530 }, { "epoch": 0.10101779039760055, "grad_norm": 0.1200578361749649, "learning_rate": 0.0005, "loss": 2.1306, "step": 26540 }, { "epoch": 0.10105585286572323, "grad_norm": 0.12079061567783356, "learning_rate": 0.0005, "loss": 2.1352, "step": 26550 }, { "epoch": 0.1010939153338459, "grad_norm": 0.13137206435203552, "learning_rate": 0.0005, "loss": 2.1366, "step": 26560 }, { "epoch": 0.10113197780196859, "grad_norm": 0.13583819568157196, "learning_rate": 0.0005, "loss": 2.1381, "step": 26570 }, { "epoch": 0.10117004027009127, "grad_norm": 0.12240844964981079, "learning_rate": 0.0005, "loss": 2.1352, "step": 26580 }, { "epoch": 0.10120810273821396, "grad_norm": 0.14458432793617249, "learning_rate": 0.0005, "loss": 2.1412, "step": 26590 }, { "epoch": 0.10124616520633664, "grad_norm": 0.1237378790974617, "learning_rate": 0.0005, "loss": 2.1343, "step": 26600 }, { "epoch": 0.10128422767445933, "grad_norm": 0.12536272406578064, "learning_rate": 0.0005, "loss": 2.1392, "step": 26610 }, { "epoch": 0.10132229014258201, "grad_norm": 0.14227186143398285, "learning_rate": 0.0005, "loss": 2.1333, "step": 26620 }, { "epoch": 0.10136035261070468, "grad_norm": 0.11502430588006973, "learning_rate": 0.0005, "loss": 2.1553, "step": 26630 }, { "epoch": 0.10139841507882737, "grad_norm": 0.12336134910583496, "learning_rate": 0.0005, "loss": 2.1386, "step": 26640 }, { "epoch": 0.10143647754695005, "grad_norm": 0.4810260832309723, "learning_rate": 0.0005, "loss": 2.1352, "step": 26650 }, { "epoch": 0.10147454001507274, "grad_norm": 0.12972472608089447, "learning_rate": 0.0005, "loss": 2.1292, "step": 26660 }, { "epoch": 0.10151260248319542, "grad_norm": 0.13647037744522095, "learning_rate": 0.0005, "loss": 2.1454, "step": 26670 }, { "epoch": 0.1015506649513181, "grad_norm": 0.12054859101772308, "learning_rate": 0.0005, "loss": 2.1319, "step": 26680 }, { "epoch": 0.10158872741944079, "grad_norm": 0.1270749419927597, "learning_rate": 0.0005, "loss": 2.1417, "step": 26690 }, { "epoch": 0.10162678988756346, "grad_norm": 0.12211668491363525, "learning_rate": 0.0005, "loss": 2.1529, "step": 26700 }, { "epoch": 0.10166485235568615, "grad_norm": 0.11024312674999237, "learning_rate": 0.0005, "loss": 2.1305, "step": 26710 }, { "epoch": 0.10170291482380883, "grad_norm": 0.1252600997686386, "learning_rate": 0.0005, "loss": 2.135, "step": 26720 }, { "epoch": 0.10174097729193152, "grad_norm": 0.12198615819215775, "learning_rate": 0.0005, "loss": 2.1444, "step": 26730 }, { "epoch": 0.1017790397600542, "grad_norm": 0.11293346434831619, "learning_rate": 0.0005, "loss": 2.1395, "step": 26740 }, { "epoch": 0.10181710222817689, "grad_norm": 0.11615917831659317, "learning_rate": 0.0005, "loss": 2.1325, "step": 26750 }, { "epoch": 0.10185516469629957, "grad_norm": 0.11647096276283264, "learning_rate": 0.0005, "loss": 2.1318, "step": 26760 }, { "epoch": 0.10189322716442226, "grad_norm": 0.1299409121274948, "learning_rate": 0.0005, "loss": 2.1326, "step": 26770 }, { "epoch": 0.10193128963254493, "grad_norm": 0.13532549142837524, "learning_rate": 0.0005, "loss": 2.1445, "step": 26780 }, { "epoch": 0.10196935210066761, "grad_norm": 0.12113643437623978, "learning_rate": 0.0005, "loss": 2.141, "step": 26790 }, { "epoch": 0.1020074145687903, "grad_norm": 0.12588463723659515, "learning_rate": 0.0005, "loss": 2.1313, "step": 26800 }, { "epoch": 0.10204547703691298, "grad_norm": 0.12778820097446442, "learning_rate": 0.0005, "loss": 2.1415, "step": 26810 }, { "epoch": 0.10208353950503567, "grad_norm": 0.11738457530736923, "learning_rate": 0.0005, "loss": 2.1323, "step": 26820 }, { "epoch": 0.10212160197315835, "grad_norm": 0.1250801533460617, "learning_rate": 0.0005, "loss": 2.14, "step": 26830 }, { "epoch": 0.10215966444128104, "grad_norm": 0.11063364148139954, "learning_rate": 0.0005, "loss": 2.1354, "step": 26840 }, { "epoch": 0.10219772690940371, "grad_norm": 0.12232097238302231, "learning_rate": 0.0005, "loss": 2.143, "step": 26850 }, { "epoch": 0.10223578937752639, "grad_norm": 0.12268010526895523, "learning_rate": 0.0005, "loss": 2.127, "step": 26860 }, { "epoch": 0.10227385184564908, "grad_norm": 0.11671310663223267, "learning_rate": 0.0005, "loss": 2.152, "step": 26870 }, { "epoch": 0.10231191431377176, "grad_norm": 0.12211109697818756, "learning_rate": 0.0005, "loss": 2.1267, "step": 26880 }, { "epoch": 0.10234997678189445, "grad_norm": 0.12568749487400055, "learning_rate": 0.0005, "loss": 2.1419, "step": 26890 }, { "epoch": 0.10238803925001713, "grad_norm": 0.12220566719770432, "learning_rate": 0.0005, "loss": 2.1448, "step": 26900 }, { "epoch": 0.10242610171813982, "grad_norm": 0.13257314264774323, "learning_rate": 0.0005, "loss": 2.1457, "step": 26910 }, { "epoch": 0.10246416418626249, "grad_norm": 0.14253629744052887, "learning_rate": 0.0005, "loss": 2.1474, "step": 26920 }, { "epoch": 0.10250222665438517, "grad_norm": 0.12506955862045288, "learning_rate": 0.0005, "loss": 2.1202, "step": 26930 }, { "epoch": 0.10254028912250786, "grad_norm": 0.1251208335161209, "learning_rate": 0.0005, "loss": 2.1511, "step": 26940 }, { "epoch": 0.10257835159063054, "grad_norm": 0.1233387216925621, "learning_rate": 0.0005, "loss": 2.1294, "step": 26950 }, { "epoch": 0.10261641405875323, "grad_norm": 0.12301129847764969, "learning_rate": 0.0005, "loss": 2.1339, "step": 26960 }, { "epoch": 0.10265447652687591, "grad_norm": 0.1209215372800827, "learning_rate": 0.0005, "loss": 2.1405, "step": 26970 }, { "epoch": 0.1026925389949986, "grad_norm": 0.11270321160554886, "learning_rate": 0.0005, "loss": 2.1362, "step": 26980 }, { "epoch": 0.10273060146312127, "grad_norm": 0.1325046867132187, "learning_rate": 0.0005, "loss": 2.1244, "step": 26990 }, { "epoch": 0.10276866393124395, "grad_norm": 0.12131819874048233, "learning_rate": 0.0005, "loss": 2.139, "step": 27000 }, { "epoch": 0.10280672639936664, "grad_norm": 0.12465294450521469, "learning_rate": 0.0005, "loss": 2.1432, "step": 27010 }, { "epoch": 0.10284478886748932, "grad_norm": 0.13112106919288635, "learning_rate": 0.0005, "loss": 2.1528, "step": 27020 }, { "epoch": 0.10288285133561201, "grad_norm": 0.11870895326137543, "learning_rate": 0.0005, "loss": 2.1561, "step": 27030 }, { "epoch": 0.10292091380373469, "grad_norm": 0.12203743308782578, "learning_rate": 0.0005, "loss": 2.1472, "step": 27040 }, { "epoch": 0.10295897627185738, "grad_norm": 0.11765111237764359, "learning_rate": 0.0005, "loss": 2.1219, "step": 27050 }, { "epoch": 0.10299703873998005, "grad_norm": 0.12385214120149612, "learning_rate": 0.0005, "loss": 2.1301, "step": 27060 }, { "epoch": 0.10303510120810273, "grad_norm": 0.1228310838341713, "learning_rate": 0.0005, "loss": 2.1391, "step": 27070 }, { "epoch": 0.10307316367622542, "grad_norm": 0.11405244469642639, "learning_rate": 0.0005, "loss": 2.1429, "step": 27080 }, { "epoch": 0.1031112261443481, "grad_norm": 0.12276868522167206, "learning_rate": 0.0005, "loss": 2.1292, "step": 27090 }, { "epoch": 0.10314928861247079, "grad_norm": 0.11903540045022964, "learning_rate": 0.0005, "loss": 2.1463, "step": 27100 }, { "epoch": 0.10318735108059347, "grad_norm": 0.14137893915176392, "learning_rate": 0.0005, "loss": 2.138, "step": 27110 }, { "epoch": 0.10322541354871616, "grad_norm": 0.12298958003520966, "learning_rate": 0.0005, "loss": 2.1366, "step": 27120 }, { "epoch": 0.10326347601683884, "grad_norm": 0.11589296162128448, "learning_rate": 0.0005, "loss": 2.1414, "step": 27130 }, { "epoch": 0.10330153848496151, "grad_norm": 0.10948190093040466, "learning_rate": 0.0005, "loss": 2.1415, "step": 27140 }, { "epoch": 0.1033396009530842, "grad_norm": 0.12524929642677307, "learning_rate": 0.0005, "loss": 2.1345, "step": 27150 }, { "epoch": 0.10337766342120688, "grad_norm": 0.12025979161262512, "learning_rate": 0.0005, "loss": 2.1337, "step": 27160 }, { "epoch": 0.10341572588932957, "grad_norm": 0.1243286207318306, "learning_rate": 0.0005, "loss": 2.1297, "step": 27170 }, { "epoch": 0.10345378835745225, "grad_norm": 0.11419710516929626, "learning_rate": 0.0005, "loss": 2.1404, "step": 27180 }, { "epoch": 0.10349185082557494, "grad_norm": 0.13044367730617523, "learning_rate": 0.0005, "loss": 2.1266, "step": 27190 }, { "epoch": 0.10352991329369762, "grad_norm": 0.12243938446044922, "learning_rate": 0.0005, "loss": 2.1387, "step": 27200 }, { "epoch": 0.1035679757618203, "grad_norm": 0.11654973775148392, "learning_rate": 0.0005, "loss": 2.1215, "step": 27210 }, { "epoch": 0.10360603822994298, "grad_norm": 0.12980137765407562, "learning_rate": 0.0005, "loss": 2.1335, "step": 27220 }, { "epoch": 0.10364410069806566, "grad_norm": 0.11914009600877762, "learning_rate": 0.0005, "loss": 2.1291, "step": 27230 }, { "epoch": 0.10368216316618835, "grad_norm": 0.12766490876674652, "learning_rate": 0.0005, "loss": 2.1356, "step": 27240 }, { "epoch": 0.10372022563431103, "grad_norm": 0.12520195543766022, "learning_rate": 0.0005, "loss": 2.125, "step": 27250 }, { "epoch": 0.10375828810243372, "grad_norm": 0.13088081777095795, "learning_rate": 0.0005, "loss": 2.1402, "step": 27260 }, { "epoch": 0.1037963505705564, "grad_norm": 0.12486676871776581, "learning_rate": 0.0005, "loss": 2.1393, "step": 27270 }, { "epoch": 0.10383441303867907, "grad_norm": 0.12424474954605103, "learning_rate": 0.0005, "loss": 2.1354, "step": 27280 }, { "epoch": 0.10387247550680176, "grad_norm": 0.17023976147174835, "learning_rate": 0.0005, "loss": 2.1417, "step": 27290 }, { "epoch": 0.10391053797492444, "grad_norm": 0.1305316537618637, "learning_rate": 0.0005, "loss": 2.1412, "step": 27300 }, { "epoch": 0.10394860044304713, "grad_norm": 0.11967485398054123, "learning_rate": 0.0005, "loss": 2.1499, "step": 27310 }, { "epoch": 0.10398666291116981, "grad_norm": 0.13274578750133514, "learning_rate": 0.0005, "loss": 2.1423, "step": 27320 }, { "epoch": 0.1040247253792925, "grad_norm": 0.12439022213220596, "learning_rate": 0.0005, "loss": 2.1359, "step": 27330 }, { "epoch": 0.10406278784741518, "grad_norm": 0.1212477907538414, "learning_rate": 0.0005, "loss": 2.15, "step": 27340 }, { "epoch": 0.10410085031553785, "grad_norm": 0.12385757267475128, "learning_rate": 0.0005, "loss": 2.1335, "step": 27350 }, { "epoch": 0.10413891278366054, "grad_norm": 0.12493318319320679, "learning_rate": 0.0005, "loss": 2.1449, "step": 27360 }, { "epoch": 0.10417697525178322, "grad_norm": 0.12151855230331421, "learning_rate": 0.0005, "loss": 2.1255, "step": 27370 }, { "epoch": 0.10421503771990591, "grad_norm": 0.11811287701129913, "learning_rate": 0.0005, "loss": 2.1494, "step": 27380 }, { "epoch": 0.1042531001880286, "grad_norm": 0.11648032814264297, "learning_rate": 0.0005, "loss": 2.1373, "step": 27390 }, { "epoch": 0.10429116265615128, "grad_norm": 0.11927644908428192, "learning_rate": 0.0005, "loss": 2.1332, "step": 27400 }, { "epoch": 0.10432922512427396, "grad_norm": 0.12469673901796341, "learning_rate": 0.0005, "loss": 2.1375, "step": 27410 }, { "epoch": 0.10436728759239663, "grad_norm": 0.1321975588798523, "learning_rate": 0.0005, "loss": 2.1284, "step": 27420 }, { "epoch": 0.10440535006051932, "grad_norm": 0.11792142689228058, "learning_rate": 0.0005, "loss": 2.1272, "step": 27430 }, { "epoch": 0.104443412528642, "grad_norm": 0.11825040727853775, "learning_rate": 0.0005, "loss": 2.1357, "step": 27440 }, { "epoch": 0.10448147499676469, "grad_norm": 0.12313884496688843, "learning_rate": 0.0005, "loss": 2.1336, "step": 27450 }, { "epoch": 0.10451953746488737, "grad_norm": 0.130935400724411, "learning_rate": 0.0005, "loss": 2.1524, "step": 27460 }, { "epoch": 0.10455759993301006, "grad_norm": 0.11850696802139282, "learning_rate": 0.0005, "loss": 2.1326, "step": 27470 }, { "epoch": 0.10459566240113274, "grad_norm": 0.1413838118314743, "learning_rate": 0.0005, "loss": 2.1251, "step": 27480 }, { "epoch": 0.10463372486925543, "grad_norm": 0.1255698800086975, "learning_rate": 0.0005, "loss": 2.1293, "step": 27490 }, { "epoch": 0.1046717873373781, "grad_norm": 0.11011414974927902, "learning_rate": 0.0005, "loss": 2.1253, "step": 27500 }, { "epoch": 0.10470984980550078, "grad_norm": 0.12200158834457397, "learning_rate": 0.0005, "loss": 2.1316, "step": 27510 }, { "epoch": 0.10474791227362347, "grad_norm": 0.12272480875253677, "learning_rate": 0.0005, "loss": 2.145, "step": 27520 }, { "epoch": 0.10478597474174615, "grad_norm": 0.1249915137887001, "learning_rate": 0.0005, "loss": 2.1301, "step": 27530 }, { "epoch": 0.10482403720986884, "grad_norm": 0.11734936386346817, "learning_rate": 0.0005, "loss": 2.1297, "step": 27540 }, { "epoch": 0.10486209967799152, "grad_norm": 0.1365039348602295, "learning_rate": 0.0005, "loss": 2.132, "step": 27550 }, { "epoch": 0.10490016214611421, "grad_norm": 0.11033467203378677, "learning_rate": 0.0005, "loss": 2.1388, "step": 27560 }, { "epoch": 0.10493822461423688, "grad_norm": 0.1132252886891365, "learning_rate": 0.0005, "loss": 2.1178, "step": 27570 }, { "epoch": 0.10497628708235957, "grad_norm": 0.11575080454349518, "learning_rate": 0.0005, "loss": 2.1411, "step": 27580 }, { "epoch": 0.10501434955048225, "grad_norm": 0.129554882645607, "learning_rate": 0.0005, "loss": 2.1567, "step": 27590 }, { "epoch": 0.10505241201860493, "grad_norm": 0.11766406148672104, "learning_rate": 0.0005, "loss": 2.1366, "step": 27600 }, { "epoch": 0.10509047448672762, "grad_norm": 0.12703871726989746, "learning_rate": 0.0005, "loss": 2.1388, "step": 27610 }, { "epoch": 0.1051285369548503, "grad_norm": 0.1272251456975937, "learning_rate": 0.0005, "loss": 2.1517, "step": 27620 }, { "epoch": 0.10516659942297299, "grad_norm": 0.12048023194074631, "learning_rate": 0.0005, "loss": 2.1396, "step": 27630 }, { "epoch": 0.10520466189109566, "grad_norm": 0.12507270276546478, "learning_rate": 0.0005, "loss": 2.1365, "step": 27640 }, { "epoch": 0.10524272435921835, "grad_norm": 0.1116430014371872, "learning_rate": 0.0005, "loss": 2.1379, "step": 27650 }, { "epoch": 0.10528078682734103, "grad_norm": 0.1226653978228569, "learning_rate": 0.0005, "loss": 2.1333, "step": 27660 }, { "epoch": 0.10531884929546372, "grad_norm": 0.1241232305765152, "learning_rate": 0.0005, "loss": 2.1258, "step": 27670 }, { "epoch": 0.1053569117635864, "grad_norm": 0.12949034571647644, "learning_rate": 0.0005, "loss": 2.1401, "step": 27680 }, { "epoch": 0.10539497423170908, "grad_norm": 0.13007037341594696, "learning_rate": 0.0005, "loss": 2.1424, "step": 27690 }, { "epoch": 0.10543303669983177, "grad_norm": 0.29032719135284424, "learning_rate": 0.0005, "loss": 2.1265, "step": 27700 }, { "epoch": 0.10547109916795444, "grad_norm": 0.11723814159631729, "learning_rate": 0.0005, "loss": 2.1569, "step": 27710 }, { "epoch": 0.10550916163607713, "grad_norm": 0.14339888095855713, "learning_rate": 0.0005, "loss": 2.1265, "step": 27720 }, { "epoch": 0.10554722410419981, "grad_norm": 0.1379217654466629, "learning_rate": 0.0005, "loss": 2.1314, "step": 27730 }, { "epoch": 0.1055852865723225, "grad_norm": 0.1293562948703766, "learning_rate": 0.0005, "loss": 2.1215, "step": 27740 }, { "epoch": 0.10562334904044518, "grad_norm": 0.12406893074512482, "learning_rate": 0.0005, "loss": 2.1358, "step": 27750 }, { "epoch": 0.10566141150856787, "grad_norm": 0.12450039386749268, "learning_rate": 0.0005, "loss": 2.1385, "step": 27760 }, { "epoch": 0.10569947397669055, "grad_norm": 0.11588918417692184, "learning_rate": 0.0005, "loss": 2.1353, "step": 27770 }, { "epoch": 0.10573753644481322, "grad_norm": 0.11539763957262039, "learning_rate": 0.0005, "loss": 2.1415, "step": 27780 }, { "epoch": 0.1057755989129359, "grad_norm": 0.12182429432868958, "learning_rate": 0.0005, "loss": 2.1367, "step": 27790 }, { "epoch": 0.10581366138105859, "grad_norm": 0.11426839977502823, "learning_rate": 0.0005, "loss": 2.1142, "step": 27800 }, { "epoch": 0.10585172384918128, "grad_norm": 0.13639409840106964, "learning_rate": 0.0005, "loss": 2.1264, "step": 27810 }, { "epoch": 0.10588978631730396, "grad_norm": 0.11492707580327988, "learning_rate": 0.0005, "loss": 2.1378, "step": 27820 }, { "epoch": 0.10592784878542665, "grad_norm": 0.11488751322031021, "learning_rate": 0.0005, "loss": 2.1449, "step": 27830 }, { "epoch": 0.10596591125354933, "grad_norm": 0.12409866601228714, "learning_rate": 0.0005, "loss": 2.1238, "step": 27840 }, { "epoch": 0.106003973721672, "grad_norm": 0.11955158412456512, "learning_rate": 0.0005, "loss": 2.1385, "step": 27850 }, { "epoch": 0.10604203618979469, "grad_norm": 0.12790343165397644, "learning_rate": 0.0005, "loss": 2.1357, "step": 27860 }, { "epoch": 0.10608009865791737, "grad_norm": 0.13152866065502167, "learning_rate": 0.0005, "loss": 2.1469, "step": 27870 }, { "epoch": 0.10611816112604006, "grad_norm": 0.13242776691913605, "learning_rate": 0.0005, "loss": 2.1468, "step": 27880 }, { "epoch": 0.10615622359416274, "grad_norm": 0.13706664741039276, "learning_rate": 0.0005, "loss": 2.145, "step": 27890 }, { "epoch": 0.10619428606228543, "grad_norm": 0.11688551306724548, "learning_rate": 0.0005, "loss": 2.1252, "step": 27900 }, { "epoch": 0.10623234853040811, "grad_norm": 0.14044661819934845, "learning_rate": 0.0005, "loss": 2.1278, "step": 27910 }, { "epoch": 0.1062704109985308, "grad_norm": 0.17804557085037231, "learning_rate": 0.0005, "loss": 2.1389, "step": 27920 }, { "epoch": 0.10630847346665347, "grad_norm": 0.11811062693595886, "learning_rate": 0.0005, "loss": 2.1398, "step": 27930 }, { "epoch": 0.10634653593477615, "grad_norm": 0.1283624917268753, "learning_rate": 0.0005, "loss": 2.1434, "step": 27940 }, { "epoch": 0.10638459840289884, "grad_norm": 0.1313037872314453, "learning_rate": 0.0005, "loss": 2.1476, "step": 27950 }, { "epoch": 0.10642266087102152, "grad_norm": 0.1112949475646019, "learning_rate": 0.0005, "loss": 2.134, "step": 27960 }, { "epoch": 0.1064607233391442, "grad_norm": 0.1099490225315094, "learning_rate": 0.0005, "loss": 2.1438, "step": 27970 }, { "epoch": 0.10649878580726689, "grad_norm": 0.1101028248667717, "learning_rate": 0.0005, "loss": 2.1435, "step": 27980 }, { "epoch": 0.10653684827538958, "grad_norm": 0.12001043558120728, "learning_rate": 0.0005, "loss": 2.1381, "step": 27990 }, { "epoch": 0.10657491074351225, "grad_norm": 0.11522848159074783, "learning_rate": 0.0005, "loss": 2.1142, "step": 28000 }, { "epoch": 0.10661297321163493, "grad_norm": 0.12180564552545547, "learning_rate": 0.0005, "loss": 2.1427, "step": 28010 }, { "epoch": 0.10665103567975762, "grad_norm": 0.13159476220607758, "learning_rate": 0.0005, "loss": 2.1296, "step": 28020 }, { "epoch": 0.1066890981478803, "grad_norm": 0.12761522829532623, "learning_rate": 0.0005, "loss": 2.1548, "step": 28030 }, { "epoch": 0.10672716061600299, "grad_norm": 0.11224870383739471, "learning_rate": 0.0005, "loss": 2.1115, "step": 28040 }, { "epoch": 0.10676522308412567, "grad_norm": 0.11419076472520828, "learning_rate": 0.0005, "loss": 2.1376, "step": 28050 }, { "epoch": 0.10680328555224836, "grad_norm": 0.12258830666542053, "learning_rate": 0.0005, "loss": 2.1374, "step": 28060 }, { "epoch": 0.10684134802037103, "grad_norm": 0.11559466272592545, "learning_rate": 0.0005, "loss": 2.1385, "step": 28070 }, { "epoch": 0.10687941048849371, "grad_norm": 0.1134631335735321, "learning_rate": 0.0005, "loss": 2.1322, "step": 28080 }, { "epoch": 0.1069174729566164, "grad_norm": 0.13319198787212372, "learning_rate": 0.0005, "loss": 2.1135, "step": 28090 }, { "epoch": 0.10695553542473908, "grad_norm": 0.1264987289905548, "learning_rate": 0.0005, "loss": 2.1323, "step": 28100 }, { "epoch": 0.10699359789286177, "grad_norm": 0.12210672348737717, "learning_rate": 0.0005, "loss": 2.146, "step": 28110 }, { "epoch": 0.10703166036098445, "grad_norm": 0.12532632052898407, "learning_rate": 0.0005, "loss": 2.1418, "step": 28120 }, { "epoch": 0.10706972282910714, "grad_norm": 0.11959918588399887, "learning_rate": 0.0005, "loss": 2.1509, "step": 28130 }, { "epoch": 0.10710778529722981, "grad_norm": 0.2721995413303375, "learning_rate": 0.0005, "loss": 2.1368, "step": 28140 }, { "epoch": 0.10714584776535249, "grad_norm": 0.11988692730665207, "learning_rate": 0.0005, "loss": 2.1277, "step": 28150 }, { "epoch": 0.10718391023347518, "grad_norm": 0.12983128428459167, "learning_rate": 0.0005, "loss": 2.1381, "step": 28160 }, { "epoch": 0.10722197270159786, "grad_norm": 0.11977101862430573, "learning_rate": 0.0005, "loss": 2.1509, "step": 28170 }, { "epoch": 0.10726003516972055, "grad_norm": 0.12527891993522644, "learning_rate": 0.0005, "loss": 2.1504, "step": 28180 }, { "epoch": 0.10729809763784323, "grad_norm": 0.12681803107261658, "learning_rate": 0.0005, "loss": 2.1318, "step": 28190 }, { "epoch": 0.10733616010596592, "grad_norm": 0.12825150787830353, "learning_rate": 0.0005, "loss": 2.1429, "step": 28200 }, { "epoch": 0.10737422257408859, "grad_norm": 0.12182246893644333, "learning_rate": 0.0005, "loss": 2.1392, "step": 28210 }, { "epoch": 0.10741228504221127, "grad_norm": 0.13522516191005707, "learning_rate": 0.0005, "loss": 2.1454, "step": 28220 }, { "epoch": 0.10745034751033396, "grad_norm": 0.12266946583986282, "learning_rate": 0.0005, "loss": 2.1259, "step": 28230 }, { "epoch": 0.10748840997845664, "grad_norm": 0.11807511746883392, "learning_rate": 0.0005, "loss": 2.1372, "step": 28240 }, { "epoch": 0.10752647244657933, "grad_norm": 0.1240006610751152, "learning_rate": 0.0005, "loss": 2.1331, "step": 28250 }, { "epoch": 0.10756453491470201, "grad_norm": 0.11999105662107468, "learning_rate": 0.0005, "loss": 2.1272, "step": 28260 }, { "epoch": 0.1076025973828247, "grad_norm": 0.11139243096113205, "learning_rate": 0.0005, "loss": 2.1315, "step": 28270 }, { "epoch": 0.10764065985094738, "grad_norm": 0.1327504962682724, "learning_rate": 0.0005, "loss": 2.142, "step": 28280 }, { "epoch": 0.10767872231907005, "grad_norm": 0.12326204031705856, "learning_rate": 0.0005, "loss": 2.1304, "step": 28290 }, { "epoch": 0.10771678478719274, "grad_norm": 0.11592575907707214, "learning_rate": 0.0005, "loss": 2.1257, "step": 28300 }, { "epoch": 0.10775484725531542, "grad_norm": 0.12289170175790787, "learning_rate": 0.0005, "loss": 2.1278, "step": 28310 }, { "epoch": 0.10779290972343811, "grad_norm": 0.1368006020784378, "learning_rate": 0.0005, "loss": 2.1436, "step": 28320 }, { "epoch": 0.10783097219156079, "grad_norm": 0.12864282727241516, "learning_rate": 0.0005, "loss": 2.1373, "step": 28330 }, { "epoch": 0.10786903465968348, "grad_norm": 0.12400609254837036, "learning_rate": 0.0005, "loss": 2.1316, "step": 28340 }, { "epoch": 0.10790709712780616, "grad_norm": 0.1271352469921112, "learning_rate": 0.0005, "loss": 2.1318, "step": 28350 }, { "epoch": 0.10794515959592883, "grad_norm": 0.1236211434006691, "learning_rate": 0.0005, "loss": 2.128, "step": 28360 }, { "epoch": 0.10798322206405152, "grad_norm": 0.1169639304280281, "learning_rate": 0.0005, "loss": 2.1239, "step": 28370 }, { "epoch": 0.1080212845321742, "grad_norm": 0.1226038709282875, "learning_rate": 0.0005, "loss": 2.1397, "step": 28380 }, { "epoch": 0.10805934700029689, "grad_norm": 0.12057903409004211, "learning_rate": 0.0005, "loss": 2.1393, "step": 28390 }, { "epoch": 0.10809740946841957, "grad_norm": 0.12454306334257126, "learning_rate": 0.0005, "loss": 2.1536, "step": 28400 }, { "epoch": 0.10813547193654226, "grad_norm": 0.10948032885789871, "learning_rate": 0.0005, "loss": 2.1516, "step": 28410 }, { "epoch": 0.10817353440466494, "grad_norm": 0.13948583602905273, "learning_rate": 0.0005, "loss": 2.1381, "step": 28420 }, { "epoch": 0.10821159687278761, "grad_norm": 0.11749628931283951, "learning_rate": 0.0005, "loss": 2.1329, "step": 28430 }, { "epoch": 0.1082496593409103, "grad_norm": 0.1229894757270813, "learning_rate": 0.0005, "loss": 2.1452, "step": 28440 }, { "epoch": 0.10828772180903298, "grad_norm": 0.12310022860765457, "learning_rate": 0.0005, "loss": 2.1311, "step": 28450 }, { "epoch": 0.10832578427715567, "grad_norm": 0.1235974133014679, "learning_rate": 0.0005, "loss": 2.1481, "step": 28460 }, { "epoch": 0.10836384674527835, "grad_norm": 0.12037523090839386, "learning_rate": 0.0005, "loss": 2.1403, "step": 28470 }, { "epoch": 0.10840190921340104, "grad_norm": 0.1255193054676056, "learning_rate": 0.0005, "loss": 2.1347, "step": 28480 }, { "epoch": 0.10843997168152372, "grad_norm": 0.11792249232530594, "learning_rate": 0.0005, "loss": 2.1362, "step": 28490 }, { "epoch": 0.1084780341496464, "grad_norm": 0.11522363871335983, "learning_rate": 0.0005, "loss": 2.1392, "step": 28500 }, { "epoch": 0.10851609661776908, "grad_norm": 0.1397753655910492, "learning_rate": 0.0005, "loss": 2.1415, "step": 28510 }, { "epoch": 0.10855415908589176, "grad_norm": 0.1190962940454483, "learning_rate": 0.0005, "loss": 2.1324, "step": 28520 }, { "epoch": 0.10859222155401445, "grad_norm": 0.12126445770263672, "learning_rate": 0.0005, "loss": 2.1308, "step": 28530 }, { "epoch": 0.10863028402213713, "grad_norm": 0.1267649233341217, "learning_rate": 0.0005, "loss": 2.1457, "step": 28540 }, { "epoch": 0.10866834649025982, "grad_norm": 0.11734792590141296, "learning_rate": 0.0005, "loss": 2.1471, "step": 28550 }, { "epoch": 0.1087064089583825, "grad_norm": 0.13454262912273407, "learning_rate": 0.0005, "loss": 2.1211, "step": 28560 }, { "epoch": 0.10874447142650517, "grad_norm": 0.12421667575836182, "learning_rate": 0.0005, "loss": 2.1496, "step": 28570 }, { "epoch": 0.10878253389462786, "grad_norm": 0.12329269200563431, "learning_rate": 0.0005, "loss": 2.1379, "step": 28580 }, { "epoch": 0.10882059636275054, "grad_norm": 0.1253172904253006, "learning_rate": 0.0005, "loss": 2.1469, "step": 28590 }, { "epoch": 0.10885865883087323, "grad_norm": 0.12167198956012726, "learning_rate": 0.0005, "loss": 2.1496, "step": 28600 }, { "epoch": 0.10889672129899591, "grad_norm": 0.11725557595491409, "learning_rate": 0.0005, "loss": 2.1288, "step": 28610 }, { "epoch": 0.1089347837671186, "grad_norm": 0.14386829733848572, "learning_rate": 0.0005, "loss": 2.1306, "step": 28620 }, { "epoch": 0.10897284623524128, "grad_norm": 0.11399870365858078, "learning_rate": 0.0005, "loss": 2.1422, "step": 28630 }, { "epoch": 0.10901090870336395, "grad_norm": 0.12038716673851013, "learning_rate": 0.0005, "loss": 2.1413, "step": 28640 }, { "epoch": 0.10904897117148664, "grad_norm": 0.12456893920898438, "learning_rate": 0.0005, "loss": 2.1355, "step": 28650 }, { "epoch": 0.10908703363960932, "grad_norm": 0.13040480017662048, "learning_rate": 0.0005, "loss": 2.1475, "step": 28660 }, { "epoch": 0.10912509610773201, "grad_norm": 0.12755368649959564, "learning_rate": 0.0005, "loss": 2.1399, "step": 28670 }, { "epoch": 0.1091631585758547, "grad_norm": 0.1210189163684845, "learning_rate": 0.0005, "loss": 2.1257, "step": 28680 }, { "epoch": 0.10920122104397738, "grad_norm": 0.12757094204425812, "learning_rate": 0.0005, "loss": 2.1359, "step": 28690 }, { "epoch": 0.10923928351210006, "grad_norm": 0.12309765070676804, "learning_rate": 0.0005, "loss": 2.1271, "step": 28700 }, { "epoch": 0.10927734598022275, "grad_norm": 0.1145697832107544, "learning_rate": 0.0005, "loss": 2.1332, "step": 28710 }, { "epoch": 0.10931540844834542, "grad_norm": 0.22067514061927795, "learning_rate": 0.0005, "loss": 2.1405, "step": 28720 }, { "epoch": 0.1093534709164681, "grad_norm": 0.12916387617588043, "learning_rate": 0.0005, "loss": 2.1472, "step": 28730 }, { "epoch": 0.10939153338459079, "grad_norm": 0.13922347128391266, "learning_rate": 0.0005, "loss": 2.1345, "step": 28740 }, { "epoch": 0.10942959585271347, "grad_norm": 0.12103313952684402, "learning_rate": 0.0005, "loss": 2.1319, "step": 28750 }, { "epoch": 0.10946765832083616, "grad_norm": 0.11259905248880386, "learning_rate": 0.0005, "loss": 2.1372, "step": 28760 }, { "epoch": 0.10950572078895884, "grad_norm": 0.11306339502334595, "learning_rate": 0.0005, "loss": 2.1362, "step": 28770 }, { "epoch": 0.10954378325708153, "grad_norm": 0.12106167525053024, "learning_rate": 0.0005, "loss": 2.1377, "step": 28780 }, { "epoch": 0.1095818457252042, "grad_norm": 0.1086219921708107, "learning_rate": 0.0005, "loss": 2.15, "step": 28790 }, { "epoch": 0.10961990819332688, "grad_norm": 0.12661181390285492, "learning_rate": 0.0005, "loss": 2.1313, "step": 28800 }, { "epoch": 0.10965797066144957, "grad_norm": 0.11590348929166794, "learning_rate": 0.0005, "loss": 2.1231, "step": 28810 }, { "epoch": 0.10969603312957225, "grad_norm": 0.12942777574062347, "learning_rate": 0.0005, "loss": 2.1456, "step": 28820 }, { "epoch": 0.10973409559769494, "grad_norm": 0.12246377021074295, "learning_rate": 0.0005, "loss": 2.1415, "step": 28830 }, { "epoch": 0.10977215806581762, "grad_norm": 0.11831900477409363, "learning_rate": 0.0005, "loss": 2.1215, "step": 28840 }, { "epoch": 0.10981022053394031, "grad_norm": 0.12228821218013763, "learning_rate": 0.0005, "loss": 2.1398, "step": 28850 }, { "epoch": 0.10984828300206298, "grad_norm": 0.12343055009841919, "learning_rate": 0.0005, "loss": 2.1357, "step": 28860 }, { "epoch": 0.10988634547018566, "grad_norm": 0.12655648589134216, "learning_rate": 0.0005, "loss": 2.1505, "step": 28870 }, { "epoch": 0.10992440793830835, "grad_norm": 0.12234000116586685, "learning_rate": 0.0005, "loss": 2.1277, "step": 28880 }, { "epoch": 0.10996247040643103, "grad_norm": 0.12989850342273712, "learning_rate": 0.0005, "loss": 2.1393, "step": 28890 }, { "epoch": 0.11000053287455372, "grad_norm": 0.11979439854621887, "learning_rate": 0.0005, "loss": 2.139, "step": 28900 }, { "epoch": 0.1100385953426764, "grad_norm": 0.133430615067482, "learning_rate": 0.0005, "loss": 2.1205, "step": 28910 }, { "epoch": 0.11007665781079909, "grad_norm": 0.11913889646530151, "learning_rate": 0.0005, "loss": 2.1398, "step": 28920 }, { "epoch": 0.11011472027892176, "grad_norm": 0.12442494928836823, "learning_rate": 0.0005, "loss": 2.1262, "step": 28930 }, { "epoch": 0.11015278274704444, "grad_norm": 0.14333483576774597, "learning_rate": 0.0005, "loss": 2.1417, "step": 28940 }, { "epoch": 0.11019084521516713, "grad_norm": 0.12277472019195557, "learning_rate": 0.0005, "loss": 2.1304, "step": 28950 }, { "epoch": 0.11022890768328981, "grad_norm": 0.10870832204818726, "learning_rate": 0.0005, "loss": 2.1351, "step": 28960 }, { "epoch": 0.1102669701514125, "grad_norm": 0.12688913941383362, "learning_rate": 0.0005, "loss": 2.1211, "step": 28970 }, { "epoch": 0.11030503261953518, "grad_norm": 0.12579499185085297, "learning_rate": 0.0005, "loss": 2.1293, "step": 28980 }, { "epoch": 0.11034309508765787, "grad_norm": 0.1273648887872696, "learning_rate": 0.0005, "loss": 2.1365, "step": 28990 }, { "epoch": 0.11038115755578054, "grad_norm": 0.12454326450824738, "learning_rate": 0.0005, "loss": 2.1421, "step": 29000 }, { "epoch": 0.11041922002390323, "grad_norm": 0.12940780818462372, "learning_rate": 0.0005, "loss": 2.1293, "step": 29010 }, { "epoch": 0.11045728249202591, "grad_norm": 0.11854497343301773, "learning_rate": 0.0005, "loss": 2.1347, "step": 29020 }, { "epoch": 0.1104953449601486, "grad_norm": 0.11319740861654282, "learning_rate": 0.0005, "loss": 2.1157, "step": 29030 }, { "epoch": 0.11053340742827128, "grad_norm": 0.12677529454231262, "learning_rate": 0.0005, "loss": 2.145, "step": 29040 }, { "epoch": 0.11057146989639396, "grad_norm": 0.12414832413196564, "learning_rate": 0.0005, "loss": 2.1306, "step": 29050 }, { "epoch": 0.11060953236451665, "grad_norm": 0.12062861770391464, "learning_rate": 0.0005, "loss": 2.1379, "step": 29060 }, { "epoch": 0.11064759483263933, "grad_norm": 0.13710670173168182, "learning_rate": 0.0005, "loss": 2.1391, "step": 29070 }, { "epoch": 0.110685657300762, "grad_norm": 0.13636557757854462, "learning_rate": 0.0005, "loss": 2.1352, "step": 29080 }, { "epoch": 0.11072371976888469, "grad_norm": 0.13088567554950714, "learning_rate": 0.0005, "loss": 2.1454, "step": 29090 }, { "epoch": 0.11076178223700738, "grad_norm": 0.11774623394012451, "learning_rate": 0.0005, "loss": 2.143, "step": 29100 }, { "epoch": 0.11079984470513006, "grad_norm": 0.13022582232952118, "learning_rate": 0.0005, "loss": 2.1432, "step": 29110 }, { "epoch": 0.11083790717325274, "grad_norm": 0.1218847781419754, "learning_rate": 0.0005, "loss": 2.1516, "step": 29120 }, { "epoch": 0.11087596964137543, "grad_norm": 0.13264073431491852, "learning_rate": 0.0005, "loss": 2.146, "step": 29130 }, { "epoch": 0.11091403210949811, "grad_norm": 0.12379482388496399, "learning_rate": 0.0005, "loss": 2.1413, "step": 29140 }, { "epoch": 0.11095209457762079, "grad_norm": 0.1278836876153946, "learning_rate": 0.0005, "loss": 2.1311, "step": 29150 }, { "epoch": 0.11099015704574347, "grad_norm": 0.12311187386512756, "learning_rate": 0.0005, "loss": 2.1341, "step": 29160 }, { "epoch": 0.11102821951386616, "grad_norm": 0.11225911229848862, "learning_rate": 0.0005, "loss": 2.1392, "step": 29170 }, { "epoch": 0.11106628198198884, "grad_norm": 0.12489734590053558, "learning_rate": 0.0005, "loss": 2.1332, "step": 29180 }, { "epoch": 0.11110434445011153, "grad_norm": 0.131637841463089, "learning_rate": 0.0005, "loss": 2.1301, "step": 29190 }, { "epoch": 0.11114240691823421, "grad_norm": 0.1125171110033989, "learning_rate": 0.0005, "loss": 2.1539, "step": 29200 }, { "epoch": 0.1111804693863569, "grad_norm": 0.120730459690094, "learning_rate": 0.0005, "loss": 2.1273, "step": 29210 }, { "epoch": 0.11121853185447957, "grad_norm": 0.13983865082263947, "learning_rate": 0.0005, "loss": 2.1419, "step": 29220 }, { "epoch": 0.11125659432260225, "grad_norm": 0.12853026390075684, "learning_rate": 0.0005, "loss": 2.1324, "step": 29230 }, { "epoch": 0.11129465679072494, "grad_norm": 0.12522001564502716, "learning_rate": 0.0005, "loss": 2.133, "step": 29240 }, { "epoch": 0.11133271925884762, "grad_norm": 0.15114690363407135, "learning_rate": 0.0005, "loss": 2.1352, "step": 29250 }, { "epoch": 0.1113707817269703, "grad_norm": 0.11838917434215546, "learning_rate": 0.0005, "loss": 2.1273, "step": 29260 }, { "epoch": 0.11140884419509299, "grad_norm": 0.11473576724529266, "learning_rate": 0.0005, "loss": 2.1267, "step": 29270 }, { "epoch": 0.11144690666321568, "grad_norm": 0.11432286351919174, "learning_rate": 0.0005, "loss": 2.1465, "step": 29280 }, { "epoch": 0.11148496913133835, "grad_norm": 0.12724675238132477, "learning_rate": 0.0005, "loss": 2.1523, "step": 29290 }, { "epoch": 0.11152303159946103, "grad_norm": 0.1361140012741089, "learning_rate": 0.0005, "loss": 2.1271, "step": 29300 }, { "epoch": 0.11156109406758372, "grad_norm": 0.11614620685577393, "learning_rate": 0.0005, "loss": 2.1326, "step": 29310 }, { "epoch": 0.1115991565357064, "grad_norm": 0.12610851228237152, "learning_rate": 0.0005, "loss": 2.1504, "step": 29320 }, { "epoch": 0.11163721900382909, "grad_norm": 0.12461981177330017, "learning_rate": 0.0005, "loss": 2.126, "step": 29330 }, { "epoch": 0.11167528147195177, "grad_norm": 0.11165129393339157, "learning_rate": 0.0005, "loss": 2.1316, "step": 29340 }, { "epoch": 0.11171334394007446, "grad_norm": 0.16419917345046997, "learning_rate": 0.0005, "loss": 2.1456, "step": 29350 }, { "epoch": 0.11175140640819713, "grad_norm": 0.1316666156053543, "learning_rate": 0.0005, "loss": 2.1258, "step": 29360 }, { "epoch": 0.11178946887631981, "grad_norm": 0.12397214025259018, "learning_rate": 0.0005, "loss": 2.134, "step": 29370 }, { "epoch": 0.1118275313444425, "grad_norm": 0.12347505986690521, "learning_rate": 0.0005, "loss": 2.1295, "step": 29380 }, { "epoch": 0.11186559381256518, "grad_norm": 0.13741114735603333, "learning_rate": 0.0005, "loss": 2.1228, "step": 29390 }, { "epoch": 0.11190365628068787, "grad_norm": 0.13383878767490387, "learning_rate": 0.0005, "loss": 2.1487, "step": 29400 }, { "epoch": 0.11194171874881055, "grad_norm": 0.1291329264640808, "learning_rate": 0.0005, "loss": 2.1293, "step": 29410 }, { "epoch": 0.11197978121693324, "grad_norm": 0.132411926984787, "learning_rate": 0.0005, "loss": 2.1527, "step": 29420 }, { "epoch": 0.11201784368505592, "grad_norm": 0.11728857457637787, "learning_rate": 0.0005, "loss": 2.1553, "step": 29430 }, { "epoch": 0.11205590615317859, "grad_norm": 0.11532270163297653, "learning_rate": 0.0005, "loss": 2.1371, "step": 29440 }, { "epoch": 0.11209396862130128, "grad_norm": 0.11879745870828629, "learning_rate": 0.0005, "loss": 2.1303, "step": 29450 }, { "epoch": 0.11213203108942396, "grad_norm": 0.1252891719341278, "learning_rate": 0.0005, "loss": 2.1298, "step": 29460 }, { "epoch": 0.11217009355754665, "grad_norm": 0.11775480210781097, "learning_rate": 0.0005, "loss": 2.1447, "step": 29470 }, { "epoch": 0.11220815602566933, "grad_norm": 0.13671059906482697, "learning_rate": 0.0005, "loss": 2.1413, "step": 29480 }, { "epoch": 0.11224621849379202, "grad_norm": 0.13583379983901978, "learning_rate": 0.0005, "loss": 2.1354, "step": 29490 }, { "epoch": 0.1122842809619147, "grad_norm": 0.1581609696149826, "learning_rate": 0.0005, "loss": 2.1558, "step": 29500 }, { "epoch": 0.11232234343003737, "grad_norm": 0.12587234377861023, "learning_rate": 0.0005, "loss": 2.1329, "step": 29510 }, { "epoch": 0.11236040589816006, "grad_norm": 0.12166285514831543, "learning_rate": 0.0005, "loss": 2.1312, "step": 29520 }, { "epoch": 0.11239846836628274, "grad_norm": 0.12624861299991608, "learning_rate": 0.0005, "loss": 2.1363, "step": 29530 }, { "epoch": 0.11243653083440543, "grad_norm": 0.10956565290689468, "learning_rate": 0.0005, "loss": 2.1391, "step": 29540 }, { "epoch": 0.11247459330252811, "grad_norm": 0.12157244980335236, "learning_rate": 0.0005, "loss": 2.1269, "step": 29550 }, { "epoch": 0.1125126557706508, "grad_norm": 0.12983137369155884, "learning_rate": 0.0005, "loss": 2.1354, "step": 29560 }, { "epoch": 0.11255071823877348, "grad_norm": 0.12666325271129608, "learning_rate": 0.0005, "loss": 2.1285, "step": 29570 }, { "epoch": 0.11258878070689615, "grad_norm": 0.1175815686583519, "learning_rate": 0.0005, "loss": 2.1142, "step": 29580 }, { "epoch": 0.11262684317501884, "grad_norm": 0.12979090213775635, "learning_rate": 0.0005, "loss": 2.1301, "step": 29590 }, { "epoch": 0.11266490564314152, "grad_norm": 0.12051022797822952, "learning_rate": 0.0005, "loss": 2.1472, "step": 29600 }, { "epoch": 0.1127029681112642, "grad_norm": 0.11917869746685028, "learning_rate": 0.0005, "loss": 2.1529, "step": 29610 }, { "epoch": 0.11274103057938689, "grad_norm": 0.11437923461198807, "learning_rate": 0.0005, "loss": 2.1306, "step": 29620 }, { "epoch": 0.11277909304750958, "grad_norm": 0.11656700819730759, "learning_rate": 0.0005, "loss": 2.142, "step": 29630 }, { "epoch": 0.11281715551563226, "grad_norm": 0.11907721310853958, "learning_rate": 0.0005, "loss": 2.13, "step": 29640 }, { "epoch": 0.11285521798375493, "grad_norm": 0.14156955480575562, "learning_rate": 0.0005, "loss": 2.1297, "step": 29650 }, { "epoch": 0.11289328045187762, "grad_norm": 0.12269634008407593, "learning_rate": 0.0005, "loss": 2.137, "step": 29660 }, { "epoch": 0.1129313429200003, "grad_norm": 0.11319083720445633, "learning_rate": 0.0005, "loss": 2.1449, "step": 29670 }, { "epoch": 0.11296940538812299, "grad_norm": 0.11652123928070068, "learning_rate": 0.0005, "loss": 2.1273, "step": 29680 }, { "epoch": 0.11300746785624567, "grad_norm": 0.12396130710840225, "learning_rate": 0.0005, "loss": 2.1605, "step": 29690 }, { "epoch": 0.11304553032436836, "grad_norm": 0.1111893430352211, "learning_rate": 0.0005, "loss": 2.1437, "step": 29700 }, { "epoch": 0.11308359279249104, "grad_norm": 0.12545843422412872, "learning_rate": 0.0005, "loss": 2.1508, "step": 29710 }, { "epoch": 0.11312165526061371, "grad_norm": 0.1228807270526886, "learning_rate": 0.0005, "loss": 2.1359, "step": 29720 }, { "epoch": 0.1131597177287364, "grad_norm": 0.11801854521036148, "learning_rate": 0.0005, "loss": 2.1449, "step": 29730 }, { "epoch": 0.11319778019685908, "grad_norm": 0.12060797214508057, "learning_rate": 0.0005, "loss": 2.1316, "step": 29740 }, { "epoch": 0.11323584266498177, "grad_norm": 0.12158960849046707, "learning_rate": 0.0005, "loss": 2.1404, "step": 29750 }, { "epoch": 0.11327390513310445, "grad_norm": 0.12883733212947845, "learning_rate": 0.0005, "loss": 2.1253, "step": 29760 }, { "epoch": 0.11331196760122714, "grad_norm": 0.12048101425170898, "learning_rate": 0.0005, "loss": 2.1233, "step": 29770 }, { "epoch": 0.11335003006934982, "grad_norm": 0.12671560049057007, "learning_rate": 0.0005, "loss": 2.1326, "step": 29780 }, { "epoch": 0.11338809253747249, "grad_norm": 0.1330437809228897, "learning_rate": 0.0005, "loss": 2.1345, "step": 29790 }, { "epoch": 0.11342615500559518, "grad_norm": 0.12295544892549515, "learning_rate": 0.0005, "loss": 2.1335, "step": 29800 }, { "epoch": 0.11346421747371786, "grad_norm": 0.12095817923545837, "learning_rate": 0.0005, "loss": 2.131, "step": 29810 }, { "epoch": 0.11350227994184055, "grad_norm": 0.11469054967164993, "learning_rate": 0.0005, "loss": 2.1329, "step": 29820 }, { "epoch": 0.11354034240996323, "grad_norm": 0.12663350999355316, "learning_rate": 0.0005, "loss": 2.1492, "step": 29830 }, { "epoch": 0.11357840487808592, "grad_norm": 0.12309986352920532, "learning_rate": 0.0005, "loss": 2.1386, "step": 29840 }, { "epoch": 0.1136164673462086, "grad_norm": 0.1423839032649994, "learning_rate": 0.0005, "loss": 2.1393, "step": 29850 }, { "epoch": 0.11365452981433129, "grad_norm": 0.13435962796211243, "learning_rate": 0.0005, "loss": 2.1459, "step": 29860 }, { "epoch": 0.11369259228245396, "grad_norm": 0.12411796301603317, "learning_rate": 0.0005, "loss": 2.1449, "step": 29870 }, { "epoch": 0.11373065475057664, "grad_norm": 0.11389677971601486, "learning_rate": 0.0005, "loss": 2.1217, "step": 29880 }, { "epoch": 0.11376871721869933, "grad_norm": 0.11975105851888657, "learning_rate": 0.0005, "loss": 2.127, "step": 29890 }, { "epoch": 0.11380677968682201, "grad_norm": 0.11731837689876556, "learning_rate": 0.0005, "loss": 2.1498, "step": 29900 }, { "epoch": 0.1138448421549447, "grad_norm": 0.12533852458000183, "learning_rate": 0.0005, "loss": 2.1371, "step": 29910 }, { "epoch": 0.11388290462306738, "grad_norm": 0.11128509044647217, "learning_rate": 0.0005, "loss": 2.137, "step": 29920 }, { "epoch": 0.11392096709119007, "grad_norm": 0.1130741611123085, "learning_rate": 0.0005, "loss": 2.1414, "step": 29930 }, { "epoch": 0.11395902955931274, "grad_norm": 0.11497542262077332, "learning_rate": 0.0005, "loss": 2.1413, "step": 29940 }, { "epoch": 0.11399709202743542, "grad_norm": 0.13669130206108093, "learning_rate": 0.0005, "loss": 2.1291, "step": 29950 }, { "epoch": 0.11403515449555811, "grad_norm": 0.1437700241804123, "learning_rate": 0.0005, "loss": 2.1304, "step": 29960 }, { "epoch": 0.11407321696368079, "grad_norm": 0.12841551005840302, "learning_rate": 0.0005, "loss": 2.1422, "step": 29970 }, { "epoch": 0.11411127943180348, "grad_norm": 0.13106156885623932, "learning_rate": 0.0005, "loss": 2.1445, "step": 29980 }, { "epoch": 0.11414934189992616, "grad_norm": 0.12786227464675903, "learning_rate": 0.0005, "loss": 2.1497, "step": 29990 }, { "epoch": 0.11418740436804885, "grad_norm": 0.1266452670097351, "learning_rate": 0.0005, "loss": 2.1362, "step": 30000 }, { "epoch": 0.11422546683617152, "grad_norm": 0.12195294350385666, "learning_rate": 0.0005, "loss": 2.1283, "step": 30010 }, { "epoch": 0.1142635293042942, "grad_norm": 0.1278504729270935, "learning_rate": 0.0005, "loss": 2.1246, "step": 30020 }, { "epoch": 0.11430159177241689, "grad_norm": 0.12020813673734665, "learning_rate": 0.0005, "loss": 2.1483, "step": 30030 }, { "epoch": 0.11433965424053957, "grad_norm": 0.12903952598571777, "learning_rate": 0.0005, "loss": 2.141, "step": 30040 }, { "epoch": 0.11437771670866226, "grad_norm": 0.12148747593164444, "learning_rate": 0.0005, "loss": 2.1426, "step": 30050 }, { "epoch": 0.11441577917678494, "grad_norm": 0.12210709601640701, "learning_rate": 0.0005, "loss": 2.1445, "step": 30060 }, { "epoch": 0.11445384164490763, "grad_norm": 0.12444771826267242, "learning_rate": 0.0005, "loss": 2.144, "step": 30070 }, { "epoch": 0.1144919041130303, "grad_norm": 0.12810124456882477, "learning_rate": 0.0005, "loss": 2.1394, "step": 30080 }, { "epoch": 0.11452996658115298, "grad_norm": 0.13028530776500702, "learning_rate": 0.0005, "loss": 2.1459, "step": 30090 }, { "epoch": 0.11456802904927567, "grad_norm": 0.1409108191728592, "learning_rate": 0.0005, "loss": 2.1423, "step": 30100 }, { "epoch": 0.11460609151739835, "grad_norm": 0.12987381219863892, "learning_rate": 0.0005, "loss": 2.1334, "step": 30110 }, { "epoch": 0.11464415398552104, "grad_norm": 0.12352680414915085, "learning_rate": 0.0005, "loss": 2.139, "step": 30120 }, { "epoch": 0.11468221645364372, "grad_norm": 0.12190854549407959, "learning_rate": 0.0005, "loss": 2.1403, "step": 30130 }, { "epoch": 0.11472027892176641, "grad_norm": 0.13718342781066895, "learning_rate": 0.0005, "loss": 2.1497, "step": 30140 }, { "epoch": 0.11475834138988908, "grad_norm": 0.16431915760040283, "learning_rate": 0.0005, "loss": 2.1319, "step": 30150 }, { "epoch": 0.11479640385801176, "grad_norm": 0.11458581686019897, "learning_rate": 0.0005, "loss": 2.1321, "step": 30160 }, { "epoch": 0.11483446632613445, "grad_norm": 0.1225656121969223, "learning_rate": 0.0005, "loss": 2.1487, "step": 30170 }, { "epoch": 0.11487252879425713, "grad_norm": 0.13149769604206085, "learning_rate": 0.0005, "loss": 2.1425, "step": 30180 }, { "epoch": 0.11491059126237982, "grad_norm": 0.1142268106341362, "learning_rate": 0.0005, "loss": 2.1419, "step": 30190 }, { "epoch": 0.1149486537305025, "grad_norm": 0.12473702430725098, "learning_rate": 0.0005, "loss": 2.1367, "step": 30200 }, { "epoch": 0.11498671619862519, "grad_norm": 0.12434647232294083, "learning_rate": 0.0005, "loss": 2.1504, "step": 30210 }, { "epoch": 0.11502477866674787, "grad_norm": 0.12315231561660767, "learning_rate": 0.0005, "loss": 2.145, "step": 30220 }, { "epoch": 0.11506284113487054, "grad_norm": 0.11632184684276581, "learning_rate": 0.0005, "loss": 2.1293, "step": 30230 }, { "epoch": 0.11510090360299323, "grad_norm": 0.1280062347650528, "learning_rate": 0.0005, "loss": 2.1452, "step": 30240 }, { "epoch": 0.11513896607111591, "grad_norm": 0.14986898005008698, "learning_rate": 0.0005, "loss": 2.1284, "step": 30250 }, { "epoch": 0.1151770285392386, "grad_norm": 0.12317207455635071, "learning_rate": 0.0005, "loss": 2.1347, "step": 30260 }, { "epoch": 0.11521509100736128, "grad_norm": 0.12557360529899597, "learning_rate": 0.0005, "loss": 2.1331, "step": 30270 }, { "epoch": 0.11525315347548397, "grad_norm": 0.5975351333618164, "learning_rate": 0.0005, "loss": 2.146, "step": 30280 }, { "epoch": 0.11529121594360665, "grad_norm": 0.12424667179584503, "learning_rate": 0.0005, "loss": 2.1248, "step": 30290 }, { "epoch": 0.11532927841172932, "grad_norm": 0.11362697929143906, "learning_rate": 0.0005, "loss": 2.1295, "step": 30300 }, { "epoch": 0.11536734087985201, "grad_norm": 0.1270321160554886, "learning_rate": 0.0005, "loss": 2.1377, "step": 30310 }, { "epoch": 0.1154054033479747, "grad_norm": 0.12559852004051208, "learning_rate": 0.0005, "loss": 2.1319, "step": 30320 }, { "epoch": 0.11544346581609738, "grad_norm": 0.12761664390563965, "learning_rate": 0.0005, "loss": 2.1351, "step": 30330 }, { "epoch": 0.11548152828422006, "grad_norm": 0.1325635462999344, "learning_rate": 0.0005, "loss": 2.133, "step": 30340 }, { "epoch": 0.11551959075234275, "grad_norm": 0.10914253443479538, "learning_rate": 0.0005, "loss": 2.1281, "step": 30350 }, { "epoch": 0.11555765322046543, "grad_norm": 0.14419271051883698, "learning_rate": 0.0005, "loss": 2.1385, "step": 30360 }, { "epoch": 0.1155957156885881, "grad_norm": 0.12012334167957306, "learning_rate": 0.0005, "loss": 2.1497, "step": 30370 }, { "epoch": 0.11563377815671079, "grad_norm": 0.14006179571151733, "learning_rate": 0.0005, "loss": 2.1419, "step": 30380 }, { "epoch": 0.11567184062483347, "grad_norm": 0.1367906928062439, "learning_rate": 0.0005, "loss": 2.1375, "step": 30390 }, { "epoch": 0.11570990309295616, "grad_norm": 0.11864303797483444, "learning_rate": 0.0005, "loss": 2.1436, "step": 30400 }, { "epoch": 0.11574796556107884, "grad_norm": 0.12833933532238007, "learning_rate": 0.0005, "loss": 2.1392, "step": 30410 }, { "epoch": 0.11578602802920153, "grad_norm": 0.1643187254667282, "learning_rate": 0.0005, "loss": 2.1384, "step": 30420 }, { "epoch": 0.11582409049732421, "grad_norm": 0.12334811687469482, "learning_rate": 0.0005, "loss": 2.1451, "step": 30430 }, { "epoch": 0.11586215296544689, "grad_norm": 0.11577615141868591, "learning_rate": 0.0005, "loss": 2.1401, "step": 30440 }, { "epoch": 0.11590021543356957, "grad_norm": 0.12428303807973862, "learning_rate": 0.0005, "loss": 2.1262, "step": 30450 }, { "epoch": 0.11593827790169225, "grad_norm": 0.12436975538730621, "learning_rate": 0.0005, "loss": 2.1488, "step": 30460 }, { "epoch": 0.11597634036981494, "grad_norm": 0.12434789538383484, "learning_rate": 0.0005, "loss": 2.1303, "step": 30470 }, { "epoch": 0.11601440283793762, "grad_norm": 0.11997382342815399, "learning_rate": 0.0005, "loss": 2.1365, "step": 30480 }, { "epoch": 0.11605246530606031, "grad_norm": 0.1221473217010498, "learning_rate": 0.0005, "loss": 2.1507, "step": 30490 }, { "epoch": 0.116090527774183, "grad_norm": 0.14918829500675201, "learning_rate": 0.0005, "loss": 2.1313, "step": 30500 }, { "epoch": 0.11612859024230567, "grad_norm": 0.1305091381072998, "learning_rate": 0.0005, "loss": 2.1327, "step": 30510 }, { "epoch": 0.11616665271042835, "grad_norm": 0.1277039498090744, "learning_rate": 0.0005, "loss": 2.134, "step": 30520 }, { "epoch": 0.11620471517855104, "grad_norm": 0.11043685674667358, "learning_rate": 0.0005, "loss": 2.134, "step": 30530 }, { "epoch": 0.11624277764667372, "grad_norm": 0.12230376899242401, "learning_rate": 0.0005, "loss": 2.1289, "step": 30540 }, { "epoch": 0.1162808401147964, "grad_norm": 0.11290695518255234, "learning_rate": 0.0005, "loss": 2.1206, "step": 30550 }, { "epoch": 0.11631890258291909, "grad_norm": 0.1329115480184555, "learning_rate": 0.0005, "loss": 2.1391, "step": 30560 }, { "epoch": 0.11635696505104177, "grad_norm": 0.12522819638252258, "learning_rate": 0.0005, "loss": 2.1439, "step": 30570 }, { "epoch": 0.11639502751916446, "grad_norm": 0.1319337785243988, "learning_rate": 0.0005, "loss": 2.1268, "step": 30580 }, { "epoch": 0.11643308998728713, "grad_norm": 0.11786749213933945, "learning_rate": 0.0005, "loss": 2.1358, "step": 30590 }, { "epoch": 0.11647115245540982, "grad_norm": 0.12414077669382095, "learning_rate": 0.0005, "loss": 2.1534, "step": 30600 }, { "epoch": 0.1165092149235325, "grad_norm": 0.12328702211380005, "learning_rate": 0.0005, "loss": 2.1329, "step": 30610 }, { "epoch": 0.11654727739165519, "grad_norm": 0.12226787209510803, "learning_rate": 0.0005, "loss": 2.1219, "step": 30620 }, { "epoch": 0.11658533985977787, "grad_norm": 0.11650876700878143, "learning_rate": 0.0005, "loss": 2.1351, "step": 30630 }, { "epoch": 0.11662340232790055, "grad_norm": 0.12521617114543915, "learning_rate": 0.0005, "loss": 2.1364, "step": 30640 }, { "epoch": 0.11666146479602324, "grad_norm": 0.12409238517284393, "learning_rate": 0.0005, "loss": 2.1337, "step": 30650 }, { "epoch": 0.11669952726414591, "grad_norm": 0.1295614391565323, "learning_rate": 0.0005, "loss": 2.1278, "step": 30660 }, { "epoch": 0.1167375897322686, "grad_norm": 0.14065662026405334, "learning_rate": 0.0005, "loss": 2.1293, "step": 30670 }, { "epoch": 0.11677565220039128, "grad_norm": 0.12377720326185226, "learning_rate": 0.0005, "loss": 2.141, "step": 30680 }, { "epoch": 0.11681371466851397, "grad_norm": 0.1270572394132614, "learning_rate": 0.0005, "loss": 2.1312, "step": 30690 }, { "epoch": 0.11685177713663665, "grad_norm": 0.12031006813049316, "learning_rate": 0.0005, "loss": 2.1412, "step": 30700 }, { "epoch": 0.11688983960475934, "grad_norm": 0.1141844317317009, "learning_rate": 0.0005, "loss": 2.1452, "step": 30710 }, { "epoch": 0.11692790207288202, "grad_norm": 0.11144188046455383, "learning_rate": 0.0005, "loss": 2.1439, "step": 30720 }, { "epoch": 0.11696596454100469, "grad_norm": 0.1195073276758194, "learning_rate": 0.0005, "loss": 2.1481, "step": 30730 }, { "epoch": 0.11700402700912738, "grad_norm": 0.11948345601558685, "learning_rate": 0.0005, "loss": 2.1406, "step": 30740 }, { "epoch": 0.11704208947725006, "grad_norm": 0.11476597934961319, "learning_rate": 0.0005, "loss": 2.133, "step": 30750 }, { "epoch": 0.11708015194537275, "grad_norm": 0.12001378834247589, "learning_rate": 0.0005, "loss": 2.1327, "step": 30760 }, { "epoch": 0.11711821441349543, "grad_norm": 0.13760356605052948, "learning_rate": 0.0005, "loss": 2.1424, "step": 30770 }, { "epoch": 0.11715627688161812, "grad_norm": 0.12049812078475952, "learning_rate": 0.0005, "loss": 2.1283, "step": 30780 }, { "epoch": 0.1171943393497408, "grad_norm": 0.11435163021087646, "learning_rate": 0.0005, "loss": 2.1478, "step": 30790 }, { "epoch": 0.11723240181786347, "grad_norm": 0.1208324208855629, "learning_rate": 0.0005, "loss": 2.1428, "step": 30800 }, { "epoch": 0.11727046428598616, "grad_norm": 0.1224512904882431, "learning_rate": 0.0005, "loss": 2.1458, "step": 30810 }, { "epoch": 0.11730852675410884, "grad_norm": 0.14688503742218018, "learning_rate": 0.0005, "loss": 2.1321, "step": 30820 }, { "epoch": 0.11734658922223153, "grad_norm": 0.12300082296133041, "learning_rate": 0.0005, "loss": 2.132, "step": 30830 }, { "epoch": 0.11738465169035421, "grad_norm": 0.13545553386211395, "learning_rate": 0.0005, "loss": 2.1352, "step": 30840 }, { "epoch": 0.1174227141584769, "grad_norm": 0.13323470950126648, "learning_rate": 0.0005, "loss": 2.1581, "step": 30850 }, { "epoch": 0.11746077662659958, "grad_norm": 0.11637146770954132, "learning_rate": 0.0005, "loss": 2.1389, "step": 30860 }, { "epoch": 0.11749883909472225, "grad_norm": 0.11425112932920456, "learning_rate": 0.0005, "loss": 2.1294, "step": 30870 }, { "epoch": 0.11753690156284494, "grad_norm": 0.11201392859220505, "learning_rate": 0.0005, "loss": 2.1397, "step": 30880 }, { "epoch": 0.11757496403096762, "grad_norm": 0.11493717133998871, "learning_rate": 0.0005, "loss": 2.1281, "step": 30890 }, { "epoch": 0.1176130264990903, "grad_norm": 0.11629050970077515, "learning_rate": 0.0005, "loss": 2.1378, "step": 30900 }, { "epoch": 0.11765108896721299, "grad_norm": 0.1346382051706314, "learning_rate": 0.0005, "loss": 2.1351, "step": 30910 }, { "epoch": 0.11768915143533568, "grad_norm": 0.13523519039154053, "learning_rate": 0.0005, "loss": 2.147, "step": 30920 }, { "epoch": 0.11772721390345836, "grad_norm": 0.13149811327457428, "learning_rate": 0.0005, "loss": 2.1355, "step": 30930 }, { "epoch": 0.11776527637158103, "grad_norm": 0.11538290977478027, "learning_rate": 0.0005, "loss": 2.1297, "step": 30940 }, { "epoch": 0.11780333883970372, "grad_norm": 0.12799760699272156, "learning_rate": 0.0005, "loss": 2.1411, "step": 30950 }, { "epoch": 0.1178414013078264, "grad_norm": 0.11699873208999634, "learning_rate": 0.0005, "loss": 2.1261, "step": 30960 }, { "epoch": 0.11787946377594909, "grad_norm": 0.12116017192602158, "learning_rate": 0.0005, "loss": 2.1478, "step": 30970 }, { "epoch": 0.11791752624407177, "grad_norm": 0.12173012644052505, "learning_rate": 0.0005, "loss": 2.1302, "step": 30980 }, { "epoch": 0.11795558871219446, "grad_norm": 0.12548451125621796, "learning_rate": 0.0005, "loss": 2.1511, "step": 30990 }, { "epoch": 0.11799365118031714, "grad_norm": 0.1358858197927475, "learning_rate": 0.0005, "loss": 2.1437, "step": 31000 }, { "epoch": 0.11803171364843983, "grad_norm": 0.14300884306430817, "learning_rate": 0.0005, "loss": 2.1382, "step": 31010 }, { "epoch": 0.1180697761165625, "grad_norm": 0.14722184836864471, "learning_rate": 0.0005, "loss": 2.151, "step": 31020 }, { "epoch": 0.11810783858468518, "grad_norm": 0.11908479779958725, "learning_rate": 0.0005, "loss": 2.1482, "step": 31030 }, { "epoch": 0.11814590105280787, "grad_norm": 0.11892859637737274, "learning_rate": 0.0005, "loss": 2.1491, "step": 31040 }, { "epoch": 0.11818396352093055, "grad_norm": 0.11230923235416412, "learning_rate": 0.0005, "loss": 2.1308, "step": 31050 }, { "epoch": 0.11822202598905324, "grad_norm": 0.11231476813554764, "learning_rate": 0.0005, "loss": 2.1259, "step": 31060 }, { "epoch": 0.11826008845717592, "grad_norm": 0.13890352845191956, "learning_rate": 0.0005, "loss": 2.14, "step": 31070 }, { "epoch": 0.1182981509252986, "grad_norm": 0.12718895077705383, "learning_rate": 0.0005, "loss": 2.153, "step": 31080 }, { "epoch": 0.11833621339342128, "grad_norm": 0.12791119515895844, "learning_rate": 0.0005, "loss": 2.1404, "step": 31090 }, { "epoch": 0.11837427586154396, "grad_norm": 0.11950281262397766, "learning_rate": 0.0005, "loss": 2.1282, "step": 31100 }, { "epoch": 0.11841233832966665, "grad_norm": 0.12373452633619308, "learning_rate": 0.0005, "loss": 2.1248, "step": 31110 }, { "epoch": 0.11845040079778933, "grad_norm": 0.11845168471336365, "learning_rate": 0.0005, "loss": 2.1334, "step": 31120 }, { "epoch": 0.11848846326591202, "grad_norm": 0.12394628673791885, "learning_rate": 0.0005, "loss": 2.1321, "step": 31130 }, { "epoch": 0.1185265257340347, "grad_norm": 0.13420367240905762, "learning_rate": 0.0005, "loss": 2.1396, "step": 31140 }, { "epoch": 0.11856458820215739, "grad_norm": 0.12310107797384262, "learning_rate": 0.0005, "loss": 2.121, "step": 31150 }, { "epoch": 0.11860265067028006, "grad_norm": 0.11265326291322708, "learning_rate": 0.0005, "loss": 2.1267, "step": 31160 }, { "epoch": 0.11864071313840274, "grad_norm": 0.11336939036846161, "learning_rate": 0.0005, "loss": 2.1249, "step": 31170 }, { "epoch": 0.11867877560652543, "grad_norm": 0.12562648952007294, "learning_rate": 0.0005, "loss": 2.1405, "step": 31180 }, { "epoch": 0.11871683807464811, "grad_norm": 0.12459778040647507, "learning_rate": 0.0005, "loss": 2.134, "step": 31190 }, { "epoch": 0.1187549005427708, "grad_norm": 0.1319107562303543, "learning_rate": 0.0005, "loss": 2.1517, "step": 31200 }, { "epoch": 0.11879296301089348, "grad_norm": 0.1265908181667328, "learning_rate": 0.0005, "loss": 2.1301, "step": 31210 }, { "epoch": 0.11883102547901617, "grad_norm": 0.13179562985897064, "learning_rate": 0.0005, "loss": 2.1436, "step": 31220 }, { "epoch": 0.11886908794713884, "grad_norm": 0.12190480530261993, "learning_rate": 0.0005, "loss": 2.1363, "step": 31230 }, { "epoch": 0.11890715041526152, "grad_norm": 0.12243692576885223, "learning_rate": 0.0005, "loss": 2.1315, "step": 31240 }, { "epoch": 0.11894521288338421, "grad_norm": 0.13950267434120178, "learning_rate": 0.0005, "loss": 2.1311, "step": 31250 }, { "epoch": 0.11898327535150689, "grad_norm": 0.12174614518880844, "learning_rate": 0.0005, "loss": 2.1193, "step": 31260 }, { "epoch": 0.11902133781962958, "grad_norm": 0.12926694750785828, "learning_rate": 0.0005, "loss": 2.1441, "step": 31270 }, { "epoch": 0.11905940028775226, "grad_norm": 0.12084470689296722, "learning_rate": 0.0005, "loss": 2.1145, "step": 31280 }, { "epoch": 0.11909746275587495, "grad_norm": 0.13466468453407288, "learning_rate": 0.0005, "loss": 2.1359, "step": 31290 }, { "epoch": 0.11913552522399762, "grad_norm": 0.12772592902183533, "learning_rate": 0.0005, "loss": 2.1276, "step": 31300 }, { "epoch": 0.1191735876921203, "grad_norm": 0.11330266296863556, "learning_rate": 0.0005, "loss": 2.1456, "step": 31310 }, { "epoch": 0.11921165016024299, "grad_norm": 0.12213649600744247, "learning_rate": 0.0005, "loss": 2.1512, "step": 31320 }, { "epoch": 0.11924971262836567, "grad_norm": 0.134195476770401, "learning_rate": 0.0005, "loss": 2.1274, "step": 31330 }, { "epoch": 0.11928777509648836, "grad_norm": 0.12048410624265671, "learning_rate": 0.0005, "loss": 2.1396, "step": 31340 }, { "epoch": 0.11932583756461104, "grad_norm": 0.136560320854187, "learning_rate": 0.0005, "loss": 2.1438, "step": 31350 }, { "epoch": 0.11936390003273373, "grad_norm": 0.117899589240551, "learning_rate": 0.0005, "loss": 2.1323, "step": 31360 }, { "epoch": 0.11940196250085641, "grad_norm": 0.13653498888015747, "learning_rate": 0.0005, "loss": 2.1441, "step": 31370 }, { "epoch": 0.11944002496897908, "grad_norm": 0.13502705097198486, "learning_rate": 0.0005, "loss": 2.1398, "step": 31380 }, { "epoch": 0.11947808743710177, "grad_norm": 0.11919333785772324, "learning_rate": 0.0005, "loss": 2.162, "step": 31390 }, { "epoch": 0.11951614990522445, "grad_norm": 0.13186244666576385, "learning_rate": 0.0005, "loss": 2.1608, "step": 31400 }, { "epoch": 0.11955421237334714, "grad_norm": 0.11902909725904465, "learning_rate": 0.0005, "loss": 2.1464, "step": 31410 }, { "epoch": 0.11959227484146982, "grad_norm": 0.11676482111215591, "learning_rate": 0.0005, "loss": 2.1375, "step": 31420 }, { "epoch": 0.11963033730959251, "grad_norm": 0.13875927031040192, "learning_rate": 0.0005, "loss": 2.1342, "step": 31430 }, { "epoch": 0.11966839977771519, "grad_norm": 0.1109924167394638, "learning_rate": 0.0005, "loss": 2.1137, "step": 31440 }, { "epoch": 0.11970646224583786, "grad_norm": 0.1171596497297287, "learning_rate": 0.0005, "loss": 2.1307, "step": 31450 }, { "epoch": 0.11974452471396055, "grad_norm": 0.14218342304229736, "learning_rate": 0.0005, "loss": 2.1317, "step": 31460 }, { "epoch": 0.11978258718208323, "grad_norm": 0.1319875717163086, "learning_rate": 0.0005, "loss": 2.1434, "step": 31470 }, { "epoch": 0.11982064965020592, "grad_norm": 0.12911342084407806, "learning_rate": 0.0005, "loss": 2.1324, "step": 31480 }, { "epoch": 0.1198587121183286, "grad_norm": 0.12023784965276718, "learning_rate": 0.0005, "loss": 2.143, "step": 31490 }, { "epoch": 0.11989677458645129, "grad_norm": 0.12053267657756805, "learning_rate": 0.0005, "loss": 2.1404, "step": 31500 }, { "epoch": 0.11993483705457397, "grad_norm": 0.11411106586456299, "learning_rate": 0.0005, "loss": 2.1391, "step": 31510 }, { "epoch": 0.11997289952269664, "grad_norm": 0.11728163808584213, "learning_rate": 0.0005, "loss": 2.1383, "step": 31520 }, { "epoch": 0.12001096199081933, "grad_norm": 0.12292353808879852, "learning_rate": 0.0005, "loss": 2.1365, "step": 31530 }, { "epoch": 0.12004902445894201, "grad_norm": 0.12793846428394318, "learning_rate": 0.0005, "loss": 2.1231, "step": 31540 }, { "epoch": 0.1200870869270647, "grad_norm": 0.11590106785297394, "learning_rate": 0.0005, "loss": 2.1452, "step": 31550 }, { "epoch": 0.12012514939518738, "grad_norm": 0.12416353076696396, "learning_rate": 0.0005, "loss": 2.131, "step": 31560 }, { "epoch": 0.12016321186331007, "grad_norm": 0.12791256606578827, "learning_rate": 0.0005, "loss": 2.1467, "step": 31570 }, { "epoch": 0.12020127433143275, "grad_norm": 0.12399394810199738, "learning_rate": 0.0005, "loss": 2.1246, "step": 31580 }, { "epoch": 0.12023933679955542, "grad_norm": 0.11579488962888718, "learning_rate": 0.0005, "loss": 2.1387, "step": 31590 }, { "epoch": 0.12027739926767811, "grad_norm": 0.1366676241159439, "learning_rate": 0.0005, "loss": 2.1391, "step": 31600 }, { "epoch": 0.1203154617358008, "grad_norm": 0.13556742668151855, "learning_rate": 0.0005, "loss": 2.1213, "step": 31610 }, { "epoch": 0.12035352420392348, "grad_norm": 0.11851835995912552, "learning_rate": 0.0005, "loss": 2.1409, "step": 31620 }, { "epoch": 0.12039158667204616, "grad_norm": 0.11656619608402252, "learning_rate": 0.0005, "loss": 2.1427, "step": 31630 }, { "epoch": 0.12042964914016885, "grad_norm": 0.13701976835727692, "learning_rate": 0.0005, "loss": 2.1229, "step": 31640 }, { "epoch": 0.12046771160829153, "grad_norm": 0.13046278059482574, "learning_rate": 0.0005, "loss": 2.1245, "step": 31650 }, { "epoch": 0.1205057740764142, "grad_norm": 0.12305255234241486, "learning_rate": 0.0005, "loss": 2.1509, "step": 31660 }, { "epoch": 0.12054383654453689, "grad_norm": 0.12715394794940948, "learning_rate": 0.0005, "loss": 2.1482, "step": 31670 }, { "epoch": 0.12058189901265957, "grad_norm": 0.11681829392910004, "learning_rate": 0.0005, "loss": 2.1271, "step": 31680 }, { "epoch": 0.12061996148078226, "grad_norm": 0.11660370230674744, "learning_rate": 0.0005, "loss": 2.14, "step": 31690 }, { "epoch": 0.12065802394890494, "grad_norm": 0.1314779371023178, "learning_rate": 0.0005, "loss": 2.1375, "step": 31700 }, { "epoch": 0.12069608641702763, "grad_norm": 0.13059192895889282, "learning_rate": 0.0005, "loss": 2.1455, "step": 31710 }, { "epoch": 0.12073414888515031, "grad_norm": 0.14255261421203613, "learning_rate": 0.0005, "loss": 2.1473, "step": 31720 }, { "epoch": 0.120772211353273, "grad_norm": 0.12946201860904694, "learning_rate": 0.0005, "loss": 2.1345, "step": 31730 }, { "epoch": 0.12081027382139567, "grad_norm": 0.12346374243497849, "learning_rate": 0.0005, "loss": 2.1467, "step": 31740 }, { "epoch": 0.12084833628951835, "grad_norm": 0.12624172866344452, "learning_rate": 0.0005, "loss": 2.1395, "step": 31750 }, { "epoch": 0.12088639875764104, "grad_norm": 0.11160004884004593, "learning_rate": 0.0005, "loss": 2.1215, "step": 31760 }, { "epoch": 0.12092446122576372, "grad_norm": 0.12799963355064392, "learning_rate": 0.0005, "loss": 2.142, "step": 31770 }, { "epoch": 0.12096252369388641, "grad_norm": 0.11344436556100845, "learning_rate": 0.0005, "loss": 2.143, "step": 31780 }, { "epoch": 0.1210005861620091, "grad_norm": 0.12890136241912842, "learning_rate": 0.0005, "loss": 2.1543, "step": 31790 }, { "epoch": 0.12103864863013178, "grad_norm": 0.11942970007658005, "learning_rate": 0.0005, "loss": 2.1357, "step": 31800 }, { "epoch": 0.12107671109825445, "grad_norm": 0.12209248542785645, "learning_rate": 0.0005, "loss": 2.1236, "step": 31810 }, { "epoch": 0.12111477356637713, "grad_norm": 0.123374804854393, "learning_rate": 0.0005, "loss": 2.1271, "step": 31820 }, { "epoch": 0.12115283603449982, "grad_norm": 0.11081527173519135, "learning_rate": 0.0005, "loss": 2.1357, "step": 31830 }, { "epoch": 0.1211908985026225, "grad_norm": 0.11383873969316483, "learning_rate": 0.0005, "loss": 2.1205, "step": 31840 }, { "epoch": 0.12122896097074519, "grad_norm": 0.12558364868164062, "learning_rate": 0.0005, "loss": 2.1396, "step": 31850 }, { "epoch": 0.12126702343886787, "grad_norm": 0.12574754655361176, "learning_rate": 0.0005, "loss": 2.1425, "step": 31860 }, { "epoch": 0.12130508590699056, "grad_norm": 0.12244436889886856, "learning_rate": 0.0005, "loss": 2.1515, "step": 31870 }, { "epoch": 0.12134314837511323, "grad_norm": 0.13229981064796448, "learning_rate": 0.0005, "loss": 2.1385, "step": 31880 }, { "epoch": 0.12138121084323591, "grad_norm": 0.11815565079450607, "learning_rate": 0.0005, "loss": 2.1324, "step": 31890 }, { "epoch": 0.1214192733113586, "grad_norm": 0.13423267006874084, "learning_rate": 0.0005, "loss": 2.1389, "step": 31900 }, { "epoch": 0.12145733577948128, "grad_norm": 0.11861734837293625, "learning_rate": 0.0005, "loss": 2.1381, "step": 31910 }, { "epoch": 0.12149539824760397, "grad_norm": 0.1253175586462021, "learning_rate": 0.0005, "loss": 2.1287, "step": 31920 }, { "epoch": 0.12153346071572665, "grad_norm": 0.124781534075737, "learning_rate": 0.0005, "loss": 2.1271, "step": 31930 }, { "epoch": 0.12157152318384934, "grad_norm": 0.11508560925722122, "learning_rate": 0.0005, "loss": 2.1399, "step": 31940 }, { "epoch": 0.12160958565197201, "grad_norm": 0.1265585869550705, "learning_rate": 0.0005, "loss": 2.1472, "step": 31950 }, { "epoch": 0.1216476481200947, "grad_norm": 0.13155826926231384, "learning_rate": 0.0005, "loss": 2.1366, "step": 31960 }, { "epoch": 0.12168571058821738, "grad_norm": 0.1338793784379959, "learning_rate": 0.0005, "loss": 2.1422, "step": 31970 }, { "epoch": 0.12172377305634006, "grad_norm": 0.1322241872549057, "learning_rate": 0.0005, "loss": 2.1446, "step": 31980 }, { "epoch": 0.12176183552446275, "grad_norm": 0.11637207865715027, "learning_rate": 0.0005, "loss": 2.131, "step": 31990 }, { "epoch": 0.12179989799258543, "grad_norm": 0.12174341827630997, "learning_rate": 0.0005, "loss": 2.1371, "step": 32000 }, { "epoch": 0.12183796046070812, "grad_norm": 0.12612277269363403, "learning_rate": 0.0005, "loss": 2.1298, "step": 32010 }, { "epoch": 0.12187602292883079, "grad_norm": 0.11656036972999573, "learning_rate": 0.0005, "loss": 2.1416, "step": 32020 }, { "epoch": 0.12191408539695348, "grad_norm": 0.12601672112941742, "learning_rate": 0.0005, "loss": 2.1478, "step": 32030 }, { "epoch": 0.12195214786507616, "grad_norm": 0.11768614500761032, "learning_rate": 0.0005, "loss": 2.1496, "step": 32040 }, { "epoch": 0.12199021033319885, "grad_norm": 0.11926057189702988, "learning_rate": 0.0005, "loss": 2.1432, "step": 32050 }, { "epoch": 0.12202827280132153, "grad_norm": 0.11361296474933624, "learning_rate": 0.0005, "loss": 2.1241, "step": 32060 }, { "epoch": 0.12206633526944421, "grad_norm": 0.1238214448094368, "learning_rate": 0.0005, "loss": 2.1309, "step": 32070 }, { "epoch": 0.1221043977375669, "grad_norm": 0.13550199568271637, "learning_rate": 0.0005, "loss": 2.1269, "step": 32080 }, { "epoch": 0.12214246020568957, "grad_norm": 0.13755367696285248, "learning_rate": 0.0005, "loss": 2.1257, "step": 32090 }, { "epoch": 0.12218052267381226, "grad_norm": 0.12826938927173615, "learning_rate": 0.0005, "loss": 2.1359, "step": 32100 }, { "epoch": 0.12221858514193494, "grad_norm": 0.12358184158802032, "learning_rate": 0.0005, "loss": 2.1611, "step": 32110 }, { "epoch": 0.12225664761005763, "grad_norm": 0.12005414068698883, "learning_rate": 0.0005, "loss": 2.1259, "step": 32120 }, { "epoch": 0.12229471007818031, "grad_norm": 0.11488837748765945, "learning_rate": 0.0005, "loss": 2.1323, "step": 32130 }, { "epoch": 0.122332772546303, "grad_norm": 0.1216205582022667, "learning_rate": 0.0005, "loss": 2.1323, "step": 32140 }, { "epoch": 0.12237083501442568, "grad_norm": 0.11891251802444458, "learning_rate": 0.0005, "loss": 2.1469, "step": 32150 }, { "epoch": 0.12240889748254836, "grad_norm": 0.13558711111545563, "learning_rate": 0.0005, "loss": 2.1469, "step": 32160 }, { "epoch": 0.12244695995067104, "grad_norm": 0.11588139086961746, "learning_rate": 0.0005, "loss": 2.1209, "step": 32170 }, { "epoch": 0.12248502241879372, "grad_norm": 0.11844465136528015, "learning_rate": 0.0005, "loss": 2.1323, "step": 32180 }, { "epoch": 0.1225230848869164, "grad_norm": 0.11257705837488174, "learning_rate": 0.0005, "loss": 2.1338, "step": 32190 }, { "epoch": 0.12256114735503909, "grad_norm": 0.13820578157901764, "learning_rate": 0.0005, "loss": 2.1485, "step": 32200 }, { "epoch": 0.12259920982316178, "grad_norm": 0.122981958091259, "learning_rate": 0.0005, "loss": 2.1214, "step": 32210 }, { "epoch": 0.12263727229128446, "grad_norm": 0.1278199404478073, "learning_rate": 0.0005, "loss": 2.1315, "step": 32220 }, { "epoch": 0.12267533475940715, "grad_norm": 0.13852286338806152, "learning_rate": 0.0005, "loss": 2.1404, "step": 32230 }, { "epoch": 0.12271339722752982, "grad_norm": 0.11759456247091293, "learning_rate": 0.0005, "loss": 2.1482, "step": 32240 }, { "epoch": 0.1227514596956525, "grad_norm": 0.12406472116708755, "learning_rate": 0.0005, "loss": 2.1353, "step": 32250 }, { "epoch": 0.12278952216377519, "grad_norm": 0.11742253601551056, "learning_rate": 0.0005, "loss": 2.1172, "step": 32260 }, { "epoch": 0.12282758463189787, "grad_norm": 0.13278815150260925, "learning_rate": 0.0005, "loss": 2.1365, "step": 32270 }, { "epoch": 0.12286564710002056, "grad_norm": 0.12992510199546814, "learning_rate": 0.0005, "loss": 2.1513, "step": 32280 }, { "epoch": 0.12290370956814324, "grad_norm": 0.13639768958091736, "learning_rate": 0.0005, "loss": 2.132, "step": 32290 }, { "epoch": 0.12294177203626593, "grad_norm": 0.12147095799446106, "learning_rate": 0.0005, "loss": 2.1497, "step": 32300 }, { "epoch": 0.1229798345043886, "grad_norm": 0.13889503479003906, "learning_rate": 0.0005, "loss": 2.137, "step": 32310 }, { "epoch": 0.12301789697251128, "grad_norm": 0.1264171451330185, "learning_rate": 0.0005, "loss": 2.1318, "step": 32320 }, { "epoch": 0.12305595944063397, "grad_norm": 0.12271172553300858, "learning_rate": 0.0005, "loss": 2.1353, "step": 32330 }, { "epoch": 0.12309402190875665, "grad_norm": 0.13769884407520294, "learning_rate": 0.0005, "loss": 2.1432, "step": 32340 }, { "epoch": 0.12313208437687934, "grad_norm": 0.11881277710199356, "learning_rate": 0.0005, "loss": 2.1468, "step": 32350 }, { "epoch": 0.12317014684500202, "grad_norm": 0.13204024732112885, "learning_rate": 0.0005, "loss": 2.1451, "step": 32360 }, { "epoch": 0.1232082093131247, "grad_norm": 0.13416194915771484, "learning_rate": 0.0005, "loss": 2.1362, "step": 32370 }, { "epoch": 0.12324627178124738, "grad_norm": 0.12615594267845154, "learning_rate": 0.0005, "loss": 2.1432, "step": 32380 }, { "epoch": 0.12328433424937006, "grad_norm": 0.1237650066614151, "learning_rate": 0.0005, "loss": 2.1326, "step": 32390 }, { "epoch": 0.12332239671749275, "grad_norm": 0.12454275041818619, "learning_rate": 0.0005, "loss": 2.142, "step": 32400 }, { "epoch": 0.12336045918561543, "grad_norm": 0.12052928656339645, "learning_rate": 0.0005, "loss": 2.1533, "step": 32410 }, { "epoch": 0.12339852165373812, "grad_norm": 0.10830031335353851, "learning_rate": 0.0005, "loss": 2.1434, "step": 32420 }, { "epoch": 0.1234365841218608, "grad_norm": 0.1310432404279709, "learning_rate": 0.0005, "loss": 2.1323, "step": 32430 }, { "epoch": 0.12347464658998349, "grad_norm": 0.11813438683748245, "learning_rate": 0.0005, "loss": 2.1468, "step": 32440 }, { "epoch": 0.12351270905810616, "grad_norm": 0.13674898445606232, "learning_rate": 0.0005, "loss": 2.1298, "step": 32450 }, { "epoch": 0.12355077152622884, "grad_norm": 0.14288246631622314, "learning_rate": 0.0005, "loss": 2.1368, "step": 32460 }, { "epoch": 0.12358883399435153, "grad_norm": 0.12398666888475418, "learning_rate": 0.0005, "loss": 2.1359, "step": 32470 }, { "epoch": 0.12362689646247421, "grad_norm": 0.11381809413433075, "learning_rate": 0.0005, "loss": 2.1459, "step": 32480 }, { "epoch": 0.1236649589305969, "grad_norm": 0.12265384197235107, "learning_rate": 0.0005, "loss": 2.1357, "step": 32490 }, { "epoch": 0.12370302139871958, "grad_norm": 0.138731449842453, "learning_rate": 0.0005, "loss": 2.1459, "step": 32500 }, { "epoch": 0.12374108386684227, "grad_norm": 0.13886143267154694, "learning_rate": 0.0005, "loss": 2.146, "step": 32510 }, { "epoch": 0.12377914633496495, "grad_norm": 0.11149519681930542, "learning_rate": 0.0005, "loss": 2.1264, "step": 32520 }, { "epoch": 0.12381720880308762, "grad_norm": 0.11746298521757126, "learning_rate": 0.0005, "loss": 2.133, "step": 32530 }, { "epoch": 0.12385527127121031, "grad_norm": 0.13325706124305725, "learning_rate": 0.0005, "loss": 2.1412, "step": 32540 }, { "epoch": 0.12389333373933299, "grad_norm": 0.13062238693237305, "learning_rate": 0.0005, "loss": 2.1447, "step": 32550 }, { "epoch": 0.12393139620745568, "grad_norm": 0.11524543911218643, "learning_rate": 0.0005, "loss": 2.1481, "step": 32560 }, { "epoch": 0.12396945867557836, "grad_norm": 0.11860724538564682, "learning_rate": 0.0005, "loss": 2.1299, "step": 32570 }, { "epoch": 0.12400752114370105, "grad_norm": 0.11665958166122437, "learning_rate": 0.0005, "loss": 2.1297, "step": 32580 }, { "epoch": 0.12404558361182373, "grad_norm": 0.1318943053483963, "learning_rate": 0.0005, "loss": 2.1498, "step": 32590 }, { "epoch": 0.1240836460799464, "grad_norm": 0.12036443501710892, "learning_rate": 0.0005, "loss": 2.1477, "step": 32600 }, { "epoch": 0.12412170854806909, "grad_norm": 0.12892819941043854, "learning_rate": 0.0005, "loss": 2.1522, "step": 32610 }, { "epoch": 0.12415977101619177, "grad_norm": 0.137188121676445, "learning_rate": 0.0005, "loss": 2.1425, "step": 32620 }, { "epoch": 0.12419783348431446, "grad_norm": 0.12776799499988556, "learning_rate": 0.0005, "loss": 2.1237, "step": 32630 }, { "epoch": 0.12423589595243714, "grad_norm": 0.1168050542473793, "learning_rate": 0.0005, "loss": 2.1382, "step": 32640 }, { "epoch": 0.12427395842055983, "grad_norm": 0.12604869902133942, "learning_rate": 0.0005, "loss": 2.1284, "step": 32650 }, { "epoch": 0.12431202088868251, "grad_norm": 0.12291269749403, "learning_rate": 0.0005, "loss": 2.1559, "step": 32660 }, { "epoch": 0.12435008335680518, "grad_norm": 0.11891001462936401, "learning_rate": 0.0005, "loss": 2.127, "step": 32670 }, { "epoch": 0.12438814582492787, "grad_norm": 0.1177201047539711, "learning_rate": 0.0005, "loss": 2.1377, "step": 32680 }, { "epoch": 0.12442620829305055, "grad_norm": 0.12202750146389008, "learning_rate": 0.0005, "loss": 2.1401, "step": 32690 }, { "epoch": 0.12446427076117324, "grad_norm": 0.12012209743261337, "learning_rate": 0.0005, "loss": 2.1286, "step": 32700 }, { "epoch": 0.12450233322929592, "grad_norm": 0.13094893097877502, "learning_rate": 0.0005, "loss": 2.1336, "step": 32710 }, { "epoch": 0.12454039569741861, "grad_norm": 0.12597648799419403, "learning_rate": 0.0005, "loss": 2.1406, "step": 32720 }, { "epoch": 0.12457845816554129, "grad_norm": 0.13822618126869202, "learning_rate": 0.0005, "loss": 2.1376, "step": 32730 }, { "epoch": 0.12461652063366396, "grad_norm": 0.1339564472436905, "learning_rate": 0.0005, "loss": 2.1385, "step": 32740 }, { "epoch": 0.12465458310178665, "grad_norm": 0.1378975808620453, "learning_rate": 0.0005, "loss": 2.1413, "step": 32750 }, { "epoch": 0.12469264556990933, "grad_norm": 0.11569910496473312, "learning_rate": 0.0005, "loss": 2.1319, "step": 32760 }, { "epoch": 0.12473070803803202, "grad_norm": 0.11427119374275208, "learning_rate": 0.0005, "loss": 2.1432, "step": 32770 }, { "epoch": 0.1247687705061547, "grad_norm": 0.12961478531360626, "learning_rate": 0.0005, "loss": 2.1298, "step": 32780 }, { "epoch": 0.12480683297427739, "grad_norm": 1.0075163841247559, "learning_rate": 0.0005, "loss": 2.1169, "step": 32790 }, { "epoch": 0.12484489544240007, "grad_norm": 0.11409968882799149, "learning_rate": 0.0005, "loss": 2.1596, "step": 32800 }, { "epoch": 0.12488295791052274, "grad_norm": 0.1265895962715149, "learning_rate": 0.0005, "loss": 2.1378, "step": 32810 }, { "epoch": 0.12492102037864543, "grad_norm": 0.11860281974077225, "learning_rate": 0.0005, "loss": 2.1382, "step": 32820 }, { "epoch": 0.12495908284676811, "grad_norm": 0.12064908444881439, "learning_rate": 0.0005, "loss": 2.1422, "step": 32830 }, { "epoch": 0.1249971453148908, "grad_norm": 0.1199292466044426, "learning_rate": 0.0005, "loss": 2.14, "step": 32840 }, { "epoch": 0.12503520778301347, "grad_norm": 0.1281987875699997, "learning_rate": 0.0005, "loss": 2.1332, "step": 32850 }, { "epoch": 0.12507327025113615, "grad_norm": 0.12015961110591888, "learning_rate": 0.0005, "loss": 2.1333, "step": 32860 }, { "epoch": 0.12511133271925884, "grad_norm": 0.11467165499925613, "learning_rate": 0.0005, "loss": 2.1328, "step": 32870 }, { "epoch": 0.12514939518738152, "grad_norm": 0.13084529340267181, "learning_rate": 0.0005, "loss": 2.1335, "step": 32880 }, { "epoch": 0.1251874576555042, "grad_norm": 0.1264481097459793, "learning_rate": 0.0005, "loss": 2.1523, "step": 32890 }, { "epoch": 0.1252255201236269, "grad_norm": 0.11985542625188828, "learning_rate": 0.0005, "loss": 2.1281, "step": 32900 }, { "epoch": 0.12526358259174958, "grad_norm": 0.12975460290908813, "learning_rate": 0.0005, "loss": 2.1455, "step": 32910 }, { "epoch": 0.12530164505987226, "grad_norm": 0.11748325824737549, "learning_rate": 0.0005, "loss": 2.1267, "step": 32920 }, { "epoch": 0.12533970752799495, "grad_norm": 0.12457460910081863, "learning_rate": 0.0005, "loss": 2.1356, "step": 32930 }, { "epoch": 0.12537776999611763, "grad_norm": 0.1297825276851654, "learning_rate": 0.0005, "loss": 2.1302, "step": 32940 }, { "epoch": 0.12541583246424032, "grad_norm": 0.14112438261508942, "learning_rate": 0.0005, "loss": 2.1341, "step": 32950 }, { "epoch": 0.125453894932363, "grad_norm": 0.12287167459726334, "learning_rate": 0.0005, "loss": 2.1231, "step": 32960 }, { "epoch": 0.1254919574004857, "grad_norm": 0.12792804837226868, "learning_rate": 0.0005, "loss": 2.1254, "step": 32970 }, { "epoch": 0.12553001986860837, "grad_norm": 0.11716022342443466, "learning_rate": 0.0005, "loss": 2.147, "step": 32980 }, { "epoch": 0.12556808233673103, "grad_norm": 0.1469336301088333, "learning_rate": 0.0005, "loss": 2.1487, "step": 32990 }, { "epoch": 0.12560614480485371, "grad_norm": 0.1205238625407219, "learning_rate": 0.0005, "loss": 2.1484, "step": 33000 }, { "epoch": 0.1256442072729764, "grad_norm": 0.11697216331958771, "learning_rate": 0.0005, "loss": 2.1487, "step": 33010 }, { "epoch": 0.12568226974109908, "grad_norm": 0.10925181210041046, "learning_rate": 0.0005, "loss": 2.1405, "step": 33020 }, { "epoch": 0.12572033220922177, "grad_norm": 0.1148732453584671, "learning_rate": 0.0005, "loss": 2.1405, "step": 33030 }, { "epoch": 0.12575839467734445, "grad_norm": 0.1138172373175621, "learning_rate": 0.0005, "loss": 2.1306, "step": 33040 }, { "epoch": 0.12579645714546714, "grad_norm": 0.12518808245658875, "learning_rate": 0.0005, "loss": 2.1431, "step": 33050 }, { "epoch": 0.12583451961358982, "grad_norm": 0.12803898751735687, "learning_rate": 0.0005, "loss": 2.144, "step": 33060 }, { "epoch": 0.1258725820817125, "grad_norm": 0.1226850152015686, "learning_rate": 0.0005, "loss": 2.1458, "step": 33070 }, { "epoch": 0.1259106445498352, "grad_norm": 0.1251627802848816, "learning_rate": 0.0005, "loss": 2.1396, "step": 33080 }, { "epoch": 0.12594870701795788, "grad_norm": 0.12983007729053497, "learning_rate": 0.0005, "loss": 2.1472, "step": 33090 }, { "epoch": 0.12598676948608056, "grad_norm": 0.11952579766511917, "learning_rate": 0.0005, "loss": 2.1532, "step": 33100 }, { "epoch": 0.12602483195420325, "grad_norm": 0.12259417027235031, "learning_rate": 0.0005, "loss": 2.1388, "step": 33110 }, { "epoch": 0.12606289442232593, "grad_norm": 0.12636563181877136, "learning_rate": 0.0005, "loss": 2.148, "step": 33120 }, { "epoch": 0.12610095689044862, "grad_norm": 0.1269903928041458, "learning_rate": 0.0005, "loss": 2.1282, "step": 33130 }, { "epoch": 0.12613901935857127, "grad_norm": 0.13562357425689697, "learning_rate": 0.0005, "loss": 2.145, "step": 33140 }, { "epoch": 0.12617708182669396, "grad_norm": 0.11688810586929321, "learning_rate": 0.0005, "loss": 2.1492, "step": 33150 }, { "epoch": 0.12621514429481664, "grad_norm": 0.11836229264736176, "learning_rate": 0.0005, "loss": 2.1175, "step": 33160 }, { "epoch": 0.12625320676293933, "grad_norm": 0.12402665615081787, "learning_rate": 0.0005, "loss": 2.1325, "step": 33170 }, { "epoch": 0.12629126923106201, "grad_norm": 0.13359686732292175, "learning_rate": 0.0005, "loss": 2.1393, "step": 33180 }, { "epoch": 0.1263293316991847, "grad_norm": 0.21467049419879913, "learning_rate": 0.0005, "loss": 2.1373, "step": 33190 }, { "epoch": 0.12636739416730738, "grad_norm": 0.1315467804670334, "learning_rate": 0.0005, "loss": 2.1242, "step": 33200 }, { "epoch": 0.12640545663543007, "grad_norm": 0.1368442177772522, "learning_rate": 0.0005, "loss": 2.1337, "step": 33210 }, { "epoch": 0.12644351910355275, "grad_norm": 0.1287236213684082, "learning_rate": 0.0005, "loss": 2.1376, "step": 33220 }, { "epoch": 0.12648158157167544, "grad_norm": 0.12770278751850128, "learning_rate": 0.0005, "loss": 2.1317, "step": 33230 }, { "epoch": 0.12651964403979812, "grad_norm": 0.13059884309768677, "learning_rate": 0.0005, "loss": 2.141, "step": 33240 }, { "epoch": 0.1265577065079208, "grad_norm": 0.12187020480632782, "learning_rate": 0.0005, "loss": 2.144, "step": 33250 }, { "epoch": 0.1265957689760435, "grad_norm": 0.12663011252880096, "learning_rate": 0.0005, "loss": 2.1469, "step": 33260 }, { "epoch": 0.12663383144416618, "grad_norm": 0.13077551126480103, "learning_rate": 0.0005, "loss": 2.1476, "step": 33270 }, { "epoch": 0.12667189391228884, "grad_norm": 0.12449769675731659, "learning_rate": 0.0005, "loss": 2.1333, "step": 33280 }, { "epoch": 0.12670995638041152, "grad_norm": 0.12197001278400421, "learning_rate": 0.0005, "loss": 2.1361, "step": 33290 }, { "epoch": 0.1267480188485342, "grad_norm": 0.11997847259044647, "learning_rate": 0.0005, "loss": 2.1328, "step": 33300 }, { "epoch": 0.1267860813166569, "grad_norm": 0.11869657784700394, "learning_rate": 0.0005, "loss": 2.138, "step": 33310 }, { "epoch": 0.12682414378477957, "grad_norm": 0.14480037987232208, "learning_rate": 0.0005, "loss": 2.133, "step": 33320 }, { "epoch": 0.12686220625290226, "grad_norm": 0.1121041402220726, "learning_rate": 0.0005, "loss": 2.1424, "step": 33330 }, { "epoch": 0.12690026872102494, "grad_norm": 0.11679790169000626, "learning_rate": 0.0005, "loss": 2.1371, "step": 33340 }, { "epoch": 0.12693833118914763, "grad_norm": 0.11968211829662323, "learning_rate": 0.0005, "loss": 2.1415, "step": 33350 }, { "epoch": 0.12697639365727031, "grad_norm": 0.1225665882229805, "learning_rate": 0.0005, "loss": 2.1449, "step": 33360 }, { "epoch": 0.127014456125393, "grad_norm": 0.13022850453853607, "learning_rate": 0.0005, "loss": 2.141, "step": 33370 }, { "epoch": 0.12705251859351568, "grad_norm": 0.11416057497262955, "learning_rate": 0.0005, "loss": 2.1316, "step": 33380 }, { "epoch": 0.12709058106163837, "grad_norm": 0.1268990933895111, "learning_rate": 0.0005, "loss": 2.1392, "step": 33390 }, { "epoch": 0.12712864352976105, "grad_norm": 0.11592286080121994, "learning_rate": 0.0005, "loss": 2.1361, "step": 33400 }, { "epoch": 0.12716670599788374, "grad_norm": 0.12762770056724548, "learning_rate": 0.0005, "loss": 2.1455, "step": 33410 }, { "epoch": 0.1272047684660064, "grad_norm": 0.12282517552375793, "learning_rate": 0.0005, "loss": 2.1238, "step": 33420 }, { "epoch": 0.12724283093412908, "grad_norm": 0.1247594878077507, "learning_rate": 0.0005, "loss": 2.1337, "step": 33430 }, { "epoch": 0.12728089340225177, "grad_norm": 0.11615666002035141, "learning_rate": 0.0005, "loss": 2.1137, "step": 33440 }, { "epoch": 0.12731895587037445, "grad_norm": 0.12424363940954208, "learning_rate": 0.0005, "loss": 2.1394, "step": 33450 }, { "epoch": 0.12735701833849714, "grad_norm": 0.1269409954547882, "learning_rate": 0.0005, "loss": 2.1396, "step": 33460 }, { "epoch": 0.12739508080661982, "grad_norm": 0.12168506532907486, "learning_rate": 0.0005, "loss": 2.1394, "step": 33470 }, { "epoch": 0.1274331432747425, "grad_norm": 0.12149317562580109, "learning_rate": 0.0005, "loss": 2.1379, "step": 33480 }, { "epoch": 0.1274712057428652, "grad_norm": 0.13020338118076324, "learning_rate": 0.0005, "loss": 2.1494, "step": 33490 }, { "epoch": 0.12750926821098788, "grad_norm": 0.12166621536016464, "learning_rate": 0.0005, "loss": 2.1326, "step": 33500 }, { "epoch": 0.12754733067911056, "grad_norm": 0.12197873741388321, "learning_rate": 0.0005, "loss": 2.1344, "step": 33510 }, { "epoch": 0.12758539314723324, "grad_norm": 0.115919329226017, "learning_rate": 0.0005, "loss": 2.1393, "step": 33520 }, { "epoch": 0.12762345561535593, "grad_norm": 0.1329682618379593, "learning_rate": 0.0005, "loss": 2.1372, "step": 33530 }, { "epoch": 0.12766151808347861, "grad_norm": 0.12893423438072205, "learning_rate": 0.0005, "loss": 2.1177, "step": 33540 }, { "epoch": 0.1276995805516013, "grad_norm": 0.12900428473949432, "learning_rate": 0.0005, "loss": 2.1359, "step": 33550 }, { "epoch": 0.12773764301972398, "grad_norm": 0.11646492779254913, "learning_rate": 0.0005, "loss": 2.1372, "step": 33560 }, { "epoch": 0.12777570548784664, "grad_norm": 0.11849641054868698, "learning_rate": 0.0005, "loss": 2.1527, "step": 33570 }, { "epoch": 0.12781376795596933, "grad_norm": 0.12793667614459991, "learning_rate": 0.0005, "loss": 2.1283, "step": 33580 }, { "epoch": 0.127851830424092, "grad_norm": 0.11357719451189041, "learning_rate": 0.0005, "loss": 2.1259, "step": 33590 }, { "epoch": 0.1278898928922147, "grad_norm": 0.12847648561000824, "learning_rate": 0.0005, "loss": 2.1348, "step": 33600 }, { "epoch": 0.12792795536033738, "grad_norm": 0.1309060901403427, "learning_rate": 0.0005, "loss": 2.1345, "step": 33610 }, { "epoch": 0.12796601782846007, "grad_norm": 0.11633670330047607, "learning_rate": 0.0005, "loss": 2.1351, "step": 33620 }, { "epoch": 0.12800408029658275, "grad_norm": 0.12359406799077988, "learning_rate": 0.0005, "loss": 2.1317, "step": 33630 }, { "epoch": 0.12804214276470544, "grad_norm": 0.11715533584356308, "learning_rate": 0.0005, "loss": 2.1393, "step": 33640 }, { "epoch": 0.12808020523282812, "grad_norm": 0.12290675938129425, "learning_rate": 0.0005, "loss": 2.1419, "step": 33650 }, { "epoch": 0.1281182677009508, "grad_norm": 0.13310450315475464, "learning_rate": 0.0005, "loss": 2.1302, "step": 33660 }, { "epoch": 0.1281563301690735, "grad_norm": 0.13879603147506714, "learning_rate": 0.0005, "loss": 2.1117, "step": 33670 }, { "epoch": 0.12819439263719618, "grad_norm": 0.14221248030662537, "learning_rate": 0.0005, "loss": 2.1385, "step": 33680 }, { "epoch": 0.12823245510531886, "grad_norm": 0.12564119696617126, "learning_rate": 0.0005, "loss": 2.1336, "step": 33690 }, { "epoch": 0.12827051757344154, "grad_norm": 0.1257466971874237, "learning_rate": 0.0005, "loss": 2.1296, "step": 33700 }, { "epoch": 0.1283085800415642, "grad_norm": 0.12742167711257935, "learning_rate": 0.0005, "loss": 2.1197, "step": 33710 }, { "epoch": 0.1283466425096869, "grad_norm": 0.1109384074807167, "learning_rate": 0.0005, "loss": 2.1331, "step": 33720 }, { "epoch": 0.12838470497780957, "grad_norm": 0.13379578292369843, "learning_rate": 0.0005, "loss": 2.144, "step": 33730 }, { "epoch": 0.12842276744593226, "grad_norm": 0.14024749398231506, "learning_rate": 0.0005, "loss": 2.1303, "step": 33740 }, { "epoch": 0.12846082991405494, "grad_norm": 0.15728142857551575, "learning_rate": 0.0005, "loss": 2.1306, "step": 33750 }, { "epoch": 0.12849889238217763, "grad_norm": 0.11820480227470398, "learning_rate": 0.0005, "loss": 2.1236, "step": 33760 }, { "epoch": 0.1285369548503003, "grad_norm": 0.11761283129453659, "learning_rate": 0.0005, "loss": 2.1203, "step": 33770 }, { "epoch": 0.128575017318423, "grad_norm": 0.1218690425157547, "learning_rate": 0.0005, "loss": 2.1457, "step": 33780 }, { "epoch": 0.12861307978654568, "grad_norm": 0.13610930740833282, "learning_rate": 0.0005, "loss": 2.1289, "step": 33790 }, { "epoch": 0.12865114225466837, "grad_norm": 0.1270422637462616, "learning_rate": 0.0005, "loss": 2.1377, "step": 33800 }, { "epoch": 0.12868920472279105, "grad_norm": 0.1524609476327896, "learning_rate": 0.0005, "loss": 2.1496, "step": 33810 }, { "epoch": 0.12872726719091374, "grad_norm": 0.13481594622135162, "learning_rate": 0.0005, "loss": 2.1363, "step": 33820 }, { "epoch": 0.12876532965903642, "grad_norm": 0.1191399097442627, "learning_rate": 0.0005, "loss": 2.1299, "step": 33830 }, { "epoch": 0.1288033921271591, "grad_norm": 0.13578785955905914, "learning_rate": 0.0005, "loss": 2.137, "step": 33840 }, { "epoch": 0.1288414545952818, "grad_norm": 0.1373569369316101, "learning_rate": 0.0005, "loss": 2.1208, "step": 33850 }, { "epoch": 0.12887951706340445, "grad_norm": 0.12380945682525635, "learning_rate": 0.0005, "loss": 2.1401, "step": 33860 }, { "epoch": 0.12891757953152713, "grad_norm": 0.12317613512277603, "learning_rate": 0.0005, "loss": 2.1409, "step": 33870 }, { "epoch": 0.12895564199964982, "grad_norm": 0.13095170259475708, "learning_rate": 0.0005, "loss": 2.151, "step": 33880 }, { "epoch": 0.1289937044677725, "grad_norm": 0.17096243798732758, "learning_rate": 0.0005, "loss": 2.145, "step": 33890 }, { "epoch": 0.1290317669358952, "grad_norm": 0.11582276225090027, "learning_rate": 0.0005, "loss": 2.1422, "step": 33900 }, { "epoch": 0.12906982940401787, "grad_norm": 0.12426700443029404, "learning_rate": 0.0005, "loss": 2.1372, "step": 33910 }, { "epoch": 0.12910789187214056, "grad_norm": 0.12811040878295898, "learning_rate": 0.0005, "loss": 2.1408, "step": 33920 }, { "epoch": 0.12914595434026324, "grad_norm": 0.11992785334587097, "learning_rate": 0.0005, "loss": 2.152, "step": 33930 }, { "epoch": 0.12918401680838593, "grad_norm": 0.1243828758597374, "learning_rate": 0.0005, "loss": 2.1324, "step": 33940 }, { "epoch": 0.1292220792765086, "grad_norm": 0.11735928058624268, "learning_rate": 0.0005, "loss": 2.144, "step": 33950 }, { "epoch": 0.1292601417446313, "grad_norm": 0.11512462049722672, "learning_rate": 0.0005, "loss": 2.1235, "step": 33960 }, { "epoch": 0.12929820421275398, "grad_norm": 0.11653441935777664, "learning_rate": 0.0005, "loss": 2.1397, "step": 33970 }, { "epoch": 0.12933626668087667, "grad_norm": 0.11661040037870407, "learning_rate": 0.0005, "loss": 2.1393, "step": 33980 }, { "epoch": 0.12937432914899935, "grad_norm": 0.13248026371002197, "learning_rate": 0.0005, "loss": 2.1468, "step": 33990 }, { "epoch": 0.129412391617122, "grad_norm": 0.1311565339565277, "learning_rate": 0.0005, "loss": 2.1245, "step": 34000 }, { "epoch": 0.1294504540852447, "grad_norm": 0.13021622598171234, "learning_rate": 0.0005, "loss": 2.127, "step": 34010 }, { "epoch": 0.12948851655336738, "grad_norm": 0.11427046358585358, "learning_rate": 0.0005, "loss": 2.1532, "step": 34020 }, { "epoch": 0.12952657902149006, "grad_norm": 0.13352714478969574, "learning_rate": 0.0005, "loss": 2.1372, "step": 34030 }, { "epoch": 0.12956464148961275, "grad_norm": 0.10974465310573578, "learning_rate": 0.0005, "loss": 2.1283, "step": 34040 }, { "epoch": 0.12960270395773543, "grad_norm": 0.11958344280719757, "learning_rate": 0.0005, "loss": 2.1353, "step": 34050 }, { "epoch": 0.12964076642585812, "grad_norm": 0.12198679894208908, "learning_rate": 0.0005, "loss": 2.133, "step": 34060 }, { "epoch": 0.1296788288939808, "grad_norm": 0.1296238899230957, "learning_rate": 0.0005, "loss": 2.1393, "step": 34070 }, { "epoch": 0.1297168913621035, "grad_norm": 0.11404258757829666, "learning_rate": 0.0005, "loss": 2.119, "step": 34080 }, { "epoch": 0.12975495383022617, "grad_norm": 0.12420056760311127, "learning_rate": 0.0005, "loss": 2.1296, "step": 34090 }, { "epoch": 0.12979301629834886, "grad_norm": 0.1177731528878212, "learning_rate": 0.0005, "loss": 2.1315, "step": 34100 }, { "epoch": 0.12983107876647154, "grad_norm": 0.12449658662080765, "learning_rate": 0.0005, "loss": 2.1483, "step": 34110 }, { "epoch": 0.12986914123459423, "grad_norm": 0.1282752901315689, "learning_rate": 0.0005, "loss": 2.1339, "step": 34120 }, { "epoch": 0.1299072037027169, "grad_norm": 0.13742892444133759, "learning_rate": 0.0005, "loss": 2.1361, "step": 34130 }, { "epoch": 0.12994526617083957, "grad_norm": 0.12925803661346436, "learning_rate": 0.0005, "loss": 2.1339, "step": 34140 }, { "epoch": 0.12998332863896225, "grad_norm": 0.1282254308462143, "learning_rate": 0.0005, "loss": 2.138, "step": 34150 }, { "epoch": 0.13002139110708494, "grad_norm": 0.12230316549539566, "learning_rate": 0.0005, "loss": 2.1389, "step": 34160 }, { "epoch": 0.13005945357520762, "grad_norm": 0.1297813206911087, "learning_rate": 0.0005, "loss": 2.1393, "step": 34170 }, { "epoch": 0.1300975160433303, "grad_norm": 0.12948966026306152, "learning_rate": 0.0005, "loss": 2.1383, "step": 34180 }, { "epoch": 0.130135578511453, "grad_norm": 0.1196913793683052, "learning_rate": 0.0005, "loss": 2.1409, "step": 34190 }, { "epoch": 0.13017364097957568, "grad_norm": 0.12581545114517212, "learning_rate": 0.0005, "loss": 2.1567, "step": 34200 }, { "epoch": 0.13021170344769836, "grad_norm": 0.10900751501321793, "learning_rate": 0.0005, "loss": 2.128, "step": 34210 }, { "epoch": 0.13024976591582105, "grad_norm": 0.12207692861557007, "learning_rate": 0.0005, "loss": 2.1326, "step": 34220 }, { "epoch": 0.13028782838394373, "grad_norm": 0.12275753170251846, "learning_rate": 0.0005, "loss": 2.1529, "step": 34230 }, { "epoch": 0.13032589085206642, "grad_norm": 0.12474631518125534, "learning_rate": 0.0005, "loss": 2.1257, "step": 34240 }, { "epoch": 0.1303639533201891, "grad_norm": 0.12170256674289703, "learning_rate": 0.0005, "loss": 2.1358, "step": 34250 }, { "epoch": 0.1304020157883118, "grad_norm": 0.1270124763250351, "learning_rate": 0.0005, "loss": 2.1449, "step": 34260 }, { "epoch": 0.13044007825643447, "grad_norm": 0.14917618036270142, "learning_rate": 0.0005, "loss": 2.1394, "step": 34270 }, { "epoch": 0.13047814072455716, "grad_norm": 0.1254420131444931, "learning_rate": 0.0005, "loss": 2.1327, "step": 34280 }, { "epoch": 0.1305162031926798, "grad_norm": 0.12572523951530457, "learning_rate": 0.0005, "loss": 2.1233, "step": 34290 }, { "epoch": 0.1305542656608025, "grad_norm": 0.11523977667093277, "learning_rate": 0.0005, "loss": 2.1395, "step": 34300 }, { "epoch": 0.13059232812892518, "grad_norm": 0.1346060186624527, "learning_rate": 0.0005, "loss": 2.1347, "step": 34310 }, { "epoch": 0.13063039059704787, "grad_norm": 0.12890216708183289, "learning_rate": 0.0005, "loss": 2.1324, "step": 34320 }, { "epoch": 0.13066845306517055, "grad_norm": 0.13144852221012115, "learning_rate": 0.0005, "loss": 2.1303, "step": 34330 }, { "epoch": 0.13070651553329324, "grad_norm": 0.1467370092868805, "learning_rate": 0.0005, "loss": 2.1402, "step": 34340 }, { "epoch": 0.13074457800141592, "grad_norm": 0.12182767689228058, "learning_rate": 0.0005, "loss": 2.1365, "step": 34350 }, { "epoch": 0.1307826404695386, "grad_norm": 0.12545464932918549, "learning_rate": 0.0005, "loss": 2.1323, "step": 34360 }, { "epoch": 0.1308207029376613, "grad_norm": 0.11060311645269394, "learning_rate": 0.0005, "loss": 2.1365, "step": 34370 }, { "epoch": 0.13085876540578398, "grad_norm": 0.13011306524276733, "learning_rate": 0.0005, "loss": 2.1465, "step": 34380 }, { "epoch": 0.13089682787390666, "grad_norm": 0.13094794750213623, "learning_rate": 0.0005, "loss": 2.1248, "step": 34390 }, { "epoch": 0.13093489034202935, "grad_norm": 0.1258162409067154, "learning_rate": 0.0005, "loss": 2.131, "step": 34400 }, { "epoch": 0.13097295281015203, "grad_norm": 0.12662410736083984, "learning_rate": 0.0005, "loss": 2.1409, "step": 34410 }, { "epoch": 0.13101101527827472, "grad_norm": 0.12168075889348984, "learning_rate": 0.0005, "loss": 2.1307, "step": 34420 }, { "epoch": 0.13104907774639737, "grad_norm": 0.12663094699382782, "learning_rate": 0.0005, "loss": 2.145, "step": 34430 }, { "epoch": 0.13108714021452006, "grad_norm": 0.11823868006467819, "learning_rate": 0.0005, "loss": 2.1247, "step": 34440 }, { "epoch": 0.13112520268264274, "grad_norm": 0.12802527844905853, "learning_rate": 0.0005, "loss": 2.1451, "step": 34450 }, { "epoch": 0.13116326515076543, "grad_norm": 0.12292591482400894, "learning_rate": 0.0005, "loss": 2.135, "step": 34460 }, { "epoch": 0.13120132761888811, "grad_norm": 0.13579413294792175, "learning_rate": 0.0005, "loss": 2.1271, "step": 34470 }, { "epoch": 0.1312393900870108, "grad_norm": 0.11999151855707169, "learning_rate": 0.0005, "loss": 2.1283, "step": 34480 }, { "epoch": 0.13127745255513348, "grad_norm": 0.12723271548748016, "learning_rate": 0.0005, "loss": 2.1207, "step": 34490 }, { "epoch": 0.13131551502325617, "grad_norm": 0.13324995338916779, "learning_rate": 0.0005, "loss": 2.1306, "step": 34500 }, { "epoch": 0.13135357749137885, "grad_norm": 0.12204291671514511, "learning_rate": 0.0005, "loss": 2.1156, "step": 34510 }, { "epoch": 0.13139163995950154, "grad_norm": 0.11512850224971771, "learning_rate": 0.0005, "loss": 2.1493, "step": 34520 }, { "epoch": 0.13142970242762422, "grad_norm": 0.12048737704753876, "learning_rate": 0.0005, "loss": 2.1356, "step": 34530 }, { "epoch": 0.1314677648957469, "grad_norm": 0.12571175396442413, "learning_rate": 0.0005, "loss": 2.1343, "step": 34540 }, { "epoch": 0.1315058273638696, "grad_norm": 0.120302215218544, "learning_rate": 0.0005, "loss": 2.1308, "step": 34550 }, { "epoch": 0.13154388983199228, "grad_norm": 0.13598310947418213, "learning_rate": 0.0005, "loss": 2.1359, "step": 34560 }, { "epoch": 0.13158195230011493, "grad_norm": 0.1138150691986084, "learning_rate": 0.0005, "loss": 2.1402, "step": 34570 }, { "epoch": 0.13162001476823762, "grad_norm": 0.136013001203537, "learning_rate": 0.0005, "loss": 2.1407, "step": 34580 }, { "epoch": 0.1316580772363603, "grad_norm": 0.11748948693275452, "learning_rate": 0.0005, "loss": 2.1487, "step": 34590 }, { "epoch": 0.131696139704483, "grad_norm": 0.11970457434654236, "learning_rate": 0.0005, "loss": 2.1285, "step": 34600 }, { "epoch": 0.13173420217260567, "grad_norm": 0.12416719645261765, "learning_rate": 0.0005, "loss": 2.1369, "step": 34610 }, { "epoch": 0.13177226464072836, "grad_norm": 0.1187528520822525, "learning_rate": 0.0005, "loss": 2.128, "step": 34620 }, { "epoch": 0.13181032710885104, "grad_norm": 0.12313452363014221, "learning_rate": 0.0005, "loss": 2.142, "step": 34630 }, { "epoch": 0.13184838957697373, "grad_norm": 0.13389801979064941, "learning_rate": 0.0005, "loss": 2.1294, "step": 34640 }, { "epoch": 0.13188645204509641, "grad_norm": 0.1279163956642151, "learning_rate": 0.0005, "loss": 2.1232, "step": 34650 }, { "epoch": 0.1319245145132191, "grad_norm": 0.13198937475681305, "learning_rate": 0.0005, "loss": 2.1335, "step": 34660 }, { "epoch": 0.13196257698134178, "grad_norm": 0.11823670566082001, "learning_rate": 0.0005, "loss": 2.149, "step": 34670 }, { "epoch": 0.13200063944946447, "grad_norm": 0.11793782562017441, "learning_rate": 0.0005, "loss": 2.138, "step": 34680 }, { "epoch": 0.13203870191758715, "grad_norm": 0.1369984745979309, "learning_rate": 0.0005, "loss": 2.1299, "step": 34690 }, { "epoch": 0.13207676438570984, "grad_norm": 0.12443507462739944, "learning_rate": 0.0005, "loss": 2.1273, "step": 34700 }, { "epoch": 0.13211482685383252, "grad_norm": 0.13990186154842377, "learning_rate": 0.0005, "loss": 2.12, "step": 34710 }, { "epoch": 0.13215288932195518, "grad_norm": 0.13101623952388763, "learning_rate": 0.0005, "loss": 2.1376, "step": 34720 }, { "epoch": 0.13219095179007787, "grad_norm": 0.12870261073112488, "learning_rate": 0.0005, "loss": 2.1407, "step": 34730 }, { "epoch": 0.13222901425820055, "grad_norm": 0.12486039847135544, "learning_rate": 0.0005, "loss": 2.1385, "step": 34740 }, { "epoch": 0.13226707672632323, "grad_norm": 0.11611022800207138, "learning_rate": 0.0005, "loss": 2.14, "step": 34750 }, { "epoch": 0.13230513919444592, "grad_norm": 0.41225793957710266, "learning_rate": 0.0005, "loss": 2.1387, "step": 34760 }, { "epoch": 0.1323432016625686, "grad_norm": 0.12023355066776276, "learning_rate": 0.0005, "loss": 2.138, "step": 34770 }, { "epoch": 0.1323812641306913, "grad_norm": 0.11662869900465012, "learning_rate": 0.0005, "loss": 2.1418, "step": 34780 }, { "epoch": 0.13241932659881397, "grad_norm": 0.10957567393779755, "learning_rate": 0.0005, "loss": 2.1495, "step": 34790 }, { "epoch": 0.13245738906693666, "grad_norm": 0.12181458622217178, "learning_rate": 0.0005, "loss": 2.14, "step": 34800 }, { "epoch": 0.13249545153505934, "grad_norm": 0.1326204389333725, "learning_rate": 0.0005, "loss": 2.1411, "step": 34810 }, { "epoch": 0.13253351400318203, "grad_norm": 0.1400616616010666, "learning_rate": 0.0005, "loss": 2.1372, "step": 34820 }, { "epoch": 0.13257157647130471, "grad_norm": 0.12994307279586792, "learning_rate": 0.0005, "loss": 2.1422, "step": 34830 }, { "epoch": 0.1326096389394274, "grad_norm": 0.1476479470729828, "learning_rate": 0.0005, "loss": 2.1348, "step": 34840 }, { "epoch": 0.13264770140755008, "grad_norm": 0.12488952279090881, "learning_rate": 0.0005, "loss": 2.1526, "step": 34850 }, { "epoch": 0.13268576387567274, "grad_norm": 0.13037273287773132, "learning_rate": 0.0005, "loss": 2.1197, "step": 34860 }, { "epoch": 0.13272382634379543, "grad_norm": 0.11375074088573456, "learning_rate": 0.0005, "loss": 2.1397, "step": 34870 }, { "epoch": 0.1327618888119181, "grad_norm": 0.11915922164916992, "learning_rate": 0.0005, "loss": 2.1241, "step": 34880 }, { "epoch": 0.1327999512800408, "grad_norm": 0.11518298089504242, "learning_rate": 0.0005, "loss": 2.1307, "step": 34890 }, { "epoch": 0.13283801374816348, "grad_norm": 0.13177339732646942, "learning_rate": 0.0005, "loss": 2.1349, "step": 34900 }, { "epoch": 0.13287607621628617, "grad_norm": 0.12411221116781235, "learning_rate": 0.0005, "loss": 2.134, "step": 34910 }, { "epoch": 0.13291413868440885, "grad_norm": 0.12184184789657593, "learning_rate": 0.0005, "loss": 2.1411, "step": 34920 }, { "epoch": 0.13295220115253154, "grad_norm": 0.13678424060344696, "learning_rate": 0.0005, "loss": 2.1368, "step": 34930 }, { "epoch": 0.13299026362065422, "grad_norm": 0.1206953153014183, "learning_rate": 0.0005, "loss": 2.1379, "step": 34940 }, { "epoch": 0.1330283260887769, "grad_norm": 0.13750183582305908, "learning_rate": 0.0005, "loss": 2.1351, "step": 34950 }, { "epoch": 0.1330663885568996, "grad_norm": 0.12357515096664429, "learning_rate": 0.0005, "loss": 2.1312, "step": 34960 }, { "epoch": 0.13310445102502227, "grad_norm": 0.11207663267850876, "learning_rate": 0.0005, "loss": 2.1277, "step": 34970 }, { "epoch": 0.13314251349314496, "grad_norm": 0.1267058104276657, "learning_rate": 0.0005, "loss": 2.1421, "step": 34980 }, { "epoch": 0.13318057596126764, "grad_norm": 0.12690705060958862, "learning_rate": 0.0005, "loss": 2.1299, "step": 34990 }, { "epoch": 0.13321863842939033, "grad_norm": 0.1255311369895935, "learning_rate": 0.0005, "loss": 2.1426, "step": 35000 }, { "epoch": 0.133256700897513, "grad_norm": 0.11876732110977173, "learning_rate": 0.0005, "loss": 2.1409, "step": 35010 }, { "epoch": 0.13329476336563567, "grad_norm": 0.12742522358894348, "learning_rate": 0.0005, "loss": 2.1315, "step": 35020 }, { "epoch": 0.13333282583375836, "grad_norm": 0.13061149418354034, "learning_rate": 0.0005, "loss": 2.1372, "step": 35030 }, { "epoch": 0.13337088830188104, "grad_norm": 0.14856840670108795, "learning_rate": 0.0005, "loss": 2.1215, "step": 35040 }, { "epoch": 0.13340895077000373, "grad_norm": 0.11686535179615021, "learning_rate": 0.0005, "loss": 2.1442, "step": 35050 }, { "epoch": 0.1334470132381264, "grad_norm": 0.12009676545858383, "learning_rate": 0.0005, "loss": 2.143, "step": 35060 }, { "epoch": 0.1334850757062491, "grad_norm": 0.1326339691877365, "learning_rate": 0.0005, "loss": 2.131, "step": 35070 }, { "epoch": 0.13352313817437178, "grad_norm": 0.11814519762992859, "learning_rate": 0.0005, "loss": 2.1202, "step": 35080 }, { "epoch": 0.13356120064249447, "grad_norm": 0.12367371469736099, "learning_rate": 0.0005, "loss": 2.1549, "step": 35090 }, { "epoch": 0.13359926311061715, "grad_norm": 0.1246814876794815, "learning_rate": 0.0005, "loss": 2.1339, "step": 35100 }, { "epoch": 0.13363732557873984, "grad_norm": 0.1327364146709442, "learning_rate": 0.0005, "loss": 2.1488, "step": 35110 }, { "epoch": 0.13367538804686252, "grad_norm": 0.12271331250667572, "learning_rate": 0.0005, "loss": 2.1405, "step": 35120 }, { "epoch": 0.1337134505149852, "grad_norm": 0.1177125796675682, "learning_rate": 0.0005, "loss": 2.1404, "step": 35130 }, { "epoch": 0.1337515129831079, "grad_norm": 0.11724243313074112, "learning_rate": 0.0005, "loss": 2.1496, "step": 35140 }, { "epoch": 0.13378957545123055, "grad_norm": 0.13234220445156097, "learning_rate": 0.0005, "loss": 2.1426, "step": 35150 }, { "epoch": 0.13382763791935323, "grad_norm": 0.12565819919109344, "learning_rate": 0.0005, "loss": 2.1227, "step": 35160 }, { "epoch": 0.13386570038747592, "grad_norm": 0.12357509136199951, "learning_rate": 0.0005, "loss": 2.1323, "step": 35170 }, { "epoch": 0.1339037628555986, "grad_norm": 0.12172172218561172, "learning_rate": 0.0005, "loss": 2.1326, "step": 35180 }, { "epoch": 0.1339418253237213, "grad_norm": 0.1319197118282318, "learning_rate": 0.0005, "loss": 2.1363, "step": 35190 }, { "epoch": 0.13397988779184397, "grad_norm": 0.12759651243686676, "learning_rate": 0.0005, "loss": 2.1392, "step": 35200 }, { "epoch": 0.13401795025996666, "grad_norm": 0.14796006679534912, "learning_rate": 0.0005, "loss": 2.1294, "step": 35210 }, { "epoch": 0.13405601272808934, "grad_norm": 0.13388635218143463, "learning_rate": 0.0005, "loss": 2.1363, "step": 35220 }, { "epoch": 0.13409407519621203, "grad_norm": 0.11979969590902328, "learning_rate": 0.0005, "loss": 2.1364, "step": 35230 }, { "epoch": 0.1341321376643347, "grad_norm": 0.11983058601617813, "learning_rate": 0.0005, "loss": 2.1204, "step": 35240 }, { "epoch": 0.1341702001324574, "grad_norm": 0.11468696594238281, "learning_rate": 0.0005, "loss": 2.1426, "step": 35250 }, { "epoch": 0.13420826260058008, "grad_norm": 0.1290093958377838, "learning_rate": 0.0005, "loss": 2.1359, "step": 35260 }, { "epoch": 0.13424632506870277, "grad_norm": 0.12517830729484558, "learning_rate": 0.0005, "loss": 2.1213, "step": 35270 }, { "epoch": 0.13428438753682545, "grad_norm": 0.13029910624027252, "learning_rate": 0.0005, "loss": 2.1545, "step": 35280 }, { "epoch": 0.1343224500049481, "grad_norm": 0.12360113859176636, "learning_rate": 0.0005, "loss": 2.1472, "step": 35290 }, { "epoch": 0.1343605124730708, "grad_norm": 0.13174830377101898, "learning_rate": 0.0005, "loss": 2.1544, "step": 35300 }, { "epoch": 0.13439857494119348, "grad_norm": 0.1239006370306015, "learning_rate": 0.0005, "loss": 2.1454, "step": 35310 }, { "epoch": 0.13443663740931616, "grad_norm": 0.1369420737028122, "learning_rate": 0.0005, "loss": 2.1401, "step": 35320 }, { "epoch": 0.13447469987743885, "grad_norm": 0.11984249204397202, "learning_rate": 0.0005, "loss": 2.1283, "step": 35330 }, { "epoch": 0.13451276234556153, "grad_norm": 0.1304861605167389, "learning_rate": 0.0005, "loss": 2.146, "step": 35340 }, { "epoch": 0.13455082481368422, "grad_norm": 0.12857241928577423, "learning_rate": 0.0005, "loss": 2.1369, "step": 35350 }, { "epoch": 0.1345888872818069, "grad_norm": 0.12768466770648956, "learning_rate": 0.0005, "loss": 2.1364, "step": 35360 }, { "epoch": 0.1346269497499296, "grad_norm": 0.12753640115261078, "learning_rate": 0.0005, "loss": 2.1313, "step": 35370 }, { "epoch": 0.13466501221805227, "grad_norm": 0.12317626178264618, "learning_rate": 0.0005, "loss": 2.1405, "step": 35380 }, { "epoch": 0.13470307468617496, "grad_norm": 0.12090485543012619, "learning_rate": 0.0005, "loss": 2.1466, "step": 35390 }, { "epoch": 0.13474113715429764, "grad_norm": 0.13802245259284973, "learning_rate": 0.0005, "loss": 2.1357, "step": 35400 }, { "epoch": 0.13477919962242033, "grad_norm": 0.12055863440036774, "learning_rate": 0.0005, "loss": 2.158, "step": 35410 }, { "epoch": 0.134817262090543, "grad_norm": 0.13493365049362183, "learning_rate": 0.0005, "loss": 2.1277, "step": 35420 }, { "epoch": 0.1348553245586657, "grad_norm": 0.11872437596321106, "learning_rate": 0.0005, "loss": 2.1401, "step": 35430 }, { "epoch": 0.13489338702678835, "grad_norm": 0.127437025308609, "learning_rate": 0.0005, "loss": 2.1387, "step": 35440 }, { "epoch": 0.13493144949491104, "grad_norm": 0.13511709868907928, "learning_rate": 0.0005, "loss": 2.1475, "step": 35450 }, { "epoch": 0.13496951196303372, "grad_norm": 0.12347541749477386, "learning_rate": 0.0005, "loss": 2.1419, "step": 35460 }, { "epoch": 0.1350075744311564, "grad_norm": 0.12082315236330032, "learning_rate": 0.0005, "loss": 2.1365, "step": 35470 }, { "epoch": 0.1350456368992791, "grad_norm": 0.11401130259037018, "learning_rate": 0.0005, "loss": 2.1376, "step": 35480 }, { "epoch": 0.13508369936740178, "grad_norm": 0.1377699226140976, "learning_rate": 0.0005, "loss": 2.1571, "step": 35490 }, { "epoch": 0.13512176183552446, "grad_norm": 0.11370331048965454, "learning_rate": 0.0005, "loss": 2.1324, "step": 35500 }, { "epoch": 0.13515982430364715, "grad_norm": 0.1212431862950325, "learning_rate": 0.0005, "loss": 2.13, "step": 35510 }, { "epoch": 0.13519788677176983, "grad_norm": 0.1107739806175232, "learning_rate": 0.0005, "loss": 2.142, "step": 35520 }, { "epoch": 0.13523594923989252, "grad_norm": 0.1320618987083435, "learning_rate": 0.0005, "loss": 2.1334, "step": 35530 }, { "epoch": 0.1352740117080152, "grad_norm": 0.14418496191501617, "learning_rate": 0.0005, "loss": 2.1445, "step": 35540 }, { "epoch": 0.1353120741761379, "grad_norm": 0.12587794661521912, "learning_rate": 0.0005, "loss": 2.1389, "step": 35550 }, { "epoch": 0.13535013664426057, "grad_norm": 0.11606061458587646, "learning_rate": 0.0005, "loss": 2.1328, "step": 35560 }, { "epoch": 0.13538819911238326, "grad_norm": 0.1209748387336731, "learning_rate": 0.0005, "loss": 2.1413, "step": 35570 }, { "epoch": 0.1354262615805059, "grad_norm": 0.11753486096858978, "learning_rate": 0.0005, "loss": 2.1419, "step": 35580 }, { "epoch": 0.1354643240486286, "grad_norm": 0.15218910574913025, "learning_rate": 0.0005, "loss": 2.1312, "step": 35590 }, { "epoch": 0.13550238651675128, "grad_norm": 0.12810829281806946, "learning_rate": 0.0005, "loss": 2.1203, "step": 35600 }, { "epoch": 0.13554044898487397, "grad_norm": 0.12554165720939636, "learning_rate": 0.0005, "loss": 2.1379, "step": 35610 }, { "epoch": 0.13557851145299665, "grad_norm": 0.11238068342208862, "learning_rate": 0.0005, "loss": 2.1411, "step": 35620 }, { "epoch": 0.13561657392111934, "grad_norm": 0.11834820359945297, "learning_rate": 0.0005, "loss": 2.1387, "step": 35630 }, { "epoch": 0.13565463638924202, "grad_norm": 0.1300107091665268, "learning_rate": 0.0005, "loss": 2.1337, "step": 35640 }, { "epoch": 0.1356926988573647, "grad_norm": 0.12143900245428085, "learning_rate": 0.0005, "loss": 2.1337, "step": 35650 }, { "epoch": 0.1357307613254874, "grad_norm": 0.11875910311937332, "learning_rate": 0.0005, "loss": 2.1444, "step": 35660 }, { "epoch": 0.13576882379361008, "grad_norm": 0.1321776658296585, "learning_rate": 0.0005, "loss": 2.1414, "step": 35670 }, { "epoch": 0.13580688626173276, "grad_norm": 0.13334231078624725, "learning_rate": 0.0005, "loss": 2.1437, "step": 35680 }, { "epoch": 0.13584494872985545, "grad_norm": 0.14358310401439667, "learning_rate": 0.0005, "loss": 2.1323, "step": 35690 }, { "epoch": 0.13588301119797813, "grad_norm": 0.12222322821617126, "learning_rate": 0.0005, "loss": 2.1462, "step": 35700 }, { "epoch": 0.13592107366610082, "grad_norm": 0.13186433911323547, "learning_rate": 0.0005, "loss": 2.1312, "step": 35710 }, { "epoch": 0.13595913613422347, "grad_norm": 0.12122279405593872, "learning_rate": 0.0005, "loss": 2.1409, "step": 35720 }, { "epoch": 0.13599719860234616, "grad_norm": 0.11469511687755585, "learning_rate": 0.0005, "loss": 2.1282, "step": 35730 }, { "epoch": 0.13603526107046884, "grad_norm": 0.14768067002296448, "learning_rate": 0.0005, "loss": 2.1319, "step": 35740 }, { "epoch": 0.13607332353859153, "grad_norm": 0.11734780669212341, "learning_rate": 0.0005, "loss": 2.1357, "step": 35750 }, { "epoch": 0.1361113860067142, "grad_norm": 0.11367372423410416, "learning_rate": 0.0005, "loss": 2.1429, "step": 35760 }, { "epoch": 0.1361494484748369, "grad_norm": 0.12199106812477112, "learning_rate": 0.0005, "loss": 2.1396, "step": 35770 }, { "epoch": 0.13618751094295958, "grad_norm": 0.11509162187576294, "learning_rate": 0.0005, "loss": 2.151, "step": 35780 }, { "epoch": 0.13622557341108227, "grad_norm": 0.11662641912698746, "learning_rate": 0.0005, "loss": 2.1445, "step": 35790 }, { "epoch": 0.13626363587920495, "grad_norm": 0.12217991799116135, "learning_rate": 0.0005, "loss": 2.1501, "step": 35800 }, { "epoch": 0.13630169834732764, "grad_norm": 0.11236120760440826, "learning_rate": 0.0005, "loss": 2.1284, "step": 35810 }, { "epoch": 0.13633976081545032, "grad_norm": 0.12871301174163818, "learning_rate": 0.0005, "loss": 2.1485, "step": 35820 }, { "epoch": 0.136377823283573, "grad_norm": 0.12680701911449432, "learning_rate": 0.0005, "loss": 2.1455, "step": 35830 }, { "epoch": 0.1364158857516957, "grad_norm": 0.12248557060956955, "learning_rate": 0.0005, "loss": 2.1175, "step": 35840 }, { "epoch": 0.13645394821981838, "grad_norm": 0.12114865332841873, "learning_rate": 0.0005, "loss": 2.1306, "step": 35850 }, { "epoch": 0.13649201068794106, "grad_norm": 0.13375422358512878, "learning_rate": 0.0005, "loss": 2.1344, "step": 35860 }, { "epoch": 0.13653007315606372, "grad_norm": 0.1284463107585907, "learning_rate": 0.0005, "loss": 2.14, "step": 35870 }, { "epoch": 0.1365681356241864, "grad_norm": 0.1267269253730774, "learning_rate": 0.0005, "loss": 2.1334, "step": 35880 }, { "epoch": 0.1366061980923091, "grad_norm": 0.1250191330909729, "learning_rate": 0.0005, "loss": 2.1446, "step": 35890 }, { "epoch": 0.13664426056043177, "grad_norm": 0.1312512457370758, "learning_rate": 0.0005, "loss": 2.1265, "step": 35900 }, { "epoch": 0.13668232302855446, "grad_norm": 0.11584340035915375, "learning_rate": 0.0005, "loss": 2.1404, "step": 35910 }, { "epoch": 0.13672038549667714, "grad_norm": 0.11950317770242691, "learning_rate": 0.0005, "loss": 2.1583, "step": 35920 }, { "epoch": 0.13675844796479983, "grad_norm": 0.13057249784469604, "learning_rate": 0.0005, "loss": 2.1349, "step": 35930 }, { "epoch": 0.1367965104329225, "grad_norm": 0.12502941489219666, "learning_rate": 0.0005, "loss": 2.1372, "step": 35940 }, { "epoch": 0.1368345729010452, "grad_norm": 0.13216383755207062, "learning_rate": 0.0005, "loss": 2.1396, "step": 35950 }, { "epoch": 0.13687263536916788, "grad_norm": 0.11722370982170105, "learning_rate": 0.0005, "loss": 2.142, "step": 35960 }, { "epoch": 0.13691069783729057, "grad_norm": 0.12868359684944153, "learning_rate": 0.0005, "loss": 2.1474, "step": 35970 }, { "epoch": 0.13694876030541325, "grad_norm": 0.12457242608070374, "learning_rate": 0.0005, "loss": 2.1477, "step": 35980 }, { "epoch": 0.13698682277353594, "grad_norm": 0.11647374927997589, "learning_rate": 0.0005, "loss": 2.1306, "step": 35990 }, { "epoch": 0.13702488524165862, "grad_norm": 0.1253417730331421, "learning_rate": 0.0005, "loss": 2.1406, "step": 36000 }, { "epoch": 0.13706294770978128, "grad_norm": 0.13089333474636078, "learning_rate": 0.0005, "loss": 2.1396, "step": 36010 }, { "epoch": 0.13710101017790396, "grad_norm": 0.12575079500675201, "learning_rate": 0.0005, "loss": 2.1485, "step": 36020 }, { "epoch": 0.13713907264602665, "grad_norm": 0.12303038686513901, "learning_rate": 0.0005, "loss": 2.144, "step": 36030 }, { "epoch": 0.13717713511414933, "grad_norm": 0.11526139080524445, "learning_rate": 0.0005, "loss": 2.1362, "step": 36040 }, { "epoch": 0.13721519758227202, "grad_norm": 0.12276002019643784, "learning_rate": 0.0005, "loss": 2.1179, "step": 36050 }, { "epoch": 0.1372532600503947, "grad_norm": 0.1275072991847992, "learning_rate": 0.0005, "loss": 2.1416, "step": 36060 }, { "epoch": 0.1372913225185174, "grad_norm": 0.14315102994441986, "learning_rate": 0.0005, "loss": 2.1297, "step": 36070 }, { "epoch": 0.13732938498664007, "grad_norm": 0.11972994357347488, "learning_rate": 0.0005, "loss": 2.1391, "step": 36080 }, { "epoch": 0.13736744745476276, "grad_norm": 0.13286733627319336, "learning_rate": 0.0005, "loss": 2.1509, "step": 36090 }, { "epoch": 0.13740550992288544, "grad_norm": 0.129139244556427, "learning_rate": 0.0005, "loss": 2.1303, "step": 36100 }, { "epoch": 0.13744357239100813, "grad_norm": 0.12458977103233337, "learning_rate": 0.0005, "loss": 2.156, "step": 36110 }, { "epoch": 0.1374816348591308, "grad_norm": 0.12720626592636108, "learning_rate": 0.0005, "loss": 2.1529, "step": 36120 }, { "epoch": 0.1375196973272535, "grad_norm": 0.11940285563468933, "learning_rate": 0.0005, "loss": 2.134, "step": 36130 }, { "epoch": 0.13755775979537618, "grad_norm": 0.1287795752286911, "learning_rate": 0.0005, "loss": 2.1455, "step": 36140 }, { "epoch": 0.13759582226349887, "grad_norm": 0.11979345977306366, "learning_rate": 0.0005, "loss": 2.1314, "step": 36150 }, { "epoch": 0.13763388473162153, "grad_norm": 0.1211363896727562, "learning_rate": 0.0005, "loss": 2.1497, "step": 36160 }, { "epoch": 0.1376719471997442, "grad_norm": 0.11665554344654083, "learning_rate": 0.0005, "loss": 2.1362, "step": 36170 }, { "epoch": 0.1377100096678669, "grad_norm": 0.11989234387874603, "learning_rate": 0.0005, "loss": 2.1457, "step": 36180 }, { "epoch": 0.13774807213598958, "grad_norm": 0.12173985689878464, "learning_rate": 0.0005, "loss": 2.1364, "step": 36190 }, { "epoch": 0.13778613460411226, "grad_norm": 0.11196370422840118, "learning_rate": 0.0005, "loss": 2.1214, "step": 36200 }, { "epoch": 0.13782419707223495, "grad_norm": 0.1438337117433548, "learning_rate": 0.0005, "loss": 2.1442, "step": 36210 }, { "epoch": 0.13786225954035763, "grad_norm": 0.13081873953342438, "learning_rate": 0.0005, "loss": 2.143, "step": 36220 }, { "epoch": 0.13790032200848032, "grad_norm": 0.12510628998279572, "learning_rate": 0.0005, "loss": 2.1305, "step": 36230 }, { "epoch": 0.137938384476603, "grad_norm": 0.11957137286663055, "learning_rate": 0.0005, "loss": 2.1335, "step": 36240 }, { "epoch": 0.1379764469447257, "grad_norm": 0.12658755481243134, "learning_rate": 0.0005, "loss": 2.144, "step": 36250 }, { "epoch": 0.13801450941284837, "grad_norm": 0.13881908357143402, "learning_rate": 0.0005, "loss": 2.1332, "step": 36260 }, { "epoch": 0.13805257188097106, "grad_norm": 0.12685273587703705, "learning_rate": 0.0005, "loss": 2.1277, "step": 36270 }, { "epoch": 0.13809063434909374, "grad_norm": 0.12292812764644623, "learning_rate": 0.0005, "loss": 2.1417, "step": 36280 }, { "epoch": 0.13812869681721643, "grad_norm": 0.1255536824464798, "learning_rate": 0.0005, "loss": 2.1341, "step": 36290 }, { "epoch": 0.13816675928533909, "grad_norm": 0.13109980523586273, "learning_rate": 0.0005, "loss": 2.1383, "step": 36300 }, { "epoch": 0.13820482175346177, "grad_norm": 0.12761881947517395, "learning_rate": 0.0005, "loss": 2.1316, "step": 36310 }, { "epoch": 0.13824288422158446, "grad_norm": 0.1440645009279251, "learning_rate": 0.0005, "loss": 2.1402, "step": 36320 }, { "epoch": 0.13828094668970714, "grad_norm": 0.12873628735542297, "learning_rate": 0.0005, "loss": 2.138, "step": 36330 }, { "epoch": 0.13831900915782983, "grad_norm": 0.12757310271263123, "learning_rate": 0.0005, "loss": 2.113, "step": 36340 }, { "epoch": 0.1383570716259525, "grad_norm": 0.14658595621585846, "learning_rate": 0.0005, "loss": 2.1395, "step": 36350 }, { "epoch": 0.1383951340940752, "grad_norm": 0.12868966162204742, "learning_rate": 0.0005, "loss": 2.1245, "step": 36360 }, { "epoch": 0.13843319656219788, "grad_norm": 0.12172853201627731, "learning_rate": 0.0005, "loss": 2.1496, "step": 36370 }, { "epoch": 0.13847125903032056, "grad_norm": 0.12915430963039398, "learning_rate": 0.0005, "loss": 2.1312, "step": 36380 }, { "epoch": 0.13850932149844325, "grad_norm": 0.11664684861898422, "learning_rate": 0.0005, "loss": 2.1413, "step": 36390 }, { "epoch": 0.13854738396656593, "grad_norm": 0.1285259872674942, "learning_rate": 0.0005, "loss": 2.1389, "step": 36400 }, { "epoch": 0.13858544643468862, "grad_norm": 0.12674078345298767, "learning_rate": 0.0005, "loss": 2.1451, "step": 36410 }, { "epoch": 0.1386235089028113, "grad_norm": 0.1258033961057663, "learning_rate": 0.0005, "loss": 2.1399, "step": 36420 }, { "epoch": 0.138661571370934, "grad_norm": 0.13805773854255676, "learning_rate": 0.0005, "loss": 2.1361, "step": 36430 }, { "epoch": 0.13869963383905665, "grad_norm": 0.12224043905735016, "learning_rate": 0.0005, "loss": 2.1601, "step": 36440 }, { "epoch": 0.13873769630717933, "grad_norm": 0.11138515174388885, "learning_rate": 0.0005, "loss": 2.1293, "step": 36450 }, { "epoch": 0.13877575877530202, "grad_norm": 0.13060350716114044, "learning_rate": 0.0005, "loss": 2.1304, "step": 36460 }, { "epoch": 0.1388138212434247, "grad_norm": 0.12439647316932678, "learning_rate": 0.0005, "loss": 2.1418, "step": 36470 }, { "epoch": 0.13885188371154739, "grad_norm": 0.1259547621011734, "learning_rate": 0.0005, "loss": 2.1433, "step": 36480 }, { "epoch": 0.13888994617967007, "grad_norm": 0.12292451411485672, "learning_rate": 0.0005, "loss": 2.1135, "step": 36490 }, { "epoch": 0.13892800864779276, "grad_norm": 0.11878538131713867, "learning_rate": 0.0005, "loss": 2.1187, "step": 36500 }, { "epoch": 0.13896607111591544, "grad_norm": 0.13011427223682404, "learning_rate": 0.0005, "loss": 2.1425, "step": 36510 }, { "epoch": 0.13900413358403813, "grad_norm": 0.1278398334980011, "learning_rate": 0.0005, "loss": 2.1356, "step": 36520 }, { "epoch": 0.1390421960521608, "grad_norm": 0.1215527355670929, "learning_rate": 0.0005, "loss": 2.1333, "step": 36530 }, { "epoch": 0.1390802585202835, "grad_norm": 0.11837470531463623, "learning_rate": 0.0005, "loss": 2.1381, "step": 36540 }, { "epoch": 0.13911832098840618, "grad_norm": 0.1168607845902443, "learning_rate": 0.0005, "loss": 2.1393, "step": 36550 }, { "epoch": 0.13915638345652886, "grad_norm": 0.12791843712329865, "learning_rate": 0.0005, "loss": 2.1409, "step": 36560 }, { "epoch": 0.13919444592465155, "grad_norm": 0.13247643411159515, "learning_rate": 0.0005, "loss": 2.1454, "step": 36570 }, { "epoch": 0.13923250839277423, "grad_norm": 0.13305790722370148, "learning_rate": 0.0005, "loss": 2.148, "step": 36580 }, { "epoch": 0.1392705708608969, "grad_norm": 0.1222585067152977, "learning_rate": 0.0005, "loss": 2.1379, "step": 36590 }, { "epoch": 0.13930863332901958, "grad_norm": 0.12503567337989807, "learning_rate": 0.0005, "loss": 2.1313, "step": 36600 }, { "epoch": 0.13934669579714226, "grad_norm": 0.12737417221069336, "learning_rate": 0.0005, "loss": 2.1253, "step": 36610 }, { "epoch": 0.13938475826526495, "grad_norm": 0.19949164986610413, "learning_rate": 0.0005, "loss": 2.1256, "step": 36620 }, { "epoch": 0.13942282073338763, "grad_norm": 0.12841491401195526, "learning_rate": 0.0005, "loss": 2.1212, "step": 36630 }, { "epoch": 0.13946088320151032, "grad_norm": 0.1240854263305664, "learning_rate": 0.0005, "loss": 2.1365, "step": 36640 }, { "epoch": 0.139498945669633, "grad_norm": 0.11256958544254303, "learning_rate": 0.0005, "loss": 2.1368, "step": 36650 }, { "epoch": 0.13953700813775569, "grad_norm": 0.11909143626689911, "learning_rate": 0.0005, "loss": 2.1399, "step": 36660 }, { "epoch": 0.13957507060587837, "grad_norm": 0.11793980002403259, "learning_rate": 0.0005, "loss": 2.1401, "step": 36670 }, { "epoch": 0.13961313307400106, "grad_norm": 0.12352468818426132, "learning_rate": 0.0005, "loss": 2.1336, "step": 36680 }, { "epoch": 0.13965119554212374, "grad_norm": 0.11416994780302048, "learning_rate": 0.0005, "loss": 2.1445, "step": 36690 }, { "epoch": 0.13968925801024643, "grad_norm": 0.11804043501615524, "learning_rate": 0.0005, "loss": 2.1417, "step": 36700 }, { "epoch": 0.1397273204783691, "grad_norm": 0.1268421709537506, "learning_rate": 0.0005, "loss": 2.1374, "step": 36710 }, { "epoch": 0.1397653829464918, "grad_norm": 0.1253766119480133, "learning_rate": 0.0005, "loss": 2.1523, "step": 36720 }, { "epoch": 0.13980344541461445, "grad_norm": 0.13076674938201904, "learning_rate": 0.0005, "loss": 2.1195, "step": 36730 }, { "epoch": 0.13984150788273714, "grad_norm": 0.12516288459300995, "learning_rate": 0.0005, "loss": 2.1436, "step": 36740 }, { "epoch": 0.13987957035085982, "grad_norm": 0.12267495691776276, "learning_rate": 0.0005, "loss": 2.1324, "step": 36750 }, { "epoch": 0.1399176328189825, "grad_norm": 0.13755877315998077, "learning_rate": 0.0005, "loss": 2.1384, "step": 36760 }, { "epoch": 0.1399556952871052, "grad_norm": 0.12464100122451782, "learning_rate": 0.0005, "loss": 2.1411, "step": 36770 }, { "epoch": 0.13999375775522788, "grad_norm": 0.11837789416313171, "learning_rate": 0.0005, "loss": 2.1304, "step": 36780 }, { "epoch": 0.14003182022335056, "grad_norm": 0.12519973516464233, "learning_rate": 0.0005, "loss": 2.1304, "step": 36790 }, { "epoch": 0.14006988269147325, "grad_norm": 0.12340795993804932, "learning_rate": 0.0005, "loss": 2.127, "step": 36800 }, { "epoch": 0.14010794515959593, "grad_norm": 0.12131775170564651, "learning_rate": 0.0005, "loss": 2.1436, "step": 36810 }, { "epoch": 0.14014600762771862, "grad_norm": 0.12783758342266083, "learning_rate": 0.0005, "loss": 2.1383, "step": 36820 }, { "epoch": 0.1401840700958413, "grad_norm": 0.11167051643133163, "learning_rate": 0.0005, "loss": 2.1393, "step": 36830 }, { "epoch": 0.14022213256396399, "grad_norm": 0.12533417344093323, "learning_rate": 0.0005, "loss": 2.1386, "step": 36840 }, { "epoch": 0.14026019503208667, "grad_norm": 0.13003957271575928, "learning_rate": 0.0005, "loss": 2.1316, "step": 36850 }, { "epoch": 0.14029825750020936, "grad_norm": 0.12811410427093506, "learning_rate": 0.0005, "loss": 2.1371, "step": 36860 }, { "epoch": 0.140336319968332, "grad_norm": 0.13217073678970337, "learning_rate": 0.0005, "loss": 2.1322, "step": 36870 }, { "epoch": 0.1403743824364547, "grad_norm": 0.12067825347185135, "learning_rate": 0.0005, "loss": 2.1445, "step": 36880 }, { "epoch": 0.14041244490457738, "grad_norm": 0.12828949093818665, "learning_rate": 0.0005, "loss": 2.1345, "step": 36890 }, { "epoch": 0.14045050737270007, "grad_norm": 0.11882949620485306, "learning_rate": 0.0005, "loss": 2.1277, "step": 36900 }, { "epoch": 0.14048856984082275, "grad_norm": 0.11670491844415665, "learning_rate": 0.0005, "loss": 2.1303, "step": 36910 }, { "epoch": 0.14052663230894544, "grad_norm": 0.12217088788747787, "learning_rate": 0.0005, "loss": 2.1343, "step": 36920 }, { "epoch": 0.14056469477706812, "grad_norm": 0.10978880524635315, "learning_rate": 0.0005, "loss": 2.1299, "step": 36930 }, { "epoch": 0.1406027572451908, "grad_norm": 0.11447262018918991, "learning_rate": 0.0005, "loss": 2.1323, "step": 36940 }, { "epoch": 0.1406408197133135, "grad_norm": 0.13392773270606995, "learning_rate": 0.0005, "loss": 2.1545, "step": 36950 }, { "epoch": 0.14067888218143618, "grad_norm": 0.1232706680893898, "learning_rate": 0.0005, "loss": 2.1505, "step": 36960 }, { "epoch": 0.14071694464955886, "grad_norm": 0.13241569697856903, "learning_rate": 0.0005, "loss": 2.1537, "step": 36970 }, { "epoch": 0.14075500711768155, "grad_norm": 0.11878272891044617, "learning_rate": 0.0005, "loss": 2.1344, "step": 36980 }, { "epoch": 0.14079306958580423, "grad_norm": 0.12405449151992798, "learning_rate": 0.0005, "loss": 2.1308, "step": 36990 }, { "epoch": 0.14083113205392692, "grad_norm": 0.11694375425577164, "learning_rate": 0.0005, "loss": 2.1409, "step": 37000 }, { "epoch": 0.1408691945220496, "grad_norm": 0.11805060505867004, "learning_rate": 0.0005, "loss": 2.1402, "step": 37010 }, { "epoch": 0.14090725699017226, "grad_norm": 0.1309378743171692, "learning_rate": 0.0005, "loss": 2.1357, "step": 37020 }, { "epoch": 0.14094531945829494, "grad_norm": 0.13261401653289795, "learning_rate": 0.0005, "loss": 2.1334, "step": 37030 }, { "epoch": 0.14098338192641763, "grad_norm": 0.14187407493591309, "learning_rate": 0.0005, "loss": 2.1309, "step": 37040 }, { "epoch": 0.1410214443945403, "grad_norm": 0.11595277488231659, "learning_rate": 0.0005, "loss": 2.1377, "step": 37050 }, { "epoch": 0.141059506862663, "grad_norm": 0.12343664467334747, "learning_rate": 0.0005, "loss": 2.1248, "step": 37060 }, { "epoch": 0.14109756933078568, "grad_norm": 0.11963911354541779, "learning_rate": 0.0005, "loss": 2.1328, "step": 37070 }, { "epoch": 0.14113563179890837, "grad_norm": 0.12053116410970688, "learning_rate": 0.0005, "loss": 2.1329, "step": 37080 }, { "epoch": 0.14117369426703105, "grad_norm": 0.1328931450843811, "learning_rate": 0.0005, "loss": 2.1263, "step": 37090 }, { "epoch": 0.14121175673515374, "grad_norm": 0.12884977459907532, "learning_rate": 0.0005, "loss": 2.1307, "step": 37100 }, { "epoch": 0.14124981920327642, "grad_norm": 0.12232869118452072, "learning_rate": 0.0005, "loss": 2.136, "step": 37110 }, { "epoch": 0.1412878816713991, "grad_norm": 0.12900561094284058, "learning_rate": 0.0005, "loss": 2.135, "step": 37120 }, { "epoch": 0.1413259441395218, "grad_norm": 0.12357156723737717, "learning_rate": 0.0005, "loss": 2.151, "step": 37130 }, { "epoch": 0.14136400660764448, "grad_norm": 0.11812389642000198, "learning_rate": 0.0005, "loss": 2.1435, "step": 37140 }, { "epoch": 0.14140206907576716, "grad_norm": 0.12773482501506805, "learning_rate": 0.0005, "loss": 2.137, "step": 37150 }, { "epoch": 0.14144013154388982, "grad_norm": 0.12107904255390167, "learning_rate": 0.0005, "loss": 2.1569, "step": 37160 }, { "epoch": 0.1414781940120125, "grad_norm": 0.12494766712188721, "learning_rate": 0.0005, "loss": 2.1522, "step": 37170 }, { "epoch": 0.1415162564801352, "grad_norm": 0.12003045529127121, "learning_rate": 0.0005, "loss": 2.1421, "step": 37180 }, { "epoch": 0.14155431894825787, "grad_norm": 0.1376497894525528, "learning_rate": 0.0005, "loss": 2.1364, "step": 37190 }, { "epoch": 0.14159238141638056, "grad_norm": 0.1384882777929306, "learning_rate": 0.0005, "loss": 2.1447, "step": 37200 }, { "epoch": 0.14163044388450324, "grad_norm": 0.13794715702533722, "learning_rate": 0.0005, "loss": 2.1324, "step": 37210 }, { "epoch": 0.14166850635262593, "grad_norm": 0.12299910932779312, "learning_rate": 0.0005, "loss": 2.1355, "step": 37220 }, { "epoch": 0.1417065688207486, "grad_norm": 0.12569528818130493, "learning_rate": 0.0005, "loss": 2.1408, "step": 37230 }, { "epoch": 0.1417446312888713, "grad_norm": 0.11445607244968414, "learning_rate": 0.0005, "loss": 2.1425, "step": 37240 }, { "epoch": 0.14178269375699398, "grad_norm": 0.13320550322532654, "learning_rate": 0.0005, "loss": 2.1273, "step": 37250 }, { "epoch": 0.14182075622511667, "grad_norm": 0.13344018161296844, "learning_rate": 0.0005, "loss": 2.1403, "step": 37260 }, { "epoch": 0.14185881869323935, "grad_norm": 0.12571915984153748, "learning_rate": 0.0005, "loss": 2.1272, "step": 37270 }, { "epoch": 0.14189688116136204, "grad_norm": 0.13246026635169983, "learning_rate": 0.0005, "loss": 2.1376, "step": 37280 }, { "epoch": 0.14193494362948472, "grad_norm": 0.1409611701965332, "learning_rate": 0.0005, "loss": 2.1255, "step": 37290 }, { "epoch": 0.1419730060976074, "grad_norm": 0.12814782559871674, "learning_rate": 0.0005, "loss": 2.1424, "step": 37300 }, { "epoch": 0.14201106856573006, "grad_norm": 0.14565905928611755, "learning_rate": 0.0005, "loss": 2.1173, "step": 37310 }, { "epoch": 0.14204913103385275, "grad_norm": 0.13787856698036194, "learning_rate": 0.0005, "loss": 2.1416, "step": 37320 }, { "epoch": 0.14208719350197543, "grad_norm": 0.11704063415527344, "learning_rate": 0.0005, "loss": 2.1364, "step": 37330 }, { "epoch": 0.14212525597009812, "grad_norm": 0.12095765024423599, "learning_rate": 0.0005, "loss": 2.1416, "step": 37340 }, { "epoch": 0.1421633184382208, "grad_norm": 0.13464663922786713, "learning_rate": 0.0005, "loss": 2.1374, "step": 37350 }, { "epoch": 0.1422013809063435, "grad_norm": 0.11635299772024155, "learning_rate": 0.0005, "loss": 2.1357, "step": 37360 }, { "epoch": 0.14223944337446617, "grad_norm": 0.12324802577495575, "learning_rate": 0.0005, "loss": 2.1258, "step": 37370 }, { "epoch": 0.14227750584258886, "grad_norm": 0.12421499192714691, "learning_rate": 0.0005, "loss": 2.1305, "step": 37380 }, { "epoch": 0.14231556831071154, "grad_norm": 0.1331142783164978, "learning_rate": 0.0005, "loss": 2.131, "step": 37390 }, { "epoch": 0.14235363077883423, "grad_norm": 0.1281598061323166, "learning_rate": 0.0005, "loss": 2.1496, "step": 37400 }, { "epoch": 0.1423916932469569, "grad_norm": 0.12264150381088257, "learning_rate": 0.0005, "loss": 2.1373, "step": 37410 }, { "epoch": 0.1424297557150796, "grad_norm": 0.11806928366422653, "learning_rate": 0.0005, "loss": 2.1467, "step": 37420 }, { "epoch": 0.14246781818320228, "grad_norm": 0.11630085110664368, "learning_rate": 0.0005, "loss": 2.1412, "step": 37430 }, { "epoch": 0.14250588065132497, "grad_norm": 0.1202859953045845, "learning_rate": 0.0005, "loss": 2.1285, "step": 37440 }, { "epoch": 0.14254394311944762, "grad_norm": 0.13352476060390472, "learning_rate": 0.0005, "loss": 2.1334, "step": 37450 }, { "epoch": 0.1425820055875703, "grad_norm": 0.12109289318323135, "learning_rate": 0.0005, "loss": 2.1373, "step": 37460 }, { "epoch": 0.142620068055693, "grad_norm": 0.1269426792860031, "learning_rate": 0.0005, "loss": 2.1137, "step": 37470 }, { "epoch": 0.14265813052381568, "grad_norm": 0.11681367456912994, "learning_rate": 0.0005, "loss": 2.1326, "step": 37480 }, { "epoch": 0.14269619299193836, "grad_norm": 0.1320292204618454, "learning_rate": 0.0005, "loss": 2.1422, "step": 37490 }, { "epoch": 0.14273425546006105, "grad_norm": 0.12286791950464249, "learning_rate": 0.0005, "loss": 2.1433, "step": 37500 }, { "epoch": 0.14277231792818373, "grad_norm": 0.11698734760284424, "learning_rate": 0.0005, "loss": 2.1318, "step": 37510 }, { "epoch": 0.14281038039630642, "grad_norm": 0.12439266592264175, "learning_rate": 0.0005, "loss": 2.1392, "step": 37520 }, { "epoch": 0.1428484428644291, "grad_norm": 0.11994090676307678, "learning_rate": 0.0005, "loss": 2.1442, "step": 37530 }, { "epoch": 0.1428865053325518, "grad_norm": 0.12152058631181717, "learning_rate": 0.0005, "loss": 2.1253, "step": 37540 }, { "epoch": 0.14292456780067447, "grad_norm": 0.13039402663707733, "learning_rate": 0.0005, "loss": 2.1473, "step": 37550 }, { "epoch": 0.14296263026879716, "grad_norm": 0.1288081258535385, "learning_rate": 0.0005, "loss": 2.1375, "step": 37560 }, { "epoch": 0.14300069273691984, "grad_norm": 0.11937260627746582, "learning_rate": 0.0005, "loss": 2.1505, "step": 37570 }, { "epoch": 0.14303875520504253, "grad_norm": 0.13222989439964294, "learning_rate": 0.0005, "loss": 2.1401, "step": 37580 }, { "epoch": 0.14307681767316519, "grad_norm": 0.1243140771985054, "learning_rate": 0.0005, "loss": 2.1505, "step": 37590 }, { "epoch": 0.14311488014128787, "grad_norm": 0.1320112645626068, "learning_rate": 0.0005, "loss": 2.1405, "step": 37600 }, { "epoch": 0.14315294260941056, "grad_norm": 0.12151151895523071, "learning_rate": 0.0005, "loss": 2.1259, "step": 37610 }, { "epoch": 0.14319100507753324, "grad_norm": 0.14852769672870636, "learning_rate": 0.0005, "loss": 2.1275, "step": 37620 }, { "epoch": 0.14322906754565592, "grad_norm": 0.12357402592897415, "learning_rate": 0.0005, "loss": 2.1293, "step": 37630 }, { "epoch": 0.1432671300137786, "grad_norm": 0.12055303901433945, "learning_rate": 0.0005, "loss": 2.1214, "step": 37640 }, { "epoch": 0.1433051924819013, "grad_norm": 0.15052442252635956, "learning_rate": 0.0005, "loss": 2.1305, "step": 37650 }, { "epoch": 0.14334325495002398, "grad_norm": 0.12786470353603363, "learning_rate": 0.0005, "loss": 2.1584, "step": 37660 }, { "epoch": 0.14338131741814666, "grad_norm": 0.13811320066452026, "learning_rate": 0.0005, "loss": 2.1459, "step": 37670 }, { "epoch": 0.14341937988626935, "grad_norm": 0.1383814960718155, "learning_rate": 0.0005, "loss": 2.147, "step": 37680 }, { "epoch": 0.14345744235439203, "grad_norm": 0.12483002990484238, "learning_rate": 0.0005, "loss": 2.1392, "step": 37690 }, { "epoch": 0.14349550482251472, "grad_norm": 0.11994721740484238, "learning_rate": 0.0005, "loss": 2.1406, "step": 37700 }, { "epoch": 0.1435335672906374, "grad_norm": 0.13180948793888092, "learning_rate": 0.0005, "loss": 2.1368, "step": 37710 }, { "epoch": 0.1435716297587601, "grad_norm": 0.12397965788841248, "learning_rate": 0.0005, "loss": 2.1463, "step": 37720 }, { "epoch": 0.14360969222688277, "grad_norm": 0.11854337155818939, "learning_rate": 0.0005, "loss": 2.1353, "step": 37730 }, { "epoch": 0.14364775469500543, "grad_norm": 0.11657878011465073, "learning_rate": 0.0005, "loss": 2.1349, "step": 37740 }, { "epoch": 0.14368581716312812, "grad_norm": 0.12216203659772873, "learning_rate": 0.0005, "loss": 2.1323, "step": 37750 }, { "epoch": 0.1437238796312508, "grad_norm": 0.1354895681142807, "learning_rate": 0.0005, "loss": 2.1379, "step": 37760 }, { "epoch": 0.14376194209937349, "grad_norm": 0.1245875358581543, "learning_rate": 0.0005, "loss": 2.1385, "step": 37770 }, { "epoch": 0.14380000456749617, "grad_norm": 0.11784937977790833, "learning_rate": 0.0005, "loss": 2.147, "step": 37780 }, { "epoch": 0.14383806703561886, "grad_norm": 0.1278153657913208, "learning_rate": 0.0005, "loss": 2.1477, "step": 37790 }, { "epoch": 0.14387612950374154, "grad_norm": 0.12693701684474945, "learning_rate": 0.0005, "loss": 2.1363, "step": 37800 }, { "epoch": 0.14391419197186422, "grad_norm": 0.13049623370170593, "learning_rate": 0.0005, "loss": 2.1308, "step": 37810 }, { "epoch": 0.1439522544399869, "grad_norm": 0.12316485494375229, "learning_rate": 0.0005, "loss": 2.1266, "step": 37820 }, { "epoch": 0.1439903169081096, "grad_norm": 0.1379222273826599, "learning_rate": 0.0005, "loss": 2.1312, "step": 37830 }, { "epoch": 0.14402837937623228, "grad_norm": 0.11370328068733215, "learning_rate": 0.0005, "loss": 2.1289, "step": 37840 }, { "epoch": 0.14406644184435496, "grad_norm": 0.1197470873594284, "learning_rate": 0.0005, "loss": 2.1371, "step": 37850 }, { "epoch": 0.14410450431247765, "grad_norm": 0.12998999655246735, "learning_rate": 0.0005, "loss": 2.1437, "step": 37860 }, { "epoch": 0.14414256678060033, "grad_norm": 0.13060982525348663, "learning_rate": 0.0005, "loss": 2.1204, "step": 37870 }, { "epoch": 0.144180629248723, "grad_norm": 0.11585245281457901, "learning_rate": 0.0005, "loss": 2.156, "step": 37880 }, { "epoch": 0.14421869171684568, "grad_norm": 0.12489481270313263, "learning_rate": 0.0005, "loss": 2.1352, "step": 37890 }, { "epoch": 0.14425675418496836, "grad_norm": 0.15002131462097168, "learning_rate": 0.0005, "loss": 2.1429, "step": 37900 }, { "epoch": 0.14429481665309105, "grad_norm": 0.12778665125370026, "learning_rate": 0.0005, "loss": 2.141, "step": 37910 }, { "epoch": 0.14433287912121373, "grad_norm": 0.12881921231746674, "learning_rate": 0.0005, "loss": 2.138, "step": 37920 }, { "epoch": 0.14437094158933642, "grad_norm": 0.11601099371910095, "learning_rate": 0.0005, "loss": 2.1302, "step": 37930 }, { "epoch": 0.1444090040574591, "grad_norm": 0.12787161767482758, "learning_rate": 0.0005, "loss": 2.1414, "step": 37940 }, { "epoch": 0.14444706652558179, "grad_norm": 0.12424326688051224, "learning_rate": 0.0005, "loss": 2.1434, "step": 37950 }, { "epoch": 0.14448512899370447, "grad_norm": 0.12463422119617462, "learning_rate": 0.0005, "loss": 2.138, "step": 37960 }, { "epoch": 0.14452319146182716, "grad_norm": 0.11958973109722137, "learning_rate": 0.0005, "loss": 2.1286, "step": 37970 }, { "epoch": 0.14456125392994984, "grad_norm": 0.12186741083860397, "learning_rate": 0.0005, "loss": 2.1333, "step": 37980 }, { "epoch": 0.14459931639807252, "grad_norm": 0.1192806214094162, "learning_rate": 0.0005, "loss": 2.1344, "step": 37990 }, { "epoch": 0.1446373788661952, "grad_norm": 0.11857765913009644, "learning_rate": 0.0005, "loss": 2.1294, "step": 38000 }, { "epoch": 0.1446754413343179, "grad_norm": 0.1343282014131546, "learning_rate": 0.0005, "loss": 2.1386, "step": 38010 }, { "epoch": 0.14471350380244055, "grad_norm": 0.13186195492744446, "learning_rate": 0.0005, "loss": 2.1351, "step": 38020 }, { "epoch": 0.14475156627056324, "grad_norm": 0.12335959076881409, "learning_rate": 0.0005, "loss": 2.1421, "step": 38030 }, { "epoch": 0.14478962873868592, "grad_norm": 0.1687193065881729, "learning_rate": 0.0005, "loss": 2.144, "step": 38040 }, { "epoch": 0.1448276912068086, "grad_norm": 0.14608801901340485, "learning_rate": 0.0005, "loss": 2.1235, "step": 38050 }, { "epoch": 0.1448657536749313, "grad_norm": 0.13224822282791138, "learning_rate": 0.0005, "loss": 2.1442, "step": 38060 }, { "epoch": 0.14490381614305398, "grad_norm": 0.12992419302463531, "learning_rate": 0.0005, "loss": 2.1372, "step": 38070 }, { "epoch": 0.14494187861117666, "grad_norm": 0.1225738450884819, "learning_rate": 0.0005, "loss": 2.1513, "step": 38080 }, { "epoch": 0.14497994107929935, "grad_norm": 0.12545767426490784, "learning_rate": 0.0005, "loss": 2.1311, "step": 38090 }, { "epoch": 0.14501800354742203, "grad_norm": 0.12390364706516266, "learning_rate": 0.0005, "loss": 2.1436, "step": 38100 }, { "epoch": 0.14505606601554472, "grad_norm": 0.11716514825820923, "learning_rate": 0.0005, "loss": 2.1187, "step": 38110 }, { "epoch": 0.1450941284836674, "grad_norm": 0.1196618601679802, "learning_rate": 0.0005, "loss": 2.1305, "step": 38120 }, { "epoch": 0.14513219095179009, "grad_norm": 0.11519220471382141, "learning_rate": 0.0005, "loss": 2.1428, "step": 38130 }, { "epoch": 0.14517025341991277, "grad_norm": 0.1359359472990036, "learning_rate": 0.0005, "loss": 2.1345, "step": 38140 }, { "epoch": 0.14520831588803546, "grad_norm": 0.132180318236351, "learning_rate": 0.0005, "loss": 2.1471, "step": 38150 }, { "epoch": 0.14524637835615814, "grad_norm": 0.13018040359020233, "learning_rate": 0.0005, "loss": 2.1326, "step": 38160 }, { "epoch": 0.1452844408242808, "grad_norm": 0.12225526571273804, "learning_rate": 0.0005, "loss": 2.1331, "step": 38170 }, { "epoch": 0.14532250329240348, "grad_norm": 0.11801525950431824, "learning_rate": 0.0005, "loss": 2.1241, "step": 38180 }, { "epoch": 0.14536056576052617, "grad_norm": 0.11402857303619385, "learning_rate": 0.0005, "loss": 2.135, "step": 38190 }, { "epoch": 0.14539862822864885, "grad_norm": 0.11925767362117767, "learning_rate": 0.0005, "loss": 2.1419, "step": 38200 }, { "epoch": 0.14543669069677154, "grad_norm": 0.11742289364337921, "learning_rate": 0.0005, "loss": 2.1367, "step": 38210 }, { "epoch": 0.14547475316489422, "grad_norm": 0.11711934953927994, "learning_rate": 0.0005, "loss": 2.1315, "step": 38220 }, { "epoch": 0.1455128156330169, "grad_norm": 0.10924158990383148, "learning_rate": 0.0005, "loss": 2.1264, "step": 38230 }, { "epoch": 0.1455508781011396, "grad_norm": 0.12829233705997467, "learning_rate": 0.0005, "loss": 2.1121, "step": 38240 }, { "epoch": 0.14558894056926228, "grad_norm": 0.10900772362947464, "learning_rate": 0.0005, "loss": 2.1341, "step": 38250 }, { "epoch": 0.14562700303738496, "grad_norm": 0.1258832812309265, "learning_rate": 0.0005, "loss": 2.1317, "step": 38260 }, { "epoch": 0.14566506550550765, "grad_norm": 0.11658526957035065, "learning_rate": 0.0005, "loss": 2.1314, "step": 38270 }, { "epoch": 0.14570312797363033, "grad_norm": 0.1278267353773117, "learning_rate": 0.0005, "loss": 2.13, "step": 38280 }, { "epoch": 0.14574119044175302, "grad_norm": 0.1270705610513687, "learning_rate": 0.0005, "loss": 2.127, "step": 38290 }, { "epoch": 0.1457792529098757, "grad_norm": 0.11987922340631485, "learning_rate": 0.0005, "loss": 2.1322, "step": 38300 }, { "epoch": 0.14581731537799836, "grad_norm": 0.12335024774074554, "learning_rate": 0.0005, "loss": 2.1369, "step": 38310 }, { "epoch": 0.14585537784612104, "grad_norm": 0.12798041105270386, "learning_rate": 0.0005, "loss": 2.1316, "step": 38320 }, { "epoch": 0.14589344031424373, "grad_norm": 0.13565818965435028, "learning_rate": 0.0005, "loss": 2.1302, "step": 38330 }, { "epoch": 0.1459315027823664, "grad_norm": 0.13244055211544037, "learning_rate": 0.0005, "loss": 2.132, "step": 38340 }, { "epoch": 0.1459695652504891, "grad_norm": 0.13553233444690704, "learning_rate": 0.0005, "loss": 2.1231, "step": 38350 }, { "epoch": 0.14600762771861178, "grad_norm": 0.1146652102470398, "learning_rate": 0.0005, "loss": 2.135, "step": 38360 }, { "epoch": 0.14604569018673447, "grad_norm": 0.13296912610530853, "learning_rate": 0.0005, "loss": 2.1383, "step": 38370 }, { "epoch": 0.14608375265485715, "grad_norm": 0.13272130489349365, "learning_rate": 0.0005, "loss": 2.1396, "step": 38380 }, { "epoch": 0.14612181512297984, "grad_norm": 0.12927483022212982, "learning_rate": 0.0005, "loss": 2.1303, "step": 38390 }, { "epoch": 0.14615987759110252, "grad_norm": 0.12845762073993683, "learning_rate": 0.0005, "loss": 2.128, "step": 38400 }, { "epoch": 0.1461979400592252, "grad_norm": 0.11350326985120773, "learning_rate": 0.0005, "loss": 2.1509, "step": 38410 }, { "epoch": 0.1462360025273479, "grad_norm": 0.11804868280887604, "learning_rate": 0.0005, "loss": 2.1515, "step": 38420 }, { "epoch": 0.14627406499547058, "grad_norm": 0.127664253115654, "learning_rate": 0.0005, "loss": 2.1232, "step": 38430 }, { "epoch": 0.14631212746359326, "grad_norm": 0.13080111145973206, "learning_rate": 0.0005, "loss": 2.1308, "step": 38440 }, { "epoch": 0.14635018993171595, "grad_norm": 0.12366043031215668, "learning_rate": 0.0005, "loss": 2.1261, "step": 38450 }, { "epoch": 0.1463882523998386, "grad_norm": 0.1258617490530014, "learning_rate": 0.0005, "loss": 2.1382, "step": 38460 }, { "epoch": 0.1464263148679613, "grad_norm": 0.12624163925647736, "learning_rate": 0.0005, "loss": 2.1375, "step": 38470 }, { "epoch": 0.14646437733608397, "grad_norm": 0.11812226474285126, "learning_rate": 0.0005, "loss": 2.126, "step": 38480 }, { "epoch": 0.14650243980420666, "grad_norm": 0.12122759968042374, "learning_rate": 0.0005, "loss": 2.1206, "step": 38490 }, { "epoch": 0.14654050227232934, "grad_norm": 0.12794890999794006, "learning_rate": 0.0005, "loss": 2.1215, "step": 38500 }, { "epoch": 0.14657856474045203, "grad_norm": 0.1506595015525818, "learning_rate": 0.0005, "loss": 2.1389, "step": 38510 }, { "epoch": 0.1466166272085747, "grad_norm": 0.20770882070064545, "learning_rate": 0.0005, "loss": 2.1371, "step": 38520 }, { "epoch": 0.1466546896766974, "grad_norm": 0.1322670727968216, "learning_rate": 0.0005, "loss": 2.1506, "step": 38530 }, { "epoch": 0.14669275214482008, "grad_norm": 0.13230635225772858, "learning_rate": 0.0005, "loss": 2.1478, "step": 38540 }, { "epoch": 0.14673081461294277, "grad_norm": 0.12269840389490128, "learning_rate": 0.0005, "loss": 2.1231, "step": 38550 }, { "epoch": 0.14676887708106545, "grad_norm": 0.12784303724765778, "learning_rate": 0.0005, "loss": 2.1128, "step": 38560 }, { "epoch": 0.14680693954918814, "grad_norm": 0.12518112361431122, "learning_rate": 0.0005, "loss": 2.1443, "step": 38570 }, { "epoch": 0.14684500201731082, "grad_norm": 0.11033365875482559, "learning_rate": 0.0005, "loss": 2.1358, "step": 38580 }, { "epoch": 0.1468830644854335, "grad_norm": 0.11702969670295715, "learning_rate": 0.0005, "loss": 2.1333, "step": 38590 }, { "epoch": 0.14692112695355616, "grad_norm": 0.1252330094575882, "learning_rate": 0.0005, "loss": 2.1154, "step": 38600 }, { "epoch": 0.14695918942167885, "grad_norm": 0.12296465784311295, "learning_rate": 0.0005, "loss": 2.1434, "step": 38610 }, { "epoch": 0.14699725188980153, "grad_norm": 0.1148548498749733, "learning_rate": 0.0005, "loss": 2.1389, "step": 38620 }, { "epoch": 0.14703531435792422, "grad_norm": 0.13031576573848724, "learning_rate": 0.0005, "loss": 2.1438, "step": 38630 }, { "epoch": 0.1470733768260469, "grad_norm": 0.11590581387281418, "learning_rate": 0.0005, "loss": 2.1437, "step": 38640 }, { "epoch": 0.1471114392941696, "grad_norm": 0.12280098348855972, "learning_rate": 0.0005, "loss": 2.1543, "step": 38650 }, { "epoch": 0.14714950176229227, "grad_norm": 0.1293204426765442, "learning_rate": 0.0005, "loss": 2.1311, "step": 38660 }, { "epoch": 0.14718756423041496, "grad_norm": 0.12075809389352798, "learning_rate": 0.0005, "loss": 2.1297, "step": 38670 }, { "epoch": 0.14722562669853764, "grad_norm": 0.13208520412445068, "learning_rate": 0.0005, "loss": 2.1525, "step": 38680 }, { "epoch": 0.14726368916666033, "grad_norm": 0.138469398021698, "learning_rate": 0.0005, "loss": 2.1289, "step": 38690 }, { "epoch": 0.147301751634783, "grad_norm": 0.12534962594509125, "learning_rate": 0.0005, "loss": 2.15, "step": 38700 }, { "epoch": 0.1473398141029057, "grad_norm": 0.1162012442946434, "learning_rate": 0.0005, "loss": 2.1442, "step": 38710 }, { "epoch": 0.14737787657102838, "grad_norm": 0.1286344975233078, "learning_rate": 0.0005, "loss": 2.1337, "step": 38720 }, { "epoch": 0.14741593903915107, "grad_norm": 0.12214122712612152, "learning_rate": 0.0005, "loss": 2.1417, "step": 38730 }, { "epoch": 0.14745400150727372, "grad_norm": 0.12790422141551971, "learning_rate": 0.0005, "loss": 2.128, "step": 38740 }, { "epoch": 0.1474920639753964, "grad_norm": 0.13529656827449799, "learning_rate": 0.0005, "loss": 2.1418, "step": 38750 }, { "epoch": 0.1475301264435191, "grad_norm": 0.1380883902311325, "learning_rate": 0.0005, "loss": 2.1467, "step": 38760 }, { "epoch": 0.14756818891164178, "grad_norm": 0.12100613117218018, "learning_rate": 0.0005, "loss": 2.1384, "step": 38770 }, { "epoch": 0.14760625137976446, "grad_norm": 0.13053341209888458, "learning_rate": 0.0005, "loss": 2.1233, "step": 38780 }, { "epoch": 0.14764431384788715, "grad_norm": 0.1276913583278656, "learning_rate": 0.0005, "loss": 2.1411, "step": 38790 }, { "epoch": 0.14768237631600983, "grad_norm": 0.12804895639419556, "learning_rate": 0.0005, "loss": 2.1486, "step": 38800 }, { "epoch": 0.14772043878413252, "grad_norm": 0.1234828382730484, "learning_rate": 0.0005, "loss": 2.1183, "step": 38810 }, { "epoch": 0.1477585012522552, "grad_norm": 0.10741379112005234, "learning_rate": 0.0005, "loss": 2.1303, "step": 38820 }, { "epoch": 0.1477965637203779, "grad_norm": 0.13724245131015778, "learning_rate": 0.0005, "loss": 2.1372, "step": 38830 }, { "epoch": 0.14783462618850057, "grad_norm": 0.11840417236089706, "learning_rate": 0.0005, "loss": 2.1373, "step": 38840 }, { "epoch": 0.14787268865662326, "grad_norm": 0.13324248790740967, "learning_rate": 0.0005, "loss": 2.146, "step": 38850 }, { "epoch": 0.14791075112474594, "grad_norm": 0.12486392259597778, "learning_rate": 0.0005, "loss": 2.1525, "step": 38860 }, { "epoch": 0.14794881359286863, "grad_norm": 0.13672782480716705, "learning_rate": 0.0005, "loss": 2.1463, "step": 38870 }, { "epoch": 0.1479868760609913, "grad_norm": 0.11927548050880432, "learning_rate": 0.0005, "loss": 2.1312, "step": 38880 }, { "epoch": 0.14802493852911397, "grad_norm": 0.1426343470811844, "learning_rate": 0.0005, "loss": 2.1316, "step": 38890 }, { "epoch": 0.14806300099723665, "grad_norm": 0.1276564598083496, "learning_rate": 0.0005, "loss": 2.1352, "step": 38900 }, { "epoch": 0.14810106346535934, "grad_norm": 0.11925414949655533, "learning_rate": 0.0005, "loss": 2.1152, "step": 38910 }, { "epoch": 0.14813912593348202, "grad_norm": 0.13122230768203735, "learning_rate": 0.0005, "loss": 2.1194, "step": 38920 }, { "epoch": 0.1481771884016047, "grad_norm": 0.12605653703212738, "learning_rate": 0.0005, "loss": 2.1364, "step": 38930 }, { "epoch": 0.1482152508697274, "grad_norm": 0.12071428447961807, "learning_rate": 0.0005, "loss": 2.1287, "step": 38940 }, { "epoch": 0.14825331333785008, "grad_norm": 0.14357160031795502, "learning_rate": 0.0005, "loss": 2.1349, "step": 38950 }, { "epoch": 0.14829137580597276, "grad_norm": 0.118900828063488, "learning_rate": 0.0005, "loss": 2.1448, "step": 38960 }, { "epoch": 0.14832943827409545, "grad_norm": 0.1205672100186348, "learning_rate": 0.0005, "loss": 2.128, "step": 38970 }, { "epoch": 0.14836750074221813, "grad_norm": 0.11521317064762115, "learning_rate": 0.0005, "loss": 2.1313, "step": 38980 }, { "epoch": 0.14840556321034082, "grad_norm": 0.13125668466091156, "learning_rate": 0.0005, "loss": 2.1446, "step": 38990 }, { "epoch": 0.1484436256784635, "grad_norm": 0.11577267944812775, "learning_rate": 0.0005, "loss": 2.1332, "step": 39000 }, { "epoch": 0.1484816881465862, "grad_norm": 0.13303667306900024, "learning_rate": 0.0005, "loss": 2.1393, "step": 39010 }, { "epoch": 0.14851975061470887, "grad_norm": 0.12673087418079376, "learning_rate": 0.0005, "loss": 2.1323, "step": 39020 }, { "epoch": 0.14855781308283153, "grad_norm": 0.12567567825317383, "learning_rate": 0.0005, "loss": 2.1322, "step": 39030 }, { "epoch": 0.14859587555095422, "grad_norm": 0.1653776913881302, "learning_rate": 0.0005, "loss": 2.1335, "step": 39040 }, { "epoch": 0.1486339380190769, "grad_norm": 0.12336651980876923, "learning_rate": 0.0005, "loss": 2.1511, "step": 39050 }, { "epoch": 0.14867200048719958, "grad_norm": 0.1362055391073227, "learning_rate": 0.0005, "loss": 2.1399, "step": 39060 }, { "epoch": 0.14871006295532227, "grad_norm": 0.12506848573684692, "learning_rate": 0.0005, "loss": 2.1334, "step": 39070 }, { "epoch": 0.14874812542344495, "grad_norm": 0.1321392059326172, "learning_rate": 0.0005, "loss": 2.1157, "step": 39080 }, { "epoch": 0.14878618789156764, "grad_norm": 0.12654539942741394, "learning_rate": 0.0005, "loss": 2.1156, "step": 39090 }, { "epoch": 0.14882425035969032, "grad_norm": 0.13101084530353546, "learning_rate": 0.0005, "loss": 2.142, "step": 39100 }, { "epoch": 0.148862312827813, "grad_norm": 0.1241462379693985, "learning_rate": 0.0005, "loss": 2.1391, "step": 39110 }, { "epoch": 0.1489003752959357, "grad_norm": 0.1250251829624176, "learning_rate": 0.0005, "loss": 2.138, "step": 39120 }, { "epoch": 0.14893843776405838, "grad_norm": 0.11190465837717056, "learning_rate": 0.0005, "loss": 2.1457, "step": 39130 }, { "epoch": 0.14897650023218106, "grad_norm": 0.11848768591880798, "learning_rate": 0.0005, "loss": 2.1254, "step": 39140 }, { "epoch": 0.14901456270030375, "grad_norm": 0.1262151002883911, "learning_rate": 0.0005, "loss": 2.1416, "step": 39150 }, { "epoch": 0.14905262516842643, "grad_norm": 0.12781231105327606, "learning_rate": 0.0005, "loss": 2.1337, "step": 39160 }, { "epoch": 0.1490906876365491, "grad_norm": 0.12659291923046112, "learning_rate": 0.0005, "loss": 2.135, "step": 39170 }, { "epoch": 0.14912875010467178, "grad_norm": 0.11960427463054657, "learning_rate": 0.0005, "loss": 2.1381, "step": 39180 }, { "epoch": 0.14916681257279446, "grad_norm": 0.12238552421331406, "learning_rate": 0.0005, "loss": 2.1443, "step": 39190 }, { "epoch": 0.14920487504091715, "grad_norm": 0.12759989500045776, "learning_rate": 0.0005, "loss": 2.139, "step": 39200 }, { "epoch": 0.14924293750903983, "grad_norm": 0.13447055220603943, "learning_rate": 0.0005, "loss": 2.1408, "step": 39210 }, { "epoch": 0.14928099997716252, "grad_norm": 0.13414421677589417, "learning_rate": 0.0005, "loss": 2.143, "step": 39220 }, { "epoch": 0.1493190624452852, "grad_norm": 0.12294954806566238, "learning_rate": 0.0005, "loss": 2.1301, "step": 39230 }, { "epoch": 0.14935712491340788, "grad_norm": 0.12728038430213928, "learning_rate": 0.0005, "loss": 2.124, "step": 39240 }, { "epoch": 0.14939518738153057, "grad_norm": 0.12434787303209305, "learning_rate": 0.0005, "loss": 2.1499, "step": 39250 }, { "epoch": 0.14943324984965325, "grad_norm": 0.11438928544521332, "learning_rate": 0.0005, "loss": 2.1326, "step": 39260 }, { "epoch": 0.14947131231777594, "grad_norm": 0.11270195990800858, "learning_rate": 0.0005, "loss": 2.1345, "step": 39270 }, { "epoch": 0.14950937478589862, "grad_norm": 0.14034071564674377, "learning_rate": 0.0005, "loss": 2.1342, "step": 39280 }, { "epoch": 0.1495474372540213, "grad_norm": 0.12831617891788483, "learning_rate": 0.0005, "loss": 2.1284, "step": 39290 }, { "epoch": 0.149585499722144, "grad_norm": 0.1376051902770996, "learning_rate": 0.0005, "loss": 2.1373, "step": 39300 }, { "epoch": 0.14962356219026668, "grad_norm": 0.11683588474988937, "learning_rate": 0.0005, "loss": 2.1346, "step": 39310 }, { "epoch": 0.14966162465838934, "grad_norm": 0.11693572252988815, "learning_rate": 0.0005, "loss": 2.1277, "step": 39320 }, { "epoch": 0.14969968712651202, "grad_norm": 0.11618661880493164, "learning_rate": 0.0005, "loss": 2.1388, "step": 39330 }, { "epoch": 0.1497377495946347, "grad_norm": 0.13750776648521423, "learning_rate": 0.0005, "loss": 2.137, "step": 39340 }, { "epoch": 0.1497758120627574, "grad_norm": 0.128860741853714, "learning_rate": 0.0005, "loss": 2.1279, "step": 39350 }, { "epoch": 0.14981387453088008, "grad_norm": 0.11993896961212158, "learning_rate": 0.0005, "loss": 2.1162, "step": 39360 }, { "epoch": 0.14985193699900276, "grad_norm": 0.12513276934623718, "learning_rate": 0.0005, "loss": 2.132, "step": 39370 }, { "epoch": 0.14988999946712545, "grad_norm": 0.12072756141424179, "learning_rate": 0.0005, "loss": 2.1339, "step": 39380 }, { "epoch": 0.14992806193524813, "grad_norm": 0.11543644219636917, "learning_rate": 0.0005, "loss": 2.1415, "step": 39390 }, { "epoch": 0.14996612440337082, "grad_norm": 0.12288960069417953, "learning_rate": 0.0005, "loss": 2.135, "step": 39400 }, { "epoch": 0.1500041868714935, "grad_norm": 0.13772891461849213, "learning_rate": 0.0005, "loss": 2.1406, "step": 39410 }, { "epoch": 0.15004224933961618, "grad_norm": 0.14142270386219025, "learning_rate": 0.0005, "loss": 2.1383, "step": 39420 }, { "epoch": 0.15008031180773887, "grad_norm": 0.13073478639125824, "learning_rate": 0.0005, "loss": 2.1435, "step": 39430 }, { "epoch": 0.15011837427586155, "grad_norm": 0.13670803606510162, "learning_rate": 0.0005, "loss": 2.1232, "step": 39440 }, { "epoch": 0.15015643674398424, "grad_norm": 0.1232762262225151, "learning_rate": 0.0005, "loss": 2.1254, "step": 39450 }, { "epoch": 0.1501944992121069, "grad_norm": 0.11498036235570908, "learning_rate": 0.0005, "loss": 2.1293, "step": 39460 }, { "epoch": 0.15023256168022958, "grad_norm": 0.1252405345439911, "learning_rate": 0.0005, "loss": 2.1429, "step": 39470 }, { "epoch": 0.15027062414835227, "grad_norm": 0.11288294196128845, "learning_rate": 0.0005, "loss": 2.1412, "step": 39480 }, { "epoch": 0.15030868661647495, "grad_norm": 0.12291713058948517, "learning_rate": 0.0005, "loss": 2.1325, "step": 39490 }, { "epoch": 0.15034674908459764, "grad_norm": 0.12331737577915192, "learning_rate": 0.0005, "loss": 2.1429, "step": 39500 }, { "epoch": 0.15038481155272032, "grad_norm": 0.11889985203742981, "learning_rate": 0.0005, "loss": 2.1304, "step": 39510 }, { "epoch": 0.150422874020843, "grad_norm": 0.1344471573829651, "learning_rate": 0.0005, "loss": 2.1333, "step": 39520 }, { "epoch": 0.1504609364889657, "grad_norm": 0.13127154111862183, "learning_rate": 0.0005, "loss": 2.1367, "step": 39530 }, { "epoch": 0.15049899895708838, "grad_norm": 0.13445231318473816, "learning_rate": 0.0005, "loss": 2.1347, "step": 39540 }, { "epoch": 0.15053706142521106, "grad_norm": 0.12982484698295593, "learning_rate": 0.0005, "loss": 2.1247, "step": 39550 }, { "epoch": 0.15057512389333375, "grad_norm": 0.12241829931735992, "learning_rate": 0.0005, "loss": 2.1315, "step": 39560 }, { "epoch": 0.15061318636145643, "grad_norm": 0.12182803452014923, "learning_rate": 0.0005, "loss": 2.139, "step": 39570 }, { "epoch": 0.15065124882957912, "grad_norm": 0.12285663187503815, "learning_rate": 0.0005, "loss": 2.1211, "step": 39580 }, { "epoch": 0.1506893112977018, "grad_norm": 0.12623947858810425, "learning_rate": 0.0005, "loss": 2.1293, "step": 39590 }, { "epoch": 0.15072737376582448, "grad_norm": 0.126447856426239, "learning_rate": 0.0005, "loss": 2.1469, "step": 39600 }, { "epoch": 0.15076543623394714, "grad_norm": 0.11390262097120285, "learning_rate": 0.0005, "loss": 2.1264, "step": 39610 }, { "epoch": 0.15080349870206983, "grad_norm": 0.1311657577753067, "learning_rate": 0.0005, "loss": 2.1228, "step": 39620 }, { "epoch": 0.1508415611701925, "grad_norm": 0.12902265787124634, "learning_rate": 0.0005, "loss": 2.1248, "step": 39630 }, { "epoch": 0.1508796236383152, "grad_norm": 0.11394213885068893, "learning_rate": 0.0005, "loss": 2.1301, "step": 39640 }, { "epoch": 0.15091768610643788, "grad_norm": 0.12555819749832153, "learning_rate": 0.0005, "loss": 2.1282, "step": 39650 }, { "epoch": 0.15095574857456057, "grad_norm": 0.12388810515403748, "learning_rate": 0.0005, "loss": 2.1405, "step": 39660 }, { "epoch": 0.15099381104268325, "grad_norm": 0.13326005637645721, "learning_rate": 0.0005, "loss": 2.1397, "step": 39670 }, { "epoch": 0.15103187351080594, "grad_norm": 0.1274305284023285, "learning_rate": 0.0005, "loss": 2.1314, "step": 39680 }, { "epoch": 0.15106993597892862, "grad_norm": 0.12226250767707825, "learning_rate": 0.0005, "loss": 2.1425, "step": 39690 }, { "epoch": 0.1511079984470513, "grad_norm": 0.14611788094043732, "learning_rate": 0.0005, "loss": 2.1318, "step": 39700 }, { "epoch": 0.151146060915174, "grad_norm": 0.13780009746551514, "learning_rate": 0.0005, "loss": 2.1406, "step": 39710 }, { "epoch": 0.15118412338329668, "grad_norm": 0.1312699317932129, "learning_rate": 0.0005, "loss": 2.1291, "step": 39720 }, { "epoch": 0.15122218585141936, "grad_norm": 0.1251794397830963, "learning_rate": 0.0005, "loss": 2.1274, "step": 39730 }, { "epoch": 0.15126024831954205, "grad_norm": 0.11780116707086563, "learning_rate": 0.0005, "loss": 2.1426, "step": 39740 }, { "epoch": 0.1512983107876647, "grad_norm": 0.1149701476097107, "learning_rate": 0.0005, "loss": 2.1355, "step": 39750 }, { "epoch": 0.1513363732557874, "grad_norm": 0.13181151449680328, "learning_rate": 0.0005, "loss": 2.1472, "step": 39760 }, { "epoch": 0.15137443572391007, "grad_norm": 0.14071857929229736, "learning_rate": 0.0005, "loss": 2.1413, "step": 39770 }, { "epoch": 0.15141249819203276, "grad_norm": 0.12593907117843628, "learning_rate": 0.0005, "loss": 2.1336, "step": 39780 }, { "epoch": 0.15145056066015544, "grad_norm": 0.13257673382759094, "learning_rate": 0.0005, "loss": 2.141, "step": 39790 }, { "epoch": 0.15148862312827813, "grad_norm": 0.12204756587743759, "learning_rate": 0.0005, "loss": 2.1429, "step": 39800 }, { "epoch": 0.1515266855964008, "grad_norm": 0.15629100799560547, "learning_rate": 0.0005, "loss": 2.1429, "step": 39810 }, { "epoch": 0.1515647480645235, "grad_norm": 0.13981211185455322, "learning_rate": 0.0005, "loss": 2.1264, "step": 39820 }, { "epoch": 0.15160281053264618, "grad_norm": 0.11483721435070038, "learning_rate": 0.0005, "loss": 2.1261, "step": 39830 }, { "epoch": 0.15164087300076887, "grad_norm": 0.11665428429841995, "learning_rate": 0.0005, "loss": 2.1342, "step": 39840 }, { "epoch": 0.15167893546889155, "grad_norm": 0.11376866698265076, "learning_rate": 0.0005, "loss": 2.1461, "step": 39850 }, { "epoch": 0.15171699793701424, "grad_norm": 0.12335699051618576, "learning_rate": 0.0005, "loss": 2.1308, "step": 39860 }, { "epoch": 0.15175506040513692, "grad_norm": 0.13599348068237305, "learning_rate": 0.0005, "loss": 2.1479, "step": 39870 }, { "epoch": 0.1517931228732596, "grad_norm": 0.11808918416500092, "learning_rate": 0.0005, "loss": 2.1443, "step": 39880 }, { "epoch": 0.15183118534138226, "grad_norm": 0.1326199173927307, "learning_rate": 0.0005, "loss": 2.1335, "step": 39890 }, { "epoch": 0.15186924780950495, "grad_norm": 0.1278129518032074, "learning_rate": 0.0005, "loss": 2.1327, "step": 39900 }, { "epoch": 0.15190731027762763, "grad_norm": 0.12414394319057465, "learning_rate": 0.0005, "loss": 2.1505, "step": 39910 }, { "epoch": 0.15194537274575032, "grad_norm": 0.14067183434963226, "learning_rate": 0.0005, "loss": 2.1294, "step": 39920 }, { "epoch": 0.151983435213873, "grad_norm": 0.13897956907749176, "learning_rate": 0.0005, "loss": 2.1325, "step": 39930 }, { "epoch": 0.1520214976819957, "grad_norm": 0.12281318753957748, "learning_rate": 0.0005, "loss": 2.1302, "step": 39940 }, { "epoch": 0.15205956015011837, "grad_norm": 0.11802121996879578, "learning_rate": 0.0005, "loss": 2.1195, "step": 39950 }, { "epoch": 0.15209762261824106, "grad_norm": 0.14475691318511963, "learning_rate": 0.0005, "loss": 2.1357, "step": 39960 }, { "epoch": 0.15213568508636374, "grad_norm": 0.1253284513950348, "learning_rate": 0.0005, "loss": 2.151, "step": 39970 }, { "epoch": 0.15217374755448643, "grad_norm": 0.12120725214481354, "learning_rate": 0.0005, "loss": 2.1443, "step": 39980 }, { "epoch": 0.1522118100226091, "grad_norm": 0.12783914804458618, "learning_rate": 0.0005, "loss": 2.146, "step": 39990 }, { "epoch": 0.1522498724907318, "grad_norm": 0.11306191235780716, "learning_rate": 0.0005, "loss": 2.1448, "step": 40000 }, { "epoch": 0.15228793495885448, "grad_norm": 0.12707388401031494, "learning_rate": 0.0005, "loss": 2.128, "step": 40010 }, { "epoch": 0.15232599742697717, "grad_norm": 0.11756369471549988, "learning_rate": 0.0005, "loss": 2.1243, "step": 40020 }, { "epoch": 0.15236405989509985, "grad_norm": 0.141278937458992, "learning_rate": 0.0005, "loss": 2.1477, "step": 40030 }, { "epoch": 0.1524021223632225, "grad_norm": 0.12917031347751617, "learning_rate": 0.0005, "loss": 2.1303, "step": 40040 }, { "epoch": 0.1524401848313452, "grad_norm": 0.1284797489643097, "learning_rate": 0.0005, "loss": 2.1343, "step": 40050 }, { "epoch": 0.15247824729946788, "grad_norm": 0.1224033460021019, "learning_rate": 0.0005, "loss": 2.1427, "step": 40060 }, { "epoch": 0.15251630976759056, "grad_norm": 0.12608686089515686, "learning_rate": 0.0005, "loss": 2.1407, "step": 40070 }, { "epoch": 0.15255437223571325, "grad_norm": 0.15755626559257507, "learning_rate": 0.0005, "loss": 2.1297, "step": 40080 }, { "epoch": 0.15259243470383593, "grad_norm": 0.1173790842294693, "learning_rate": 0.0005, "loss": 2.1602, "step": 40090 }, { "epoch": 0.15263049717195862, "grad_norm": 0.12344210594892502, "learning_rate": 0.0005, "loss": 2.134, "step": 40100 }, { "epoch": 0.1526685596400813, "grad_norm": 0.12379170209169388, "learning_rate": 0.0005, "loss": 2.1242, "step": 40110 }, { "epoch": 0.152706622108204, "grad_norm": 0.13954222202301025, "learning_rate": 0.0005, "loss": 2.1294, "step": 40120 }, { "epoch": 0.15274468457632667, "grad_norm": 0.12113303691148758, "learning_rate": 0.0005, "loss": 2.1259, "step": 40130 }, { "epoch": 0.15278274704444936, "grad_norm": 0.12871098518371582, "learning_rate": 0.0005, "loss": 2.1375, "step": 40140 }, { "epoch": 0.15282080951257204, "grad_norm": 0.12698256969451904, "learning_rate": 0.0005, "loss": 2.1312, "step": 40150 }, { "epoch": 0.15285887198069473, "grad_norm": 0.12224501371383667, "learning_rate": 0.0005, "loss": 2.1305, "step": 40160 }, { "epoch": 0.1528969344488174, "grad_norm": 0.1328393965959549, "learning_rate": 0.0005, "loss": 2.1285, "step": 40170 }, { "epoch": 0.15293499691694007, "grad_norm": 0.13421137630939484, "learning_rate": 0.0005, "loss": 2.1275, "step": 40180 }, { "epoch": 0.15297305938506275, "grad_norm": 0.13883419334888458, "learning_rate": 0.0005, "loss": 2.1313, "step": 40190 }, { "epoch": 0.15301112185318544, "grad_norm": 0.13581129908561707, "learning_rate": 0.0005, "loss": 2.1326, "step": 40200 }, { "epoch": 0.15304918432130812, "grad_norm": 0.11238997429609299, "learning_rate": 0.0005, "loss": 2.1365, "step": 40210 }, { "epoch": 0.1530872467894308, "grad_norm": 0.13544441759586334, "learning_rate": 0.0005, "loss": 2.1446, "step": 40220 }, { "epoch": 0.1531253092575535, "grad_norm": 0.11856289207935333, "learning_rate": 0.0005, "loss": 2.1271, "step": 40230 }, { "epoch": 0.15316337172567618, "grad_norm": 0.12014134973287582, "learning_rate": 0.0005, "loss": 2.1467, "step": 40240 }, { "epoch": 0.15320143419379886, "grad_norm": 0.11337390542030334, "learning_rate": 0.0005, "loss": 2.1299, "step": 40250 }, { "epoch": 0.15323949666192155, "grad_norm": 0.12547358870506287, "learning_rate": 0.0005, "loss": 2.1274, "step": 40260 }, { "epoch": 0.15327755913004423, "grad_norm": 0.12301641702651978, "learning_rate": 0.0005, "loss": 2.1399, "step": 40270 }, { "epoch": 0.15331562159816692, "grad_norm": 0.1294236034154892, "learning_rate": 0.0005, "loss": 2.1365, "step": 40280 }, { "epoch": 0.1533536840662896, "grad_norm": 0.14758537709712982, "learning_rate": 0.0005, "loss": 2.1292, "step": 40290 }, { "epoch": 0.1533917465344123, "grad_norm": 0.12281176447868347, "learning_rate": 0.0005, "loss": 2.134, "step": 40300 }, { "epoch": 0.15342980900253497, "grad_norm": 0.11867820471525192, "learning_rate": 0.0005, "loss": 2.1365, "step": 40310 }, { "epoch": 0.15346787147065763, "grad_norm": 0.12238939106464386, "learning_rate": 0.0005, "loss": 2.1349, "step": 40320 }, { "epoch": 0.15350593393878031, "grad_norm": 0.11539949476718903, "learning_rate": 0.0005, "loss": 2.1523, "step": 40330 }, { "epoch": 0.153543996406903, "grad_norm": 0.1254558265209198, "learning_rate": 0.0005, "loss": 2.1417, "step": 40340 }, { "epoch": 0.15358205887502568, "grad_norm": 0.15088531374931335, "learning_rate": 0.0005, "loss": 2.1451, "step": 40350 }, { "epoch": 0.15362012134314837, "grad_norm": 0.13487495481967926, "learning_rate": 0.0005, "loss": 2.1348, "step": 40360 }, { "epoch": 0.15365818381127105, "grad_norm": 0.136166051030159, "learning_rate": 0.0005, "loss": 2.1509, "step": 40370 }, { "epoch": 0.15369624627939374, "grad_norm": 0.11267966777086258, "learning_rate": 0.0005, "loss": 2.1343, "step": 40380 }, { "epoch": 0.15373430874751642, "grad_norm": 0.11552360653877258, "learning_rate": 0.0005, "loss": 2.1241, "step": 40390 }, { "epoch": 0.1537723712156391, "grad_norm": 0.11731477081775665, "learning_rate": 0.0005, "loss": 2.1403, "step": 40400 }, { "epoch": 0.1538104336837618, "grad_norm": 0.12443472445011139, "learning_rate": 0.0005, "loss": 2.1284, "step": 40410 }, { "epoch": 0.15384849615188448, "grad_norm": 0.11814551055431366, "learning_rate": 0.0005, "loss": 2.1328, "step": 40420 }, { "epoch": 0.15388655862000716, "grad_norm": 0.13402269780635834, "learning_rate": 0.0005, "loss": 2.1412, "step": 40430 }, { "epoch": 0.15392462108812985, "grad_norm": 0.11917763948440552, "learning_rate": 0.0005, "loss": 2.1248, "step": 40440 }, { "epoch": 0.15396268355625253, "grad_norm": 0.12332938611507416, "learning_rate": 0.0005, "loss": 2.1486, "step": 40450 }, { "epoch": 0.15400074602437522, "grad_norm": 0.12632392346858978, "learning_rate": 0.0005, "loss": 2.126, "step": 40460 }, { "epoch": 0.15403880849249788, "grad_norm": 0.12391570955514908, "learning_rate": 0.0005, "loss": 2.125, "step": 40470 }, { "epoch": 0.15407687096062056, "grad_norm": 0.11539748311042786, "learning_rate": 0.0005, "loss": 2.1373, "step": 40480 }, { "epoch": 0.15411493342874324, "grad_norm": 0.1276472806930542, "learning_rate": 0.0005, "loss": 2.1281, "step": 40490 }, { "epoch": 0.15415299589686593, "grad_norm": 0.13653264939785004, "learning_rate": 0.0005, "loss": 2.1377, "step": 40500 }, { "epoch": 0.15419105836498861, "grad_norm": 0.12682753801345825, "learning_rate": 0.0005, "loss": 2.1505, "step": 40510 }, { "epoch": 0.1542291208331113, "grad_norm": 0.11323153972625732, "learning_rate": 0.0005, "loss": 2.1355, "step": 40520 }, { "epoch": 0.15426718330123398, "grad_norm": 0.13077697157859802, "learning_rate": 0.0005, "loss": 2.1446, "step": 40530 }, { "epoch": 0.15430524576935667, "grad_norm": 0.1277543604373932, "learning_rate": 0.0005, "loss": 2.1449, "step": 40540 }, { "epoch": 0.15434330823747935, "grad_norm": 0.12148383259773254, "learning_rate": 0.0005, "loss": 2.1351, "step": 40550 }, { "epoch": 0.15438137070560204, "grad_norm": 0.12244511395692825, "learning_rate": 0.0005, "loss": 2.1362, "step": 40560 }, { "epoch": 0.15441943317372472, "grad_norm": 0.12497160583734512, "learning_rate": 0.0005, "loss": 2.1458, "step": 40570 }, { "epoch": 0.1544574956418474, "grad_norm": 0.1199488416314125, "learning_rate": 0.0005, "loss": 2.1312, "step": 40580 }, { "epoch": 0.1544955581099701, "grad_norm": 0.13732865452766418, "learning_rate": 0.0005, "loss": 2.1388, "step": 40590 }, { "epoch": 0.15453362057809278, "grad_norm": 0.14091861248016357, "learning_rate": 0.0005, "loss": 2.1436, "step": 40600 }, { "epoch": 0.15457168304621544, "grad_norm": 0.1148281842470169, "learning_rate": 0.0005, "loss": 2.1462, "step": 40610 }, { "epoch": 0.15460974551433812, "grad_norm": 0.12657825648784637, "learning_rate": 0.0005, "loss": 2.1313, "step": 40620 }, { "epoch": 0.1546478079824608, "grad_norm": 0.11575401574373245, "learning_rate": 0.0005, "loss": 2.1286, "step": 40630 }, { "epoch": 0.1546858704505835, "grad_norm": 0.13155962526798248, "learning_rate": 0.0005, "loss": 2.1311, "step": 40640 }, { "epoch": 0.15472393291870618, "grad_norm": 0.13908006250858307, "learning_rate": 0.0005, "loss": 2.1377, "step": 40650 }, { "epoch": 0.15476199538682886, "grad_norm": 0.12285695225000381, "learning_rate": 0.0005, "loss": 2.1413, "step": 40660 }, { "epoch": 0.15480005785495154, "grad_norm": 0.11925322562456131, "learning_rate": 0.0005, "loss": 2.1431, "step": 40670 }, { "epoch": 0.15483812032307423, "grad_norm": 0.12518733739852905, "learning_rate": 0.0005, "loss": 2.1336, "step": 40680 }, { "epoch": 0.15487618279119691, "grad_norm": 0.11846065521240234, "learning_rate": 0.0005, "loss": 2.1404, "step": 40690 }, { "epoch": 0.1549142452593196, "grad_norm": 0.13635188341140747, "learning_rate": 0.0005, "loss": 2.124, "step": 40700 }, { "epoch": 0.15495230772744228, "grad_norm": 0.11662866175174713, "learning_rate": 0.0005, "loss": 2.1289, "step": 40710 }, { "epoch": 0.15499037019556497, "grad_norm": 0.13562005758285522, "learning_rate": 0.0005, "loss": 2.1367, "step": 40720 }, { "epoch": 0.15502843266368765, "grad_norm": 0.12767818570137024, "learning_rate": 0.0005, "loss": 2.1217, "step": 40730 }, { "epoch": 0.15506649513181034, "grad_norm": 0.13494707643985748, "learning_rate": 0.0005, "loss": 2.1507, "step": 40740 }, { "epoch": 0.155104557599933, "grad_norm": 0.13277818262577057, "learning_rate": 0.0005, "loss": 2.1344, "step": 40750 }, { "epoch": 0.15514262006805568, "grad_norm": 0.7971560955047607, "learning_rate": 0.0005, "loss": 2.148, "step": 40760 }, { "epoch": 0.15518068253617837, "grad_norm": 0.12246012687683105, "learning_rate": 0.0005, "loss": 2.1194, "step": 40770 }, { "epoch": 0.15521874500430105, "grad_norm": 0.12156087160110474, "learning_rate": 0.0005, "loss": 2.1371, "step": 40780 }, { "epoch": 0.15525680747242374, "grad_norm": 0.12802185118198395, "learning_rate": 0.0005, "loss": 2.1207, "step": 40790 }, { "epoch": 0.15529486994054642, "grad_norm": 0.11559620499610901, "learning_rate": 0.0005, "loss": 2.1346, "step": 40800 }, { "epoch": 0.1553329324086691, "grad_norm": 0.12793463468551636, "learning_rate": 0.0005, "loss": 2.1355, "step": 40810 }, { "epoch": 0.1553709948767918, "grad_norm": 0.1196453645825386, "learning_rate": 0.0005, "loss": 2.1319, "step": 40820 }, { "epoch": 0.15540905734491448, "grad_norm": 0.13778267800807953, "learning_rate": 0.0005, "loss": 2.1407, "step": 40830 }, { "epoch": 0.15544711981303716, "grad_norm": 0.126423642039299, "learning_rate": 0.0005, "loss": 2.1421, "step": 40840 }, { "epoch": 0.15548518228115984, "grad_norm": 0.12409335374832153, "learning_rate": 0.0005, "loss": 2.1521, "step": 40850 }, { "epoch": 0.15552324474928253, "grad_norm": 0.1275656372308731, "learning_rate": 0.0005, "loss": 2.1383, "step": 40860 }, { "epoch": 0.15556130721740521, "grad_norm": 0.12488967180252075, "learning_rate": 0.0005, "loss": 2.1388, "step": 40870 }, { "epoch": 0.1555993696855279, "grad_norm": 0.1289723962545395, "learning_rate": 0.0005, "loss": 2.1321, "step": 40880 }, { "epoch": 0.15563743215365058, "grad_norm": 0.12757979333400726, "learning_rate": 0.0005, "loss": 2.1268, "step": 40890 }, { "epoch": 0.15567549462177324, "grad_norm": 0.11737728863954544, "learning_rate": 0.0005, "loss": 2.1381, "step": 40900 }, { "epoch": 0.15571355708989593, "grad_norm": 0.13236309587955475, "learning_rate": 0.0005, "loss": 2.1414, "step": 40910 }, { "epoch": 0.1557516195580186, "grad_norm": 0.12473408877849579, "learning_rate": 0.0005, "loss": 2.1417, "step": 40920 }, { "epoch": 0.1557896820261413, "grad_norm": 0.12784703075885773, "learning_rate": 0.0005, "loss": 2.1314, "step": 40930 }, { "epoch": 0.15582774449426398, "grad_norm": 0.11651159822940826, "learning_rate": 0.0005, "loss": 2.1386, "step": 40940 }, { "epoch": 0.15586580696238667, "grad_norm": 0.13728098571300507, "learning_rate": 0.0005, "loss": 2.1454, "step": 40950 }, { "epoch": 0.15590386943050935, "grad_norm": 0.13878300786018372, "learning_rate": 0.0005, "loss": 2.1276, "step": 40960 }, { "epoch": 0.15594193189863204, "grad_norm": 0.12241476029157639, "learning_rate": 0.0005, "loss": 2.1285, "step": 40970 }, { "epoch": 0.15597999436675472, "grad_norm": 0.13079239428043365, "learning_rate": 0.0005, "loss": 2.1438, "step": 40980 }, { "epoch": 0.1560180568348774, "grad_norm": 0.1226423978805542, "learning_rate": 0.0005, "loss": 2.139, "step": 40990 }, { "epoch": 0.1560561193030001, "grad_norm": 0.12032759934663773, "learning_rate": 0.0005, "loss": 2.134, "step": 41000 }, { "epoch": 0.15609418177112278, "grad_norm": 0.12876206636428833, "learning_rate": 0.0005, "loss": 2.1325, "step": 41010 }, { "epoch": 0.15613224423924546, "grad_norm": 0.12257958203554153, "learning_rate": 0.0005, "loss": 2.1355, "step": 41020 }, { "epoch": 0.15617030670736815, "grad_norm": 0.12270672619342804, "learning_rate": 0.0005, "loss": 2.142, "step": 41030 }, { "epoch": 0.1562083691754908, "grad_norm": 0.11476442217826843, "learning_rate": 0.0005, "loss": 2.1452, "step": 41040 }, { "epoch": 0.1562464316436135, "grad_norm": 0.11680492013692856, "learning_rate": 0.0005, "loss": 2.1441, "step": 41050 }, { "epoch": 0.15628449411173617, "grad_norm": 0.13939838111400604, "learning_rate": 0.0005, "loss": 2.1311, "step": 41060 }, { "epoch": 0.15632255657985886, "grad_norm": 0.13249801099300385, "learning_rate": 0.0005, "loss": 2.1224, "step": 41070 }, { "epoch": 0.15636061904798154, "grad_norm": 0.12342536449432373, "learning_rate": 0.0005, "loss": 2.1267, "step": 41080 }, { "epoch": 0.15639868151610423, "grad_norm": 0.11840111017227173, "learning_rate": 0.0005, "loss": 2.1426, "step": 41090 }, { "epoch": 0.1564367439842269, "grad_norm": 0.12191004306077957, "learning_rate": 0.0005, "loss": 2.1316, "step": 41100 }, { "epoch": 0.1564748064523496, "grad_norm": 0.1340741664171219, "learning_rate": 0.0005, "loss": 2.1457, "step": 41110 }, { "epoch": 0.15651286892047228, "grad_norm": 0.12074489146471024, "learning_rate": 0.0005, "loss": 2.1334, "step": 41120 }, { "epoch": 0.15655093138859497, "grad_norm": 0.12547871470451355, "learning_rate": 0.0005, "loss": 2.1347, "step": 41130 }, { "epoch": 0.15658899385671765, "grad_norm": 0.12773819267749786, "learning_rate": 0.0005, "loss": 2.143, "step": 41140 }, { "epoch": 0.15662705632484034, "grad_norm": 0.12264664471149445, "learning_rate": 0.0005, "loss": 2.1428, "step": 41150 }, { "epoch": 0.15666511879296302, "grad_norm": 0.12241547554731369, "learning_rate": 0.0005, "loss": 2.1287, "step": 41160 }, { "epoch": 0.1567031812610857, "grad_norm": 0.12571392953395844, "learning_rate": 0.0005, "loss": 2.1287, "step": 41170 }, { "epoch": 0.1567412437292084, "grad_norm": 0.11955336481332779, "learning_rate": 0.0005, "loss": 2.123, "step": 41180 }, { "epoch": 0.15677930619733105, "grad_norm": 0.13170619308948517, "learning_rate": 0.0005, "loss": 2.1424, "step": 41190 }, { "epoch": 0.15681736866545373, "grad_norm": 0.12544187903404236, "learning_rate": 0.0005, "loss": 2.1166, "step": 41200 }, { "epoch": 0.15685543113357642, "grad_norm": 0.11725877970457077, "learning_rate": 0.0005, "loss": 2.1365, "step": 41210 }, { "epoch": 0.1568934936016991, "grad_norm": 0.11695241928100586, "learning_rate": 0.0005, "loss": 2.1501, "step": 41220 }, { "epoch": 0.1569315560698218, "grad_norm": 0.12395942211151123, "learning_rate": 0.0005, "loss": 2.1436, "step": 41230 }, { "epoch": 0.15696961853794447, "grad_norm": 0.12017051875591278, "learning_rate": 0.0005, "loss": 2.1365, "step": 41240 }, { "epoch": 0.15700768100606716, "grad_norm": 0.11665423214435577, "learning_rate": 0.0005, "loss": 2.1464, "step": 41250 }, { "epoch": 0.15704574347418984, "grad_norm": 0.11894332617521286, "learning_rate": 0.0005, "loss": 2.1289, "step": 41260 }, { "epoch": 0.15708380594231253, "grad_norm": 0.1246785819530487, "learning_rate": 0.0005, "loss": 2.1309, "step": 41270 }, { "epoch": 0.1571218684104352, "grad_norm": 0.12720352411270142, "learning_rate": 0.0005, "loss": 2.1295, "step": 41280 }, { "epoch": 0.1571599308785579, "grad_norm": 0.13201768696308136, "learning_rate": 0.0005, "loss": 2.1346, "step": 41290 }, { "epoch": 0.15719799334668058, "grad_norm": 0.11735563725233078, "learning_rate": 0.0005, "loss": 2.129, "step": 41300 }, { "epoch": 0.15723605581480327, "grad_norm": 0.13252070546150208, "learning_rate": 0.0005, "loss": 2.1549, "step": 41310 }, { "epoch": 0.15727411828292595, "grad_norm": 0.1171932965517044, "learning_rate": 0.0005, "loss": 2.1222, "step": 41320 }, { "epoch": 0.1573121807510486, "grad_norm": 0.12152529507875443, "learning_rate": 0.0005, "loss": 2.1373, "step": 41330 }, { "epoch": 0.1573502432191713, "grad_norm": 0.14090144634246826, "learning_rate": 0.0005, "loss": 2.119, "step": 41340 }, { "epoch": 0.15738830568729398, "grad_norm": 0.13475215435028076, "learning_rate": 0.0005, "loss": 2.1451, "step": 41350 }, { "epoch": 0.15742636815541666, "grad_norm": 0.11995682120323181, "learning_rate": 0.0005, "loss": 2.1244, "step": 41360 }, { "epoch": 0.15746443062353935, "grad_norm": 0.13988709449768066, "learning_rate": 0.0005, "loss": 2.1385, "step": 41370 }, { "epoch": 0.15750249309166203, "grad_norm": 0.12187988311052322, "learning_rate": 0.0005, "loss": 2.1395, "step": 41380 }, { "epoch": 0.15754055555978472, "grad_norm": 0.1212259903550148, "learning_rate": 0.0005, "loss": 2.134, "step": 41390 }, { "epoch": 0.1575786180279074, "grad_norm": 0.12687280774116516, "learning_rate": 0.0005, "loss": 2.1447, "step": 41400 }, { "epoch": 0.1576166804960301, "grad_norm": 0.1433592289686203, "learning_rate": 0.0005, "loss": 2.1399, "step": 41410 }, { "epoch": 0.15765474296415277, "grad_norm": 0.13183125853538513, "learning_rate": 0.0005, "loss": 2.1437, "step": 41420 }, { "epoch": 0.15769280543227546, "grad_norm": 0.12849292159080505, "learning_rate": 0.0005, "loss": 2.1209, "step": 41430 }, { "epoch": 0.15773086790039814, "grad_norm": 0.11942418664693832, "learning_rate": 0.0005, "loss": 2.1183, "step": 41440 }, { "epoch": 0.15776893036852083, "grad_norm": 0.12183071672916412, "learning_rate": 0.0005, "loss": 2.1291, "step": 41450 }, { "epoch": 0.1578069928366435, "grad_norm": 0.12043503671884537, "learning_rate": 0.0005, "loss": 2.1314, "step": 41460 }, { "epoch": 0.15784505530476617, "grad_norm": 0.10971025377511978, "learning_rate": 0.0005, "loss": 2.1412, "step": 41470 }, { "epoch": 0.15788311777288885, "grad_norm": 0.11589441448450089, "learning_rate": 0.0005, "loss": 2.1346, "step": 41480 }, { "epoch": 0.15792118024101154, "grad_norm": 0.12625034153461456, "learning_rate": 0.0005, "loss": 2.1373, "step": 41490 }, { "epoch": 0.15795924270913422, "grad_norm": 0.1306608021259308, "learning_rate": 0.0005, "loss": 2.1376, "step": 41500 }, { "epoch": 0.1579973051772569, "grad_norm": 0.11992191523313522, "learning_rate": 0.0005, "loss": 2.1298, "step": 41510 }, { "epoch": 0.1580353676453796, "grad_norm": 0.14196783304214478, "learning_rate": 0.0005, "loss": 2.1394, "step": 41520 }, { "epoch": 0.15807343011350228, "grad_norm": 0.11304987221956253, "learning_rate": 0.0005, "loss": 2.1375, "step": 41530 }, { "epoch": 0.15811149258162496, "grad_norm": 0.12227694690227509, "learning_rate": 0.0005, "loss": 2.1384, "step": 41540 }, { "epoch": 0.15814955504974765, "grad_norm": 0.1242867261171341, "learning_rate": 0.0005, "loss": 2.1383, "step": 41550 }, { "epoch": 0.15818761751787033, "grad_norm": 0.13180440664291382, "learning_rate": 0.0005, "loss": 2.1326, "step": 41560 }, { "epoch": 0.15822567998599302, "grad_norm": 0.1172434613108635, "learning_rate": 0.0005, "loss": 2.1359, "step": 41570 }, { "epoch": 0.1582637424541157, "grad_norm": 0.12316454201936722, "learning_rate": 0.0005, "loss": 2.149, "step": 41580 }, { "epoch": 0.1583018049222384, "grad_norm": 0.1225380226969719, "learning_rate": 0.0005, "loss": 2.1396, "step": 41590 }, { "epoch": 0.15833986739036107, "grad_norm": 0.12523019313812256, "learning_rate": 0.0005, "loss": 2.1303, "step": 41600 }, { "epoch": 0.15837792985848376, "grad_norm": 0.14092890918254852, "learning_rate": 0.0005, "loss": 2.1405, "step": 41610 }, { "epoch": 0.15841599232660641, "grad_norm": 0.1391395926475525, "learning_rate": 0.0005, "loss": 2.1279, "step": 41620 }, { "epoch": 0.1584540547947291, "grad_norm": 0.12023383378982544, "learning_rate": 0.0005, "loss": 2.1359, "step": 41630 }, { "epoch": 0.15849211726285178, "grad_norm": 0.13288703560829163, "learning_rate": 0.0005, "loss": 2.1679, "step": 41640 }, { "epoch": 0.15853017973097447, "grad_norm": 0.12219773977994919, "learning_rate": 0.0005, "loss": 2.1414, "step": 41650 }, { "epoch": 0.15856824219909715, "grad_norm": 0.11769925057888031, "learning_rate": 0.0005, "loss": 2.1457, "step": 41660 }, { "epoch": 0.15860630466721984, "grad_norm": 0.11972153186798096, "learning_rate": 0.0005, "loss": 2.1434, "step": 41670 }, { "epoch": 0.15864436713534252, "grad_norm": 0.12952840328216553, "learning_rate": 0.0005, "loss": 2.1337, "step": 41680 }, { "epoch": 0.1586824296034652, "grad_norm": 0.11720789968967438, "learning_rate": 0.0005, "loss": 2.1528, "step": 41690 }, { "epoch": 0.1587204920715879, "grad_norm": 0.11736951768398285, "learning_rate": 0.0005, "loss": 2.1399, "step": 41700 }, { "epoch": 0.15875855453971058, "grad_norm": 0.10920219868421555, "learning_rate": 0.0005, "loss": 2.1333, "step": 41710 }, { "epoch": 0.15879661700783326, "grad_norm": 0.12645253539085388, "learning_rate": 0.0005, "loss": 2.1286, "step": 41720 }, { "epoch": 0.15883467947595595, "grad_norm": 0.1483883261680603, "learning_rate": 0.0005, "loss": 2.1408, "step": 41730 }, { "epoch": 0.15887274194407863, "grad_norm": 0.12281257659196854, "learning_rate": 0.0005, "loss": 2.1305, "step": 41740 }, { "epoch": 0.15891080441220132, "grad_norm": 0.12573546171188354, "learning_rate": 0.0005, "loss": 2.1374, "step": 41750 }, { "epoch": 0.15894886688032397, "grad_norm": 0.13053619861602783, "learning_rate": 0.0005, "loss": 2.1303, "step": 41760 }, { "epoch": 0.15898692934844666, "grad_norm": 0.12806281447410583, "learning_rate": 0.0005, "loss": 2.1331, "step": 41770 }, { "epoch": 0.15902499181656934, "grad_norm": 0.13922947645187378, "learning_rate": 0.0005, "loss": 2.1341, "step": 41780 }, { "epoch": 0.15906305428469203, "grad_norm": 0.12998144328594208, "learning_rate": 0.0005, "loss": 2.1426, "step": 41790 }, { "epoch": 0.15910111675281471, "grad_norm": 0.12506332993507385, "learning_rate": 0.0005, "loss": 2.132, "step": 41800 }, { "epoch": 0.1591391792209374, "grad_norm": 0.1313193440437317, "learning_rate": 0.0005, "loss": 2.1467, "step": 41810 }, { "epoch": 0.15917724168906008, "grad_norm": 0.12198295444250107, "learning_rate": 0.0005, "loss": 2.1328, "step": 41820 }, { "epoch": 0.15921530415718277, "grad_norm": 0.11847379803657532, "learning_rate": 0.0005, "loss": 2.1411, "step": 41830 }, { "epoch": 0.15925336662530545, "grad_norm": 0.14477434754371643, "learning_rate": 0.0005, "loss": 2.1511, "step": 41840 }, { "epoch": 0.15929142909342814, "grad_norm": 0.12056008726358414, "learning_rate": 0.0005, "loss": 2.121, "step": 41850 }, { "epoch": 0.15932949156155082, "grad_norm": 0.11610439419746399, "learning_rate": 0.0005, "loss": 2.1319, "step": 41860 }, { "epoch": 0.1593675540296735, "grad_norm": 0.1233954057097435, "learning_rate": 0.0005, "loss": 2.1313, "step": 41870 }, { "epoch": 0.1594056164977962, "grad_norm": 0.11605192720890045, "learning_rate": 0.0005, "loss": 2.1157, "step": 41880 }, { "epoch": 0.15944367896591888, "grad_norm": 0.13223831355571747, "learning_rate": 0.0005, "loss": 2.1379, "step": 41890 }, { "epoch": 0.15948174143404154, "grad_norm": 0.12090417742729187, "learning_rate": 0.0005, "loss": 2.1552, "step": 41900 }, { "epoch": 0.15951980390216422, "grad_norm": 0.1277896612882614, "learning_rate": 0.0005, "loss": 2.1311, "step": 41910 }, { "epoch": 0.1595578663702869, "grad_norm": 0.12657415866851807, "learning_rate": 0.0005, "loss": 2.1379, "step": 41920 }, { "epoch": 0.1595959288384096, "grad_norm": 0.11490483582019806, "learning_rate": 0.0005, "loss": 2.1395, "step": 41930 }, { "epoch": 0.15963399130653227, "grad_norm": 0.1302410364151001, "learning_rate": 0.0005, "loss": 2.1389, "step": 41940 }, { "epoch": 0.15967205377465496, "grad_norm": 0.12107552587985992, "learning_rate": 0.0005, "loss": 2.1294, "step": 41950 }, { "epoch": 0.15971011624277764, "grad_norm": 0.1330546736717224, "learning_rate": 0.0005, "loss": 2.1614, "step": 41960 }, { "epoch": 0.15974817871090033, "grad_norm": 0.130593940615654, "learning_rate": 0.0005, "loss": 2.1297, "step": 41970 }, { "epoch": 0.15978624117902301, "grad_norm": 0.11943244934082031, "learning_rate": 0.0005, "loss": 2.1487, "step": 41980 }, { "epoch": 0.1598243036471457, "grad_norm": 0.12528935074806213, "learning_rate": 0.0005, "loss": 2.135, "step": 41990 }, { "epoch": 0.15986236611526838, "grad_norm": 0.11964350938796997, "learning_rate": 0.0005, "loss": 2.1289, "step": 42000 }, { "epoch": 0.15990042858339107, "grad_norm": 0.1630644053220749, "learning_rate": 0.0005, "loss": 2.1479, "step": 42010 }, { "epoch": 0.15993849105151375, "grad_norm": 0.13449083268642426, "learning_rate": 0.0005, "loss": 2.1322, "step": 42020 }, { "epoch": 0.15997655351963644, "grad_norm": 0.13047908246517181, "learning_rate": 0.0005, "loss": 2.1371, "step": 42030 }, { "epoch": 0.16001461598775912, "grad_norm": 0.14681918919086456, "learning_rate": 0.0005, "loss": 2.1139, "step": 42040 }, { "epoch": 0.16005267845588178, "grad_norm": 0.11368191987276077, "learning_rate": 0.0005, "loss": 2.1444, "step": 42050 }, { "epoch": 0.16009074092400447, "grad_norm": 0.11434902995824814, "learning_rate": 0.0005, "loss": 2.135, "step": 42060 }, { "epoch": 0.16012880339212715, "grad_norm": 0.11909456551074982, "learning_rate": 0.0005, "loss": 2.1259, "step": 42070 }, { "epoch": 0.16016686586024984, "grad_norm": 0.11459674686193466, "learning_rate": 0.0005, "loss": 2.1311, "step": 42080 }, { "epoch": 0.16020492832837252, "grad_norm": 0.11956145614385605, "learning_rate": 0.0005, "loss": 2.1435, "step": 42090 }, { "epoch": 0.1602429907964952, "grad_norm": 0.12218481302261353, "learning_rate": 0.0005, "loss": 2.124, "step": 42100 }, { "epoch": 0.1602810532646179, "grad_norm": 0.1367029994726181, "learning_rate": 0.0005, "loss": 2.1351, "step": 42110 }, { "epoch": 0.16031911573274057, "grad_norm": 0.13083545863628387, "learning_rate": 0.0005, "loss": 2.121, "step": 42120 }, { "epoch": 0.16035717820086326, "grad_norm": 0.14804820716381073, "learning_rate": 0.0005, "loss": 2.1491, "step": 42130 }, { "epoch": 0.16039524066898594, "grad_norm": 0.12293334305286407, "learning_rate": 0.0005, "loss": 2.1339, "step": 42140 }, { "epoch": 0.16043330313710863, "grad_norm": 0.12244443595409393, "learning_rate": 0.0005, "loss": 2.137, "step": 42150 }, { "epoch": 0.16047136560523131, "grad_norm": 0.11816592514514923, "learning_rate": 0.0005, "loss": 2.125, "step": 42160 }, { "epoch": 0.160509428073354, "grad_norm": 0.12204094231128693, "learning_rate": 0.0005, "loss": 2.1287, "step": 42170 }, { "epoch": 0.16054749054147668, "grad_norm": 0.1250113993883133, "learning_rate": 0.0005, "loss": 2.1502, "step": 42180 }, { "epoch": 0.16058555300959934, "grad_norm": 0.1222028136253357, "learning_rate": 0.0005, "loss": 2.1301, "step": 42190 }, { "epoch": 0.16062361547772203, "grad_norm": 0.1175677701830864, "learning_rate": 0.0005, "loss": 2.1309, "step": 42200 }, { "epoch": 0.1606616779458447, "grad_norm": 0.11348593980073929, "learning_rate": 0.0005, "loss": 2.1438, "step": 42210 }, { "epoch": 0.1606997404139674, "grad_norm": 0.1285901814699173, "learning_rate": 0.0005, "loss": 2.1313, "step": 42220 }, { "epoch": 0.16073780288209008, "grad_norm": 0.12811289727687836, "learning_rate": 0.0005, "loss": 2.1361, "step": 42230 }, { "epoch": 0.16077586535021277, "grad_norm": 0.12517668306827545, "learning_rate": 0.0005, "loss": 2.1271, "step": 42240 }, { "epoch": 0.16081392781833545, "grad_norm": 0.12036725878715515, "learning_rate": 0.0005, "loss": 2.1189, "step": 42250 }, { "epoch": 0.16085199028645814, "grad_norm": 0.14227166771888733, "learning_rate": 0.0005, "loss": 2.1374, "step": 42260 }, { "epoch": 0.16089005275458082, "grad_norm": 0.12093579024076462, "learning_rate": 0.0005, "loss": 2.1421, "step": 42270 }, { "epoch": 0.1609281152227035, "grad_norm": 0.13048814237117767, "learning_rate": 0.0005, "loss": 2.1486, "step": 42280 }, { "epoch": 0.1609661776908262, "grad_norm": 0.12342751026153564, "learning_rate": 0.0005, "loss": 2.1366, "step": 42290 }, { "epoch": 0.16100424015894887, "grad_norm": 0.12928670644760132, "learning_rate": 0.0005, "loss": 2.1469, "step": 42300 }, { "epoch": 0.16104230262707156, "grad_norm": 0.1347813606262207, "learning_rate": 0.0005, "loss": 2.1389, "step": 42310 }, { "epoch": 0.16108036509519424, "grad_norm": 0.13783428072929382, "learning_rate": 0.0005, "loss": 2.1347, "step": 42320 }, { "epoch": 0.16111842756331693, "grad_norm": 0.12047230452299118, "learning_rate": 0.0005, "loss": 2.1164, "step": 42330 }, { "epoch": 0.1611564900314396, "grad_norm": 0.13066697120666504, "learning_rate": 0.0005, "loss": 2.1435, "step": 42340 }, { "epoch": 0.16119455249956227, "grad_norm": 0.12590228021144867, "learning_rate": 0.0005, "loss": 2.1416, "step": 42350 }, { "epoch": 0.16123261496768496, "grad_norm": 0.13206040859222412, "learning_rate": 0.0005, "loss": 2.1579, "step": 42360 }, { "epoch": 0.16127067743580764, "grad_norm": 0.12833815813064575, "learning_rate": 0.0005, "loss": 2.1421, "step": 42370 }, { "epoch": 0.16130873990393033, "grad_norm": 0.12771695852279663, "learning_rate": 0.0005, "loss": 2.1427, "step": 42380 }, { "epoch": 0.161346802372053, "grad_norm": 0.12943068146705627, "learning_rate": 0.0005, "loss": 2.1167, "step": 42390 }, { "epoch": 0.1613848648401757, "grad_norm": 0.12026971578598022, "learning_rate": 0.0005, "loss": 2.1352, "step": 42400 }, { "epoch": 0.16142292730829838, "grad_norm": 0.11510438472032547, "learning_rate": 0.0005, "loss": 2.1322, "step": 42410 }, { "epoch": 0.16146098977642107, "grad_norm": 0.13893866539001465, "learning_rate": 0.0005, "loss": 2.13, "step": 42420 }, { "epoch": 0.16149905224454375, "grad_norm": 0.11227071285247803, "learning_rate": 0.0005, "loss": 2.1359, "step": 42430 }, { "epoch": 0.16153711471266644, "grad_norm": 0.126421719789505, "learning_rate": 0.0005, "loss": 2.1386, "step": 42440 }, { "epoch": 0.16157517718078912, "grad_norm": 0.12309280037879944, "learning_rate": 0.0005, "loss": 2.1307, "step": 42450 }, { "epoch": 0.1616132396489118, "grad_norm": 0.12749263644218445, "learning_rate": 0.0005, "loss": 2.1302, "step": 42460 }, { "epoch": 0.1616513021170345, "grad_norm": 0.122982457280159, "learning_rate": 0.0005, "loss": 2.128, "step": 42470 }, { "epoch": 0.16168936458515715, "grad_norm": 0.12029185891151428, "learning_rate": 0.0005, "loss": 2.1484, "step": 42480 }, { "epoch": 0.16172742705327983, "grad_norm": 0.12866486608982086, "learning_rate": 0.0005, "loss": 2.1258, "step": 42490 }, { "epoch": 0.16176548952140252, "grad_norm": 0.12323121726512909, "learning_rate": 0.0005, "loss": 2.1539, "step": 42500 }, { "epoch": 0.1618035519895252, "grad_norm": 0.12656740844249725, "learning_rate": 0.0005, "loss": 2.1314, "step": 42510 }, { "epoch": 0.1618416144576479, "grad_norm": 0.13577412068843842, "learning_rate": 0.0005, "loss": 2.1377, "step": 42520 }, { "epoch": 0.16187967692577057, "grad_norm": 0.12688353657722473, "learning_rate": 0.0005, "loss": 2.1374, "step": 42530 }, { "epoch": 0.16191773939389326, "grad_norm": 0.1538940966129303, "learning_rate": 0.0005, "loss": 2.141, "step": 42540 }, { "epoch": 0.16195580186201594, "grad_norm": 0.12012957781553268, "learning_rate": 0.0005, "loss": 2.1348, "step": 42550 }, { "epoch": 0.16199386433013863, "grad_norm": 0.11320848762989044, "learning_rate": 0.0005, "loss": 2.1217, "step": 42560 }, { "epoch": 0.1620319267982613, "grad_norm": 0.13399673998355865, "learning_rate": 0.0005, "loss": 2.1169, "step": 42570 }, { "epoch": 0.162069989266384, "grad_norm": 0.12073542922735214, "learning_rate": 0.0005, "loss": 2.1445, "step": 42580 }, { "epoch": 0.16210805173450668, "grad_norm": 0.13058961927890778, "learning_rate": 0.0005, "loss": 2.1298, "step": 42590 }, { "epoch": 0.16214611420262937, "grad_norm": 0.12318974733352661, "learning_rate": 0.0005, "loss": 2.1373, "step": 42600 }, { "epoch": 0.16218417667075205, "grad_norm": 0.12237012386322021, "learning_rate": 0.0005, "loss": 2.1306, "step": 42610 }, { "epoch": 0.1622222391388747, "grad_norm": 0.124078668653965, "learning_rate": 0.0005, "loss": 2.1266, "step": 42620 }, { "epoch": 0.1622603016069974, "grad_norm": 0.12561817467212677, "learning_rate": 0.0005, "loss": 2.1335, "step": 42630 }, { "epoch": 0.16229836407512008, "grad_norm": 0.12398776412010193, "learning_rate": 0.0005, "loss": 2.139, "step": 42640 }, { "epoch": 0.16233642654324276, "grad_norm": 0.12158331274986267, "learning_rate": 0.0005, "loss": 2.1457, "step": 42650 }, { "epoch": 0.16237448901136545, "grad_norm": 0.11844036728143692, "learning_rate": 0.0005, "loss": 2.1207, "step": 42660 }, { "epoch": 0.16241255147948813, "grad_norm": 0.13241811096668243, "learning_rate": 0.0005, "loss": 2.1522, "step": 42670 }, { "epoch": 0.16245061394761082, "grad_norm": 0.12555848062038422, "learning_rate": 0.0005, "loss": 2.1307, "step": 42680 }, { "epoch": 0.1624886764157335, "grad_norm": 0.12289304286241531, "learning_rate": 0.0005, "loss": 2.1353, "step": 42690 }, { "epoch": 0.1625267388838562, "grad_norm": 0.11282894760370255, "learning_rate": 0.0005, "loss": 2.1301, "step": 42700 }, { "epoch": 0.16256480135197887, "grad_norm": 0.12983134388923645, "learning_rate": 0.0005, "loss": 2.139, "step": 42710 }, { "epoch": 0.16260286382010156, "grad_norm": 0.1355329006910324, "learning_rate": 0.0005, "loss": 2.1343, "step": 42720 }, { "epoch": 0.16264092628822424, "grad_norm": 0.11260616779327393, "learning_rate": 0.0005, "loss": 2.1378, "step": 42730 }, { "epoch": 0.16267898875634693, "grad_norm": 0.12481415271759033, "learning_rate": 0.0005, "loss": 2.1405, "step": 42740 }, { "epoch": 0.1627170512244696, "grad_norm": 0.14663955569267273, "learning_rate": 0.0005, "loss": 2.1235, "step": 42750 }, { "epoch": 0.1627551136925923, "grad_norm": 0.1387481838464737, "learning_rate": 0.0005, "loss": 2.136, "step": 42760 }, { "epoch": 0.16279317616071495, "grad_norm": 0.12403888255357742, "learning_rate": 0.0005, "loss": 2.133, "step": 42770 }, { "epoch": 0.16283123862883764, "grad_norm": 0.1486683040857315, "learning_rate": 0.0005, "loss": 2.1305, "step": 42780 }, { "epoch": 0.16286930109696032, "grad_norm": 0.14229144155979156, "learning_rate": 0.0005, "loss": 2.1491, "step": 42790 }, { "epoch": 0.162907363565083, "grad_norm": 0.1370912492275238, "learning_rate": 0.0005, "loss": 2.1285, "step": 42800 }, { "epoch": 0.1629454260332057, "grad_norm": 0.11742990463972092, "learning_rate": 0.0005, "loss": 2.1507, "step": 42810 }, { "epoch": 0.16298348850132838, "grad_norm": 0.13674955070018768, "learning_rate": 0.0005, "loss": 2.1329, "step": 42820 }, { "epoch": 0.16302155096945106, "grad_norm": 0.11049659550189972, "learning_rate": 0.0005, "loss": 2.14, "step": 42830 }, { "epoch": 0.16305961343757375, "grad_norm": 0.11355409026145935, "learning_rate": 0.0005, "loss": 2.125, "step": 42840 }, { "epoch": 0.16309767590569643, "grad_norm": 0.1293742060661316, "learning_rate": 0.0005, "loss": 2.1267, "step": 42850 }, { "epoch": 0.16313573837381912, "grad_norm": 0.12040603160858154, "learning_rate": 0.0005, "loss": 2.1306, "step": 42860 }, { "epoch": 0.1631738008419418, "grad_norm": 0.11985547095537186, "learning_rate": 0.0005, "loss": 2.1375, "step": 42870 }, { "epoch": 0.1632118633100645, "grad_norm": 0.11281248182058334, "learning_rate": 0.0005, "loss": 2.1386, "step": 42880 }, { "epoch": 0.16324992577818717, "grad_norm": 0.12250163406133652, "learning_rate": 0.0005, "loss": 2.1369, "step": 42890 }, { "epoch": 0.16328798824630986, "grad_norm": 0.13779056072235107, "learning_rate": 0.0005, "loss": 2.1489, "step": 42900 }, { "epoch": 0.1633260507144325, "grad_norm": 0.12871775031089783, "learning_rate": 0.0005, "loss": 2.1344, "step": 42910 }, { "epoch": 0.1633641131825552, "grad_norm": 0.11977288126945496, "learning_rate": 0.0005, "loss": 2.1391, "step": 42920 }, { "epoch": 0.16340217565067788, "grad_norm": 0.12549398839473724, "learning_rate": 0.0005, "loss": 2.1242, "step": 42930 }, { "epoch": 0.16344023811880057, "grad_norm": 0.11586546152830124, "learning_rate": 0.0005, "loss": 2.145, "step": 42940 }, { "epoch": 0.16347830058692325, "grad_norm": 0.11639491468667984, "learning_rate": 0.0005, "loss": 2.1244, "step": 42950 }, { "epoch": 0.16351636305504594, "grad_norm": 0.1248018816113472, "learning_rate": 0.0005, "loss": 2.1397, "step": 42960 }, { "epoch": 0.16355442552316862, "grad_norm": 0.13171744346618652, "learning_rate": 0.0005, "loss": 2.144, "step": 42970 }, { "epoch": 0.1635924879912913, "grad_norm": 0.11659903079271317, "learning_rate": 0.0005, "loss": 2.1328, "step": 42980 }, { "epoch": 0.163630550459414, "grad_norm": 0.12795613706111908, "learning_rate": 0.0005, "loss": 2.1403, "step": 42990 }, { "epoch": 0.16366861292753668, "grad_norm": 0.11983273178339005, "learning_rate": 0.0005, "loss": 2.1369, "step": 43000 }, { "epoch": 0.16370667539565936, "grad_norm": 0.11790401488542557, "learning_rate": 0.0005, "loss": 2.1369, "step": 43010 }, { "epoch": 0.16374473786378205, "grad_norm": 0.13478805124759674, "learning_rate": 0.0005, "loss": 2.1397, "step": 43020 }, { "epoch": 0.16378280033190473, "grad_norm": 0.13745801150798798, "learning_rate": 0.0005, "loss": 2.1408, "step": 43030 }, { "epoch": 0.16382086280002742, "grad_norm": 0.13325530290603638, "learning_rate": 0.0005, "loss": 2.1375, "step": 43040 }, { "epoch": 0.16385892526815007, "grad_norm": 0.11879530549049377, "learning_rate": 0.0005, "loss": 2.1275, "step": 43050 }, { "epoch": 0.16389698773627276, "grad_norm": 0.12253537774085999, "learning_rate": 0.0005, "loss": 2.1158, "step": 43060 }, { "epoch": 0.16393505020439544, "grad_norm": 0.1257277876138687, "learning_rate": 0.0005, "loss": 2.1338, "step": 43070 }, { "epoch": 0.16397311267251813, "grad_norm": 0.12686146795749664, "learning_rate": 0.0005, "loss": 2.1279, "step": 43080 }, { "epoch": 0.1640111751406408, "grad_norm": 0.12342148274183273, "learning_rate": 0.0005, "loss": 2.1448, "step": 43090 }, { "epoch": 0.1640492376087635, "grad_norm": 0.12092763930559158, "learning_rate": 0.0005, "loss": 2.1366, "step": 43100 }, { "epoch": 0.16408730007688618, "grad_norm": 0.1287047415971756, "learning_rate": 0.0005, "loss": 2.1433, "step": 43110 }, { "epoch": 0.16412536254500887, "grad_norm": 0.13601644337177277, "learning_rate": 0.0005, "loss": 2.1276, "step": 43120 }, { "epoch": 0.16416342501313155, "grad_norm": 0.12395923584699631, "learning_rate": 0.0005, "loss": 2.1345, "step": 43130 }, { "epoch": 0.16420148748125424, "grad_norm": 0.14422424137592316, "learning_rate": 0.0005, "loss": 2.1278, "step": 43140 }, { "epoch": 0.16423954994937692, "grad_norm": 0.12677833437919617, "learning_rate": 0.0005, "loss": 2.1369, "step": 43150 }, { "epoch": 0.1642776124174996, "grad_norm": 0.12512822449207306, "learning_rate": 0.0005, "loss": 2.1469, "step": 43160 }, { "epoch": 0.1643156748856223, "grad_norm": 0.13113650679588318, "learning_rate": 0.0005, "loss": 2.1374, "step": 43170 }, { "epoch": 0.16435373735374498, "grad_norm": 0.13267137110233307, "learning_rate": 0.0005, "loss": 2.13, "step": 43180 }, { "epoch": 0.16439179982186766, "grad_norm": 0.12079484015703201, "learning_rate": 0.0005, "loss": 2.1465, "step": 43190 }, { "epoch": 0.16442986228999032, "grad_norm": 0.11671856045722961, "learning_rate": 0.0005, "loss": 2.1374, "step": 43200 }, { "epoch": 0.164467924758113, "grad_norm": 0.11519180983304977, "learning_rate": 0.0005, "loss": 2.1299, "step": 43210 }, { "epoch": 0.1645059872262357, "grad_norm": 0.12597531080245972, "learning_rate": 0.0005, "loss": 2.1362, "step": 43220 }, { "epoch": 0.16454404969435837, "grad_norm": 0.12310968339443207, "learning_rate": 0.0005, "loss": 2.1546, "step": 43230 }, { "epoch": 0.16458211216248106, "grad_norm": 0.1263173371553421, "learning_rate": 0.0005, "loss": 2.1373, "step": 43240 }, { "epoch": 0.16462017463060374, "grad_norm": 0.13074570894241333, "learning_rate": 0.0005, "loss": 2.137, "step": 43250 }, { "epoch": 0.16465823709872643, "grad_norm": 0.11949026584625244, "learning_rate": 0.0005, "loss": 2.1334, "step": 43260 }, { "epoch": 0.1646962995668491, "grad_norm": 0.1144823208451271, "learning_rate": 0.0005, "loss": 2.1484, "step": 43270 }, { "epoch": 0.1647343620349718, "grad_norm": 0.12179239839315414, "learning_rate": 0.0005, "loss": 2.1323, "step": 43280 }, { "epoch": 0.16477242450309448, "grad_norm": 0.15474900603294373, "learning_rate": 0.0005, "loss": 2.1345, "step": 43290 }, { "epoch": 0.16481048697121717, "grad_norm": 0.1269451230764389, "learning_rate": 0.0005, "loss": 2.1233, "step": 43300 }, { "epoch": 0.16484854943933985, "grad_norm": 0.11439156532287598, "learning_rate": 0.0005, "loss": 2.1194, "step": 43310 }, { "epoch": 0.16488661190746254, "grad_norm": 0.12010608613491058, "learning_rate": 0.0005, "loss": 2.1422, "step": 43320 }, { "epoch": 0.16492467437558522, "grad_norm": 0.11821283400058746, "learning_rate": 0.0005, "loss": 2.1393, "step": 43330 }, { "epoch": 0.16496273684370788, "grad_norm": 0.13478679955005646, "learning_rate": 0.0005, "loss": 2.1489, "step": 43340 }, { "epoch": 0.16500079931183056, "grad_norm": 0.12389303743839264, "learning_rate": 0.0005, "loss": 2.142, "step": 43350 }, { "epoch": 0.16503886177995325, "grad_norm": 0.12443176656961441, "learning_rate": 0.0005, "loss": 2.1336, "step": 43360 }, { "epoch": 0.16507692424807593, "grad_norm": 0.1438198685646057, "learning_rate": 0.0005, "loss": 2.135, "step": 43370 }, { "epoch": 0.16511498671619862, "grad_norm": 0.12440897524356842, "learning_rate": 0.0005, "loss": 2.1292, "step": 43380 }, { "epoch": 0.1651530491843213, "grad_norm": 0.12202125042676926, "learning_rate": 0.0005, "loss": 2.1332, "step": 43390 }, { "epoch": 0.165191111652444, "grad_norm": 0.11001090705394745, "learning_rate": 0.0005, "loss": 2.1298, "step": 43400 }, { "epoch": 0.16522917412056667, "grad_norm": 0.13812753558158875, "learning_rate": 0.0005, "loss": 2.1411, "step": 43410 }, { "epoch": 0.16526723658868936, "grad_norm": 0.1187279224395752, "learning_rate": 0.0005, "loss": 2.1413, "step": 43420 }, { "epoch": 0.16530529905681204, "grad_norm": 0.11560482531785965, "learning_rate": 0.0005, "loss": 2.1293, "step": 43430 }, { "epoch": 0.16534336152493473, "grad_norm": 0.13228921592235565, "learning_rate": 0.0005, "loss": 2.1384, "step": 43440 }, { "epoch": 0.1653814239930574, "grad_norm": 0.1330287605524063, "learning_rate": 0.0005, "loss": 2.1243, "step": 43450 }, { "epoch": 0.1654194864611801, "grad_norm": 0.13445092737674713, "learning_rate": 0.0005, "loss": 2.1311, "step": 43460 }, { "epoch": 0.16545754892930278, "grad_norm": 0.11717677861452103, "learning_rate": 0.0005, "loss": 2.1301, "step": 43470 }, { "epoch": 0.16549561139742547, "grad_norm": 0.14144597947597504, "learning_rate": 0.0005, "loss": 2.1302, "step": 43480 }, { "epoch": 0.16553367386554813, "grad_norm": 0.12285357713699341, "learning_rate": 0.0005, "loss": 2.15, "step": 43490 }, { "epoch": 0.1655717363336708, "grad_norm": 0.15100809931755066, "learning_rate": 0.0005, "loss": 2.14, "step": 43500 }, { "epoch": 0.1656097988017935, "grad_norm": 0.12419594079256058, "learning_rate": 0.0005, "loss": 2.1347, "step": 43510 }, { "epoch": 0.16564786126991618, "grad_norm": 0.12664470076560974, "learning_rate": 0.0005, "loss": 2.1329, "step": 43520 }, { "epoch": 0.16568592373803886, "grad_norm": 0.11580910533666611, "learning_rate": 0.0005, "loss": 2.132, "step": 43530 }, { "epoch": 0.16572398620616155, "grad_norm": 0.12186925858259201, "learning_rate": 0.0005, "loss": 2.1377, "step": 43540 }, { "epoch": 0.16576204867428423, "grad_norm": 0.10877561569213867, "learning_rate": 0.0005, "loss": 2.1281, "step": 43550 }, { "epoch": 0.16580011114240692, "grad_norm": 0.13248339295387268, "learning_rate": 0.0005, "loss": 2.1301, "step": 43560 }, { "epoch": 0.1658381736105296, "grad_norm": 0.14807666838169098, "learning_rate": 0.0005, "loss": 2.1402, "step": 43570 }, { "epoch": 0.1658762360786523, "grad_norm": 0.12139953672885895, "learning_rate": 0.0005, "loss": 2.1486, "step": 43580 }, { "epoch": 0.16591429854677497, "grad_norm": 0.12874317169189453, "learning_rate": 0.0005, "loss": 2.1366, "step": 43590 }, { "epoch": 0.16595236101489766, "grad_norm": 0.1149357259273529, "learning_rate": 0.0005, "loss": 2.1296, "step": 43600 }, { "epoch": 0.16599042348302034, "grad_norm": 0.12108978629112244, "learning_rate": 0.0005, "loss": 2.1266, "step": 43610 }, { "epoch": 0.16602848595114303, "grad_norm": 0.12273205071687698, "learning_rate": 0.0005, "loss": 2.1369, "step": 43620 }, { "epoch": 0.16606654841926569, "grad_norm": 0.11966592818498611, "learning_rate": 0.0005, "loss": 2.1321, "step": 43630 }, { "epoch": 0.16610461088738837, "grad_norm": 0.11612257361412048, "learning_rate": 0.0005, "loss": 2.1283, "step": 43640 }, { "epoch": 0.16614267335551106, "grad_norm": 0.12440352886915207, "learning_rate": 0.0005, "loss": 2.1387, "step": 43650 }, { "epoch": 0.16618073582363374, "grad_norm": 0.11509761214256287, "learning_rate": 0.0005, "loss": 2.1577, "step": 43660 }, { "epoch": 0.16621879829175643, "grad_norm": 0.11704332381486893, "learning_rate": 0.0005, "loss": 2.1344, "step": 43670 }, { "epoch": 0.1662568607598791, "grad_norm": 0.12189175188541412, "learning_rate": 0.0005, "loss": 2.1428, "step": 43680 }, { "epoch": 0.1662949232280018, "grad_norm": 0.11538931727409363, "learning_rate": 0.0005, "loss": 2.1462, "step": 43690 }, { "epoch": 0.16633298569612448, "grad_norm": 0.12895295023918152, "learning_rate": 0.0005, "loss": 2.117, "step": 43700 }, { "epoch": 0.16637104816424717, "grad_norm": 0.11244889348745346, "learning_rate": 0.0005, "loss": 2.1317, "step": 43710 }, { "epoch": 0.16640911063236985, "grad_norm": 0.13465918600559235, "learning_rate": 0.0005, "loss": 2.1315, "step": 43720 }, { "epoch": 0.16644717310049253, "grad_norm": 0.13419680297374725, "learning_rate": 0.0005, "loss": 2.1264, "step": 43730 }, { "epoch": 0.16648523556861522, "grad_norm": 0.12253239750862122, "learning_rate": 0.0005, "loss": 2.139, "step": 43740 }, { "epoch": 0.1665232980367379, "grad_norm": 0.1258445829153061, "learning_rate": 0.0005, "loss": 2.1287, "step": 43750 }, { "epoch": 0.1665613605048606, "grad_norm": 0.1273617297410965, "learning_rate": 0.0005, "loss": 2.1495, "step": 43760 }, { "epoch": 0.16659942297298325, "grad_norm": 0.13250529766082764, "learning_rate": 0.0005, "loss": 2.1181, "step": 43770 }, { "epoch": 0.16663748544110593, "grad_norm": 0.1205471083521843, "learning_rate": 0.0005, "loss": 2.1519, "step": 43780 }, { "epoch": 0.16667554790922862, "grad_norm": 0.11968290060758591, "learning_rate": 0.0005, "loss": 2.1288, "step": 43790 }, { "epoch": 0.1667136103773513, "grad_norm": 0.137216255068779, "learning_rate": 0.0005, "loss": 2.1372, "step": 43800 }, { "epoch": 0.16675167284547399, "grad_norm": 0.13451290130615234, "learning_rate": 0.0005, "loss": 2.1148, "step": 43810 }, { "epoch": 0.16678973531359667, "grad_norm": 0.12728510797023773, "learning_rate": 0.0005, "loss": 2.1488, "step": 43820 }, { "epoch": 0.16682779778171936, "grad_norm": 0.116569884121418, "learning_rate": 0.0005, "loss": 2.1295, "step": 43830 }, { "epoch": 0.16686586024984204, "grad_norm": 0.11471624672412872, "learning_rate": 0.0005, "loss": 2.1403, "step": 43840 }, { "epoch": 0.16690392271796473, "grad_norm": 0.12343640625476837, "learning_rate": 0.0005, "loss": 2.1411, "step": 43850 }, { "epoch": 0.1669419851860874, "grad_norm": 0.11519220471382141, "learning_rate": 0.0005, "loss": 2.1291, "step": 43860 }, { "epoch": 0.1669800476542101, "grad_norm": 0.1342388391494751, "learning_rate": 0.0005, "loss": 2.1344, "step": 43870 }, { "epoch": 0.16701811012233278, "grad_norm": 0.12278634309768677, "learning_rate": 0.0005, "loss": 2.1421, "step": 43880 }, { "epoch": 0.16705617259045547, "grad_norm": 0.13932234048843384, "learning_rate": 0.0005, "loss": 2.1486, "step": 43890 }, { "epoch": 0.16709423505857815, "grad_norm": 0.12469235062599182, "learning_rate": 0.0005, "loss": 2.1337, "step": 43900 }, { "epoch": 0.16713229752670083, "grad_norm": 0.12053560465574265, "learning_rate": 0.0005, "loss": 2.1315, "step": 43910 }, { "epoch": 0.1671703599948235, "grad_norm": 0.12285034358501434, "learning_rate": 0.0005, "loss": 2.1542, "step": 43920 }, { "epoch": 0.16720842246294618, "grad_norm": 0.12460073083639145, "learning_rate": 0.0005, "loss": 2.144, "step": 43930 }, { "epoch": 0.16724648493106886, "grad_norm": 0.11997386068105698, "learning_rate": 0.0005, "loss": 2.1302, "step": 43940 }, { "epoch": 0.16728454739919155, "grad_norm": 0.1238400787115097, "learning_rate": 0.0005, "loss": 2.1395, "step": 43950 }, { "epoch": 0.16732260986731423, "grad_norm": 0.11750750243663788, "learning_rate": 0.0005, "loss": 2.1334, "step": 43960 }, { "epoch": 0.16736067233543692, "grad_norm": 0.1257324367761612, "learning_rate": 0.0005, "loss": 2.1358, "step": 43970 }, { "epoch": 0.1673987348035596, "grad_norm": 0.11816301196813583, "learning_rate": 0.0005, "loss": 2.1323, "step": 43980 }, { "epoch": 0.16743679727168229, "grad_norm": 0.1174677386879921, "learning_rate": 0.0005, "loss": 2.1321, "step": 43990 }, { "epoch": 0.16747485973980497, "grad_norm": 0.11954568326473236, "learning_rate": 0.0005, "loss": 2.1301, "step": 44000 }, { "epoch": 0.16751292220792766, "grad_norm": 0.12409286946058273, "learning_rate": 0.0005, "loss": 2.1306, "step": 44010 }, { "epoch": 0.16755098467605034, "grad_norm": 0.1352391242980957, "learning_rate": 0.0005, "loss": 2.1336, "step": 44020 }, { "epoch": 0.16758904714417303, "grad_norm": 0.1292494535446167, "learning_rate": 0.0005, "loss": 2.1324, "step": 44030 }, { "epoch": 0.1676271096122957, "grad_norm": 0.13378708064556122, "learning_rate": 0.0005, "loss": 2.1413, "step": 44040 }, { "epoch": 0.1676651720804184, "grad_norm": 0.1259700208902359, "learning_rate": 0.0005, "loss": 2.1264, "step": 44050 }, { "epoch": 0.16770323454854105, "grad_norm": 0.12806585431098938, "learning_rate": 0.0005, "loss": 2.1464, "step": 44060 }, { "epoch": 0.16774129701666374, "grad_norm": 0.11934912949800491, "learning_rate": 0.0005, "loss": 2.1253, "step": 44070 }, { "epoch": 0.16777935948478642, "grad_norm": 0.1166028380393982, "learning_rate": 0.0005, "loss": 2.118, "step": 44080 }, { "epoch": 0.1678174219529091, "grad_norm": 0.11846883594989777, "learning_rate": 0.0005, "loss": 2.1328, "step": 44090 }, { "epoch": 0.1678554844210318, "grad_norm": 0.11931490898132324, "learning_rate": 0.0005, "loss": 2.1444, "step": 44100 }, { "epoch": 0.16789354688915448, "grad_norm": 0.11143594980239868, "learning_rate": 0.0005, "loss": 2.1291, "step": 44110 }, { "epoch": 0.16793160935727716, "grad_norm": 0.11396325379610062, "learning_rate": 0.0005, "loss": 2.1408, "step": 44120 }, { "epoch": 0.16796967182539985, "grad_norm": 0.1326463669538498, "learning_rate": 0.0005, "loss": 2.144, "step": 44130 }, { "epoch": 0.16800773429352253, "grad_norm": 0.13630157709121704, "learning_rate": 0.0005, "loss": 2.1293, "step": 44140 }, { "epoch": 0.16804579676164522, "grad_norm": 0.11827398091554642, "learning_rate": 0.0005, "loss": 2.1191, "step": 44150 }, { "epoch": 0.1680838592297679, "grad_norm": 0.12105869501829147, "learning_rate": 0.0005, "loss": 2.1418, "step": 44160 }, { "epoch": 0.1681219216978906, "grad_norm": 0.13034142553806305, "learning_rate": 0.0005, "loss": 2.1427, "step": 44170 }, { "epoch": 0.16815998416601327, "grad_norm": 0.1371888369321823, "learning_rate": 0.0005, "loss": 2.1156, "step": 44180 }, { "epoch": 0.16819804663413596, "grad_norm": 0.11673656851053238, "learning_rate": 0.0005, "loss": 2.1203, "step": 44190 }, { "epoch": 0.1682361091022586, "grad_norm": 0.1259164661169052, "learning_rate": 0.0005, "loss": 2.1389, "step": 44200 }, { "epoch": 0.1682741715703813, "grad_norm": 0.12241323292255402, "learning_rate": 0.0005, "loss": 2.13, "step": 44210 }, { "epoch": 0.16831223403850398, "grad_norm": 0.12541623413562775, "learning_rate": 0.0005, "loss": 2.1258, "step": 44220 }, { "epoch": 0.16835029650662667, "grad_norm": 0.12463592737913132, "learning_rate": 0.0005, "loss": 2.1469, "step": 44230 }, { "epoch": 0.16838835897474935, "grad_norm": 0.11558257043361664, "learning_rate": 0.0005, "loss": 2.1306, "step": 44240 }, { "epoch": 0.16842642144287204, "grad_norm": 0.12510859966278076, "learning_rate": 0.0005, "loss": 2.1301, "step": 44250 }, { "epoch": 0.16846448391099472, "grad_norm": 0.12276134639978409, "learning_rate": 0.0005, "loss": 2.1231, "step": 44260 }, { "epoch": 0.1685025463791174, "grad_norm": 0.1275489777326584, "learning_rate": 0.0005, "loss": 2.1204, "step": 44270 }, { "epoch": 0.1685406088472401, "grad_norm": 0.12813347578048706, "learning_rate": 0.0005, "loss": 2.1423, "step": 44280 }, { "epoch": 0.16857867131536278, "grad_norm": 0.1250738650560379, "learning_rate": 0.0005, "loss": 2.138, "step": 44290 }, { "epoch": 0.16861673378348546, "grad_norm": 0.11898188292980194, "learning_rate": 0.0005, "loss": 2.1243, "step": 44300 }, { "epoch": 0.16865479625160815, "grad_norm": 0.14133501052856445, "learning_rate": 0.0005, "loss": 2.1398, "step": 44310 }, { "epoch": 0.16869285871973083, "grad_norm": 0.12976321578025818, "learning_rate": 0.0005, "loss": 2.1357, "step": 44320 }, { "epoch": 0.16873092118785352, "grad_norm": 0.12245601415634155, "learning_rate": 0.0005, "loss": 2.1542, "step": 44330 }, { "epoch": 0.1687689836559762, "grad_norm": 0.12810446321964264, "learning_rate": 0.0005, "loss": 2.1358, "step": 44340 }, { "epoch": 0.16880704612409886, "grad_norm": 0.16562506556510925, "learning_rate": 0.0005, "loss": 2.1426, "step": 44350 }, { "epoch": 0.16884510859222154, "grad_norm": 0.12520432472229004, "learning_rate": 0.0005, "loss": 2.141, "step": 44360 }, { "epoch": 0.16888317106034423, "grad_norm": 0.14297902584075928, "learning_rate": 0.0005, "loss": 2.152, "step": 44370 }, { "epoch": 0.1689212335284669, "grad_norm": 0.12294340133666992, "learning_rate": 0.0005, "loss": 2.1344, "step": 44380 }, { "epoch": 0.1689592959965896, "grad_norm": 0.1253482699394226, "learning_rate": 0.0005, "loss": 2.1391, "step": 44390 }, { "epoch": 0.16899735846471228, "grad_norm": 0.12899786233901978, "learning_rate": 0.0005, "loss": 2.1303, "step": 44400 }, { "epoch": 0.16903542093283497, "grad_norm": 0.12162794172763824, "learning_rate": 0.0005, "loss": 2.1418, "step": 44410 }, { "epoch": 0.16907348340095765, "grad_norm": 0.12179070711135864, "learning_rate": 0.0005, "loss": 2.1421, "step": 44420 }, { "epoch": 0.16911154586908034, "grad_norm": 0.11872969567775726, "learning_rate": 0.0005, "loss": 2.1258, "step": 44430 }, { "epoch": 0.16914960833720302, "grad_norm": 0.11570776998996735, "learning_rate": 0.0005, "loss": 2.1306, "step": 44440 }, { "epoch": 0.1691876708053257, "grad_norm": 0.18721316754817963, "learning_rate": 0.0005, "loss": 2.1219, "step": 44450 }, { "epoch": 0.1692257332734484, "grad_norm": 0.11968997120857239, "learning_rate": 0.0005, "loss": 2.144, "step": 44460 }, { "epoch": 0.16926379574157108, "grad_norm": 0.1322961300611496, "learning_rate": 0.0005, "loss": 2.1262, "step": 44470 }, { "epoch": 0.16930185820969376, "grad_norm": 0.11864107102155685, "learning_rate": 0.0005, "loss": 2.1397, "step": 44480 }, { "epoch": 0.16933992067781642, "grad_norm": 0.133161261677742, "learning_rate": 0.0005, "loss": 2.1296, "step": 44490 }, { "epoch": 0.1693779831459391, "grad_norm": 0.12709060311317444, "learning_rate": 0.0005, "loss": 2.1284, "step": 44500 }, { "epoch": 0.1694160456140618, "grad_norm": 0.127763032913208, "learning_rate": 0.0005, "loss": 2.1342, "step": 44510 }, { "epoch": 0.16945410808218447, "grad_norm": 0.1264316290616989, "learning_rate": 0.0005, "loss": 2.1366, "step": 44520 }, { "epoch": 0.16949217055030716, "grad_norm": 0.1268509477376938, "learning_rate": 0.0005, "loss": 2.148, "step": 44530 }, { "epoch": 0.16953023301842984, "grad_norm": 0.12328742444515228, "learning_rate": 0.0005, "loss": 2.1357, "step": 44540 }, { "epoch": 0.16956829548655253, "grad_norm": 0.1354341059923172, "learning_rate": 0.0005, "loss": 2.1344, "step": 44550 }, { "epoch": 0.1696063579546752, "grad_norm": 0.11687562614679337, "learning_rate": 0.0005, "loss": 2.1452, "step": 44560 }, { "epoch": 0.1696444204227979, "grad_norm": 0.11326216906309128, "learning_rate": 0.0005, "loss": 2.1333, "step": 44570 }, { "epoch": 0.16968248289092058, "grad_norm": 0.14775125682353973, "learning_rate": 0.0005, "loss": 2.1436, "step": 44580 }, { "epoch": 0.16972054535904327, "grad_norm": 0.13913802802562714, "learning_rate": 0.0005, "loss": 2.1312, "step": 44590 }, { "epoch": 0.16975860782716595, "grad_norm": 0.11591693758964539, "learning_rate": 0.0005, "loss": 2.135, "step": 44600 }, { "epoch": 0.16979667029528864, "grad_norm": 0.11765572428703308, "learning_rate": 0.0005, "loss": 2.1229, "step": 44610 }, { "epoch": 0.16983473276341132, "grad_norm": 0.1285104751586914, "learning_rate": 0.0005, "loss": 2.1316, "step": 44620 }, { "epoch": 0.169872795231534, "grad_norm": 0.13289599120616913, "learning_rate": 0.0005, "loss": 2.124, "step": 44630 }, { "epoch": 0.16991085769965666, "grad_norm": 0.11635494977235794, "learning_rate": 0.0005, "loss": 2.1299, "step": 44640 }, { "epoch": 0.16994892016777935, "grad_norm": 0.12370641529560089, "learning_rate": 0.0005, "loss": 2.1262, "step": 44650 }, { "epoch": 0.16998698263590203, "grad_norm": 0.11813714355230331, "learning_rate": 0.0005, "loss": 2.1361, "step": 44660 }, { "epoch": 0.17002504510402472, "grad_norm": 0.1225036233663559, "learning_rate": 0.0005, "loss": 2.1411, "step": 44670 }, { "epoch": 0.1700631075721474, "grad_norm": 0.12147258967161179, "learning_rate": 0.0005, "loss": 2.1286, "step": 44680 }, { "epoch": 0.1701011700402701, "grad_norm": 0.12780791521072388, "learning_rate": 0.0005, "loss": 2.1393, "step": 44690 }, { "epoch": 0.17013923250839277, "grad_norm": 0.12251202017068863, "learning_rate": 0.0005, "loss": 2.1386, "step": 44700 }, { "epoch": 0.17017729497651546, "grad_norm": 0.12178204953670502, "learning_rate": 0.0005, "loss": 2.1333, "step": 44710 }, { "epoch": 0.17021535744463814, "grad_norm": 0.10848259180784225, "learning_rate": 0.0005, "loss": 2.1381, "step": 44720 }, { "epoch": 0.17025341991276083, "grad_norm": 0.1275780349969864, "learning_rate": 0.0005, "loss": 2.1373, "step": 44730 }, { "epoch": 0.1702914823808835, "grad_norm": 0.12610189616680145, "learning_rate": 0.0005, "loss": 2.1334, "step": 44740 }, { "epoch": 0.1703295448490062, "grad_norm": 0.15247602760791779, "learning_rate": 0.0005, "loss": 2.1188, "step": 44750 }, { "epoch": 0.17036760731712888, "grad_norm": 0.1426989883184433, "learning_rate": 0.0005, "loss": 2.1514, "step": 44760 }, { "epoch": 0.17040566978525157, "grad_norm": 0.13441897928714752, "learning_rate": 0.0005, "loss": 2.1321, "step": 44770 }, { "epoch": 0.17044373225337422, "grad_norm": 0.12179271876811981, "learning_rate": 0.0005, "loss": 2.1364, "step": 44780 }, { "epoch": 0.1704817947214969, "grad_norm": 0.11661059409379959, "learning_rate": 0.0005, "loss": 2.136, "step": 44790 }, { "epoch": 0.1705198571896196, "grad_norm": 0.12268161028623581, "learning_rate": 0.0005, "loss": 2.1197, "step": 44800 }, { "epoch": 0.17055791965774228, "grad_norm": 0.1112898513674736, "learning_rate": 0.0005, "loss": 2.1271, "step": 44810 }, { "epoch": 0.17059598212586496, "grad_norm": 0.1266551911830902, "learning_rate": 0.0005, "loss": 2.1434, "step": 44820 }, { "epoch": 0.17063404459398765, "grad_norm": 0.13600681722164154, "learning_rate": 0.0005, "loss": 2.1348, "step": 44830 }, { "epoch": 0.17067210706211033, "grad_norm": 0.12621985375881195, "learning_rate": 0.0005, "loss": 2.137, "step": 44840 }, { "epoch": 0.17071016953023302, "grad_norm": 0.1442611813545227, "learning_rate": 0.0005, "loss": 2.1362, "step": 44850 }, { "epoch": 0.1707482319983557, "grad_norm": 0.12562938034534454, "learning_rate": 0.0005, "loss": 2.1399, "step": 44860 }, { "epoch": 0.1707862944664784, "grad_norm": 0.14552602171897888, "learning_rate": 0.0005, "loss": 2.1199, "step": 44870 }, { "epoch": 0.17082435693460107, "grad_norm": 0.12736286222934723, "learning_rate": 0.0005, "loss": 2.1379, "step": 44880 }, { "epoch": 0.17086241940272376, "grad_norm": 0.13560928404331207, "learning_rate": 0.0005, "loss": 2.1301, "step": 44890 }, { "epoch": 0.17090048187084644, "grad_norm": 0.13860104978084564, "learning_rate": 0.0005, "loss": 2.1345, "step": 44900 }, { "epoch": 0.17093854433896913, "grad_norm": 0.11921971291303635, "learning_rate": 0.0005, "loss": 2.1224, "step": 44910 }, { "epoch": 0.17097660680709179, "grad_norm": 0.13215811550617218, "learning_rate": 0.0005, "loss": 2.1476, "step": 44920 }, { "epoch": 0.17101466927521447, "grad_norm": 0.1387583315372467, "learning_rate": 0.0005, "loss": 2.158, "step": 44930 }, { "epoch": 0.17105273174333716, "grad_norm": 0.12246926128864288, "learning_rate": 0.0005, "loss": 2.1256, "step": 44940 }, { "epoch": 0.17109079421145984, "grad_norm": 0.11858603358268738, "learning_rate": 0.0005, "loss": 2.1354, "step": 44950 }, { "epoch": 0.17112885667958253, "grad_norm": 0.12365826219320297, "learning_rate": 0.0005, "loss": 2.1223, "step": 44960 }, { "epoch": 0.1711669191477052, "grad_norm": 0.13618750870227814, "learning_rate": 0.0005, "loss": 2.1262, "step": 44970 }, { "epoch": 0.1712049816158279, "grad_norm": 0.12615805864334106, "learning_rate": 0.0005, "loss": 2.1301, "step": 44980 }, { "epoch": 0.17124304408395058, "grad_norm": 0.12646010518074036, "learning_rate": 0.0005, "loss": 2.1367, "step": 44990 }, { "epoch": 0.17128110655207326, "grad_norm": 0.13197092711925507, "learning_rate": 0.0005, "loss": 2.1408, "step": 45000 }, { "epoch": 0.17131916902019595, "grad_norm": 0.12390444427728653, "learning_rate": 0.0005, "loss": 2.1226, "step": 45010 }, { "epoch": 0.17135723148831863, "grad_norm": 0.11571706086397171, "learning_rate": 0.0005, "loss": 2.1286, "step": 45020 }, { "epoch": 0.17139529395644132, "grad_norm": 0.11823263764381409, "learning_rate": 0.0005, "loss": 2.1286, "step": 45030 }, { "epoch": 0.171433356424564, "grad_norm": 0.13923102617263794, "learning_rate": 0.0005, "loss": 2.1382, "step": 45040 }, { "epoch": 0.1714714188926867, "grad_norm": 0.12148989737033844, "learning_rate": 0.0005, "loss": 2.1339, "step": 45050 }, { "epoch": 0.17150948136080937, "grad_norm": 0.11793144047260284, "learning_rate": 0.0005, "loss": 2.1431, "step": 45060 }, { "epoch": 0.17154754382893203, "grad_norm": 0.1269650161266327, "learning_rate": 0.0005, "loss": 2.1187, "step": 45070 }, { "epoch": 0.17158560629705472, "grad_norm": 0.12455953657627106, "learning_rate": 0.0005, "loss": 2.1225, "step": 45080 }, { "epoch": 0.1716236687651774, "grad_norm": 0.1227991133928299, "learning_rate": 0.0005, "loss": 2.1435, "step": 45090 }, { "epoch": 0.17166173123330009, "grad_norm": 0.12377261370420456, "learning_rate": 0.0005, "loss": 2.1232, "step": 45100 }, { "epoch": 0.17169979370142277, "grad_norm": 0.12025581300258636, "learning_rate": 0.0005, "loss": 2.1338, "step": 45110 }, { "epoch": 0.17173785616954546, "grad_norm": 0.12686417996883392, "learning_rate": 0.0005, "loss": 2.1358, "step": 45120 }, { "epoch": 0.17177591863766814, "grad_norm": 0.12684020400047302, "learning_rate": 0.0005, "loss": 2.1362, "step": 45130 }, { "epoch": 0.17181398110579083, "grad_norm": 0.12716318666934967, "learning_rate": 0.0005, "loss": 2.1213, "step": 45140 }, { "epoch": 0.1718520435739135, "grad_norm": 0.11402035504579544, "learning_rate": 0.0005, "loss": 2.1261, "step": 45150 }, { "epoch": 0.1718901060420362, "grad_norm": 0.11905350536108017, "learning_rate": 0.0005, "loss": 2.1298, "step": 45160 }, { "epoch": 0.17192816851015888, "grad_norm": 0.12106286734342575, "learning_rate": 0.0005, "loss": 2.1322, "step": 45170 }, { "epoch": 0.17196623097828156, "grad_norm": 0.12988130748271942, "learning_rate": 0.0005, "loss": 2.1386, "step": 45180 }, { "epoch": 0.17200429344640425, "grad_norm": 0.13417784869670868, "learning_rate": 0.0005, "loss": 2.135, "step": 45190 }, { "epoch": 0.17204235591452693, "grad_norm": 0.1289902925491333, "learning_rate": 0.0005, "loss": 2.1431, "step": 45200 }, { "epoch": 0.1720804183826496, "grad_norm": 0.1163710206747055, "learning_rate": 0.0005, "loss": 2.1415, "step": 45210 }, { "epoch": 0.17211848085077228, "grad_norm": 0.15872104465961456, "learning_rate": 0.0005, "loss": 2.1359, "step": 45220 }, { "epoch": 0.17215654331889496, "grad_norm": 0.11624854803085327, "learning_rate": 0.0005, "loss": 2.1234, "step": 45230 }, { "epoch": 0.17219460578701765, "grad_norm": 0.11021952331066132, "learning_rate": 0.0005, "loss": 2.1329, "step": 45240 }, { "epoch": 0.17223266825514033, "grad_norm": 0.11777684092521667, "learning_rate": 0.0005, "loss": 2.1176, "step": 45250 }, { "epoch": 0.17227073072326302, "grad_norm": 0.1267765313386917, "learning_rate": 0.0005, "loss": 2.1361, "step": 45260 }, { "epoch": 0.1723087931913857, "grad_norm": 0.12344861775636673, "learning_rate": 0.0005, "loss": 2.1502, "step": 45270 }, { "epoch": 0.17234685565950839, "grad_norm": 0.1236143633723259, "learning_rate": 0.0005, "loss": 2.1449, "step": 45280 }, { "epoch": 0.17238491812763107, "grad_norm": 0.12572093307971954, "learning_rate": 0.0005, "loss": 2.145, "step": 45290 }, { "epoch": 0.17242298059575376, "grad_norm": 0.13047395646572113, "learning_rate": 0.0005, "loss": 2.1361, "step": 45300 }, { "epoch": 0.17246104306387644, "grad_norm": 0.12851989269256592, "learning_rate": 0.0005, "loss": 2.133, "step": 45310 }, { "epoch": 0.17249910553199913, "grad_norm": 0.1399923413991928, "learning_rate": 0.0005, "loss": 2.1171, "step": 45320 }, { "epoch": 0.1725371680001218, "grad_norm": 0.12635745108127594, "learning_rate": 0.0005, "loss": 2.1207, "step": 45330 }, { "epoch": 0.1725752304682445, "grad_norm": 0.1274142563343048, "learning_rate": 0.0005, "loss": 2.1479, "step": 45340 }, { "epoch": 0.17261329293636715, "grad_norm": 0.13066533207893372, "learning_rate": 0.0005, "loss": 2.1425, "step": 45350 }, { "epoch": 0.17265135540448984, "grad_norm": 0.11880338191986084, "learning_rate": 0.0005, "loss": 2.1295, "step": 45360 }, { "epoch": 0.17268941787261252, "grad_norm": 0.11296599358320236, "learning_rate": 0.0005, "loss": 2.1236, "step": 45370 }, { "epoch": 0.1727274803407352, "grad_norm": 0.11921131610870361, "learning_rate": 0.0005, "loss": 2.1297, "step": 45380 }, { "epoch": 0.1727655428088579, "grad_norm": 0.12866875529289246, "learning_rate": 0.0005, "loss": 2.1237, "step": 45390 }, { "epoch": 0.17280360527698058, "grad_norm": 0.12386047840118408, "learning_rate": 0.0005, "loss": 2.1448, "step": 45400 }, { "epoch": 0.17284166774510326, "grad_norm": 0.1407991200685501, "learning_rate": 0.0005, "loss": 2.1419, "step": 45410 }, { "epoch": 0.17287973021322595, "grad_norm": 0.11899222433567047, "learning_rate": 0.0005, "loss": 2.1391, "step": 45420 }, { "epoch": 0.17291779268134863, "grad_norm": 0.13968820869922638, "learning_rate": 0.0005, "loss": 2.12, "step": 45430 }, { "epoch": 0.17295585514947132, "grad_norm": 0.12605208158493042, "learning_rate": 0.0005, "loss": 2.1438, "step": 45440 }, { "epoch": 0.172993917617594, "grad_norm": 0.11998526006937027, "learning_rate": 0.0005, "loss": 2.136, "step": 45450 }, { "epoch": 0.17303198008571669, "grad_norm": 0.11852370202541351, "learning_rate": 0.0005, "loss": 2.1269, "step": 45460 }, { "epoch": 0.17307004255383937, "grad_norm": 0.12640227377414703, "learning_rate": 0.0005, "loss": 2.1275, "step": 45470 }, { "epoch": 0.17310810502196206, "grad_norm": 0.12370039522647858, "learning_rate": 0.0005, "loss": 2.1384, "step": 45480 }, { "epoch": 0.17314616749008474, "grad_norm": 0.1274411529302597, "learning_rate": 0.0005, "loss": 2.1356, "step": 45490 }, { "epoch": 0.1731842299582074, "grad_norm": 0.11666889488697052, "learning_rate": 0.0005, "loss": 2.1281, "step": 45500 }, { "epoch": 0.17322229242633008, "grad_norm": 0.13356836140155792, "learning_rate": 0.0005, "loss": 2.1286, "step": 45510 }, { "epoch": 0.17326035489445277, "grad_norm": 0.12142832577228546, "learning_rate": 0.0005, "loss": 2.1327, "step": 45520 }, { "epoch": 0.17329841736257545, "grad_norm": 0.12746329605579376, "learning_rate": 0.0005, "loss": 2.1405, "step": 45530 }, { "epoch": 0.17333647983069814, "grad_norm": 0.1255599856376648, "learning_rate": 0.0005, "loss": 2.1256, "step": 45540 }, { "epoch": 0.17337454229882082, "grad_norm": 0.12974071502685547, "learning_rate": 0.0005, "loss": 2.1362, "step": 45550 }, { "epoch": 0.1734126047669435, "grad_norm": 0.13310503959655762, "learning_rate": 0.0005, "loss": 2.1148, "step": 45560 }, { "epoch": 0.1734506672350662, "grad_norm": 0.1318385899066925, "learning_rate": 0.0005, "loss": 2.1589, "step": 45570 }, { "epoch": 0.17348872970318888, "grad_norm": 0.1205783486366272, "learning_rate": 0.0005, "loss": 2.1376, "step": 45580 }, { "epoch": 0.17352679217131156, "grad_norm": 0.12158027291297913, "learning_rate": 0.0005, "loss": 2.1415, "step": 45590 }, { "epoch": 0.17356485463943425, "grad_norm": 0.12061561644077301, "learning_rate": 0.0005, "loss": 2.1371, "step": 45600 }, { "epoch": 0.17360291710755693, "grad_norm": 0.11390230059623718, "learning_rate": 0.0005, "loss": 2.1269, "step": 45610 }, { "epoch": 0.17364097957567962, "grad_norm": 0.12488873302936554, "learning_rate": 0.0005, "loss": 2.1326, "step": 45620 }, { "epoch": 0.1736790420438023, "grad_norm": 0.12872549891471863, "learning_rate": 0.0005, "loss": 2.1345, "step": 45630 }, { "epoch": 0.17371710451192496, "grad_norm": 0.13098588585853577, "learning_rate": 0.0005, "loss": 2.1439, "step": 45640 }, { "epoch": 0.17375516698004764, "grad_norm": 0.13178899884223938, "learning_rate": 0.0005, "loss": 2.1405, "step": 45650 }, { "epoch": 0.17379322944817033, "grad_norm": 0.12277550995349884, "learning_rate": 0.0005, "loss": 2.1317, "step": 45660 }, { "epoch": 0.173831291916293, "grad_norm": 0.1527359038591385, "learning_rate": 0.0005, "loss": 2.1367, "step": 45670 }, { "epoch": 0.1738693543844157, "grad_norm": 0.12647026777267456, "learning_rate": 0.0005, "loss": 2.1314, "step": 45680 }, { "epoch": 0.17390741685253838, "grad_norm": 0.13957686722278595, "learning_rate": 0.0005, "loss": 2.1417, "step": 45690 }, { "epoch": 0.17394547932066107, "grad_norm": 0.13181020319461823, "learning_rate": 0.0005, "loss": 2.1246, "step": 45700 }, { "epoch": 0.17398354178878375, "grad_norm": 0.12165477126836777, "learning_rate": 0.0005, "loss": 2.1303, "step": 45710 }, { "epoch": 0.17402160425690644, "grad_norm": 0.1272124946117401, "learning_rate": 0.0005, "loss": 2.1386, "step": 45720 }, { "epoch": 0.17405966672502912, "grad_norm": 0.1310453861951828, "learning_rate": 0.0005, "loss": 2.1299, "step": 45730 }, { "epoch": 0.1740977291931518, "grad_norm": 0.1178453266620636, "learning_rate": 0.0005, "loss": 2.1266, "step": 45740 }, { "epoch": 0.1741357916612745, "grad_norm": 0.12335092574357986, "learning_rate": 0.0005, "loss": 2.1363, "step": 45750 }, { "epoch": 0.17417385412939718, "grad_norm": 0.13877379894256592, "learning_rate": 0.0005, "loss": 2.1414, "step": 45760 }, { "epoch": 0.17421191659751986, "grad_norm": 0.11673180013895035, "learning_rate": 0.0005, "loss": 2.149, "step": 45770 }, { "epoch": 0.17424997906564255, "grad_norm": 0.13545605540275574, "learning_rate": 0.0005, "loss": 2.1236, "step": 45780 }, { "epoch": 0.1742880415337652, "grad_norm": 0.124485544860363, "learning_rate": 0.0005, "loss": 2.1379, "step": 45790 }, { "epoch": 0.1743261040018879, "grad_norm": 0.12190944701433182, "learning_rate": 0.0005, "loss": 2.1385, "step": 45800 }, { "epoch": 0.17436416647001057, "grad_norm": 0.12852436304092407, "learning_rate": 0.0005, "loss": 2.1468, "step": 45810 }, { "epoch": 0.17440222893813326, "grad_norm": 0.12599514424800873, "learning_rate": 0.0005, "loss": 2.1411, "step": 45820 }, { "epoch": 0.17444029140625594, "grad_norm": 0.11554643511772156, "learning_rate": 0.0005, "loss": 2.1165, "step": 45830 }, { "epoch": 0.17447835387437863, "grad_norm": 0.12082778662443161, "learning_rate": 0.0005, "loss": 2.1505, "step": 45840 }, { "epoch": 0.1745164163425013, "grad_norm": 0.12294019758701324, "learning_rate": 0.0005, "loss": 2.1333, "step": 45850 }, { "epoch": 0.174554478810624, "grad_norm": 0.12255796045064926, "learning_rate": 0.0005, "loss": 2.138, "step": 45860 }, { "epoch": 0.17459254127874668, "grad_norm": 0.1204526275396347, "learning_rate": 0.0005, "loss": 2.1433, "step": 45870 }, { "epoch": 0.17463060374686937, "grad_norm": 0.124061718583107, "learning_rate": 0.0005, "loss": 2.1351, "step": 45880 }, { "epoch": 0.17466866621499205, "grad_norm": 0.11348722875118256, "learning_rate": 0.0005, "loss": 2.1273, "step": 45890 }, { "epoch": 0.17470672868311474, "grad_norm": 0.12603124976158142, "learning_rate": 0.0005, "loss": 2.1602, "step": 45900 }, { "epoch": 0.17474479115123742, "grad_norm": 0.12413591146469116, "learning_rate": 0.0005, "loss": 2.1414, "step": 45910 }, { "epoch": 0.1747828536193601, "grad_norm": 0.13132306933403015, "learning_rate": 0.0005, "loss": 2.1165, "step": 45920 }, { "epoch": 0.17482091608748276, "grad_norm": 0.12539812922477722, "learning_rate": 0.0005, "loss": 2.1352, "step": 45930 }, { "epoch": 0.17485897855560545, "grad_norm": 0.10985272377729416, "learning_rate": 0.0005, "loss": 2.1355, "step": 45940 }, { "epoch": 0.17489704102372813, "grad_norm": 0.1305789202451706, "learning_rate": 0.0005, "loss": 2.1402, "step": 45950 }, { "epoch": 0.17493510349185082, "grad_norm": 0.1933310478925705, "learning_rate": 0.0005, "loss": 2.1359, "step": 45960 }, { "epoch": 0.1749731659599735, "grad_norm": 0.12040173262357712, "learning_rate": 0.0005, "loss": 2.1337, "step": 45970 }, { "epoch": 0.1750112284280962, "grad_norm": 0.15249690413475037, "learning_rate": 0.0005, "loss": 2.1193, "step": 45980 }, { "epoch": 0.17504929089621887, "grad_norm": 0.1186014786362648, "learning_rate": 0.0005, "loss": 2.1273, "step": 45990 }, { "epoch": 0.17508735336434156, "grad_norm": 0.12150265276432037, "learning_rate": 0.0005, "loss": 2.1283, "step": 46000 }, { "epoch": 0.17512541583246424, "grad_norm": 0.1239115297794342, "learning_rate": 0.0005, "loss": 2.1376, "step": 46010 }, { "epoch": 0.17516347830058693, "grad_norm": 0.13498394191265106, "learning_rate": 0.0005, "loss": 2.1249, "step": 46020 }, { "epoch": 0.1752015407687096, "grad_norm": 0.12338341772556305, "learning_rate": 0.0005, "loss": 2.133, "step": 46030 }, { "epoch": 0.1752396032368323, "grad_norm": 0.1274867206811905, "learning_rate": 0.0005, "loss": 2.1252, "step": 46040 }, { "epoch": 0.17527766570495498, "grad_norm": 0.11397123336791992, "learning_rate": 0.0005, "loss": 2.131, "step": 46050 }, { "epoch": 0.17531572817307767, "grad_norm": 0.16468773782253265, "learning_rate": 0.0005, "loss": 2.1454, "step": 46060 }, { "epoch": 0.17535379064120032, "grad_norm": 0.11684907227754593, "learning_rate": 0.0005, "loss": 2.1331, "step": 46070 }, { "epoch": 0.175391853109323, "grad_norm": 0.13277249038219452, "learning_rate": 0.0005, "loss": 2.1373, "step": 46080 }, { "epoch": 0.1754299155774457, "grad_norm": 0.1336735486984253, "learning_rate": 0.0005, "loss": 2.1361, "step": 46090 }, { "epoch": 0.17546797804556838, "grad_norm": 0.1380987912416458, "learning_rate": 0.0005, "loss": 2.123, "step": 46100 }, { "epoch": 0.17550604051369106, "grad_norm": 0.12328807264566422, "learning_rate": 0.0005, "loss": 2.1491, "step": 46110 }, { "epoch": 0.17554410298181375, "grad_norm": 0.12212718278169632, "learning_rate": 0.0005, "loss": 2.1355, "step": 46120 }, { "epoch": 0.17558216544993643, "grad_norm": 0.11557463556528091, "learning_rate": 0.0005, "loss": 2.1232, "step": 46130 }, { "epoch": 0.17562022791805912, "grad_norm": 0.1220347136259079, "learning_rate": 0.0005, "loss": 2.1216, "step": 46140 }, { "epoch": 0.1756582903861818, "grad_norm": 0.13644815981388092, "learning_rate": 0.0005, "loss": 2.1149, "step": 46150 }, { "epoch": 0.1756963528543045, "grad_norm": 0.13459473848342896, "learning_rate": 0.0005, "loss": 2.1413, "step": 46160 }, { "epoch": 0.17573441532242717, "grad_norm": 0.12515737116336823, "learning_rate": 0.0005, "loss": 2.1341, "step": 46170 }, { "epoch": 0.17577247779054986, "grad_norm": 0.13208098709583282, "learning_rate": 0.0005, "loss": 2.1323, "step": 46180 }, { "epoch": 0.17581054025867254, "grad_norm": 0.11350484192371368, "learning_rate": 0.0005, "loss": 2.1289, "step": 46190 }, { "epoch": 0.17584860272679523, "grad_norm": 0.11895857751369476, "learning_rate": 0.0005, "loss": 2.1383, "step": 46200 }, { "epoch": 0.1758866651949179, "grad_norm": 0.11057837307453156, "learning_rate": 0.0005, "loss": 2.1325, "step": 46210 }, { "epoch": 0.17592472766304057, "grad_norm": 0.1252758502960205, "learning_rate": 0.0005, "loss": 2.1241, "step": 46220 }, { "epoch": 0.17596279013116325, "grad_norm": 0.12299270182847977, "learning_rate": 0.0005, "loss": 2.1371, "step": 46230 }, { "epoch": 0.17600085259928594, "grad_norm": 0.12673571705818176, "learning_rate": 0.0005, "loss": 2.1331, "step": 46240 }, { "epoch": 0.17603891506740862, "grad_norm": 0.10950964689254761, "learning_rate": 0.0005, "loss": 2.1413, "step": 46250 }, { "epoch": 0.1760769775355313, "grad_norm": 0.11511676013469696, "learning_rate": 0.0005, "loss": 2.1404, "step": 46260 }, { "epoch": 0.176115040003654, "grad_norm": 0.12869395315647125, "learning_rate": 0.0005, "loss": 2.1501, "step": 46270 }, { "epoch": 0.17615310247177668, "grad_norm": 0.14847025275230408, "learning_rate": 0.0005, "loss": 2.1413, "step": 46280 }, { "epoch": 0.17619116493989936, "grad_norm": 0.11274388432502747, "learning_rate": 0.0005, "loss": 2.1348, "step": 46290 }, { "epoch": 0.17622922740802205, "grad_norm": 0.12950894236564636, "learning_rate": 0.0005, "loss": 2.1367, "step": 46300 }, { "epoch": 0.17626728987614473, "grad_norm": 0.1113792359828949, "learning_rate": 0.0005, "loss": 2.1326, "step": 46310 }, { "epoch": 0.17630535234426742, "grad_norm": 0.15397079288959503, "learning_rate": 0.0005, "loss": 2.1355, "step": 46320 }, { "epoch": 0.1763434148123901, "grad_norm": 0.13755261898040771, "learning_rate": 0.0005, "loss": 2.1389, "step": 46330 }, { "epoch": 0.1763814772805128, "grad_norm": 0.11487016081809998, "learning_rate": 0.0005, "loss": 2.1406, "step": 46340 }, { "epoch": 0.17641953974863547, "grad_norm": 0.11901956796646118, "learning_rate": 0.0005, "loss": 2.1439, "step": 46350 }, { "epoch": 0.17645760221675813, "grad_norm": 0.12137291580438614, "learning_rate": 0.0005, "loss": 2.1573, "step": 46360 }, { "epoch": 0.17649566468488082, "grad_norm": 0.11371459066867828, "learning_rate": 0.0005, "loss": 2.1182, "step": 46370 }, { "epoch": 0.1765337271530035, "grad_norm": 0.1144208237528801, "learning_rate": 0.0005, "loss": 2.1343, "step": 46380 }, { "epoch": 0.17657178962112619, "grad_norm": 0.1280364841222763, "learning_rate": 0.0005, "loss": 2.1281, "step": 46390 }, { "epoch": 0.17660985208924887, "grad_norm": 0.12925398349761963, "learning_rate": 0.0005, "loss": 2.1186, "step": 46400 }, { "epoch": 0.17664791455737155, "grad_norm": 0.12857505679130554, "learning_rate": 0.0005, "loss": 2.1305, "step": 46410 }, { "epoch": 0.17668597702549424, "grad_norm": 0.13170984387397766, "learning_rate": 0.0005, "loss": 2.1437, "step": 46420 }, { "epoch": 0.17672403949361692, "grad_norm": 0.12619630992412567, "learning_rate": 0.0005, "loss": 2.1272, "step": 46430 }, { "epoch": 0.1767621019617396, "grad_norm": 0.14194762706756592, "learning_rate": 0.0005, "loss": 2.1354, "step": 46440 }, { "epoch": 0.1768001644298623, "grad_norm": 0.12867200374603271, "learning_rate": 0.0005, "loss": 2.1379, "step": 46450 }, { "epoch": 0.17683822689798498, "grad_norm": 0.12789608538150787, "learning_rate": 0.0005, "loss": 2.1308, "step": 46460 }, { "epoch": 0.17687628936610766, "grad_norm": 0.1245100274682045, "learning_rate": 0.0005, "loss": 2.1338, "step": 46470 }, { "epoch": 0.17691435183423035, "grad_norm": 0.15106020867824554, "learning_rate": 0.0005, "loss": 2.1558, "step": 46480 }, { "epoch": 0.17695241430235303, "grad_norm": 0.13041353225708008, "learning_rate": 0.0005, "loss": 2.1357, "step": 46490 }, { "epoch": 0.1769904767704757, "grad_norm": 0.13027580082416534, "learning_rate": 0.0005, "loss": 2.1385, "step": 46500 }, { "epoch": 0.17702853923859838, "grad_norm": 0.11549444496631622, "learning_rate": 0.0005, "loss": 2.1465, "step": 46510 }, { "epoch": 0.17706660170672106, "grad_norm": 0.13323059678077698, "learning_rate": 0.0005, "loss": 2.1233, "step": 46520 }, { "epoch": 0.17710466417484375, "grad_norm": 0.1289878934621811, "learning_rate": 0.0005, "loss": 2.1228, "step": 46530 }, { "epoch": 0.17714272664296643, "grad_norm": 0.13852861523628235, "learning_rate": 0.0005, "loss": 2.1216, "step": 46540 }, { "epoch": 0.17718078911108912, "grad_norm": 0.13921350240707397, "learning_rate": 0.0005, "loss": 2.1385, "step": 46550 }, { "epoch": 0.1772188515792118, "grad_norm": 0.12161636352539062, "learning_rate": 0.0005, "loss": 2.1171, "step": 46560 }, { "epoch": 0.17725691404733449, "grad_norm": 0.13465529680252075, "learning_rate": 0.0005, "loss": 2.1225, "step": 46570 }, { "epoch": 0.17729497651545717, "grad_norm": 0.17854134738445282, "learning_rate": 0.0005, "loss": 2.1433, "step": 46580 }, { "epoch": 0.17733303898357985, "grad_norm": 0.12033980339765549, "learning_rate": 0.0005, "loss": 2.1301, "step": 46590 }, { "epoch": 0.17737110145170254, "grad_norm": 0.1267291158437729, "learning_rate": 0.0005, "loss": 2.1373, "step": 46600 }, { "epoch": 0.17740916391982522, "grad_norm": 0.11675343662500381, "learning_rate": 0.0005, "loss": 2.1312, "step": 46610 }, { "epoch": 0.1774472263879479, "grad_norm": 0.12187250703573227, "learning_rate": 0.0005, "loss": 2.1395, "step": 46620 }, { "epoch": 0.1774852888560706, "grad_norm": 0.12096511572599411, "learning_rate": 0.0005, "loss": 2.148, "step": 46630 }, { "epoch": 0.17752335132419328, "grad_norm": 0.12929442524909973, "learning_rate": 0.0005, "loss": 2.1401, "step": 46640 }, { "epoch": 0.17756141379231594, "grad_norm": 0.12891370058059692, "learning_rate": 0.0005, "loss": 2.1316, "step": 46650 }, { "epoch": 0.17759947626043862, "grad_norm": 0.12333334982395172, "learning_rate": 0.0005, "loss": 2.1385, "step": 46660 }, { "epoch": 0.1776375387285613, "grad_norm": 0.1466992199420929, "learning_rate": 0.0005, "loss": 2.1293, "step": 46670 }, { "epoch": 0.177675601196684, "grad_norm": 0.12020840495824814, "learning_rate": 0.0005, "loss": 2.1251, "step": 46680 }, { "epoch": 0.17771366366480668, "grad_norm": 0.12505066394805908, "learning_rate": 0.0005, "loss": 2.139, "step": 46690 }, { "epoch": 0.17775172613292936, "grad_norm": 0.1469816416501999, "learning_rate": 0.0005, "loss": 2.1555, "step": 46700 }, { "epoch": 0.17778978860105205, "grad_norm": 0.12790228426456451, "learning_rate": 0.0005, "loss": 2.1441, "step": 46710 }, { "epoch": 0.17782785106917473, "grad_norm": 0.12211241573095322, "learning_rate": 0.0005, "loss": 2.1296, "step": 46720 }, { "epoch": 0.17786591353729742, "grad_norm": 0.1326773464679718, "learning_rate": 0.0005, "loss": 2.1269, "step": 46730 }, { "epoch": 0.1779039760054201, "grad_norm": 0.14631135761737823, "learning_rate": 0.0005, "loss": 2.1217, "step": 46740 }, { "epoch": 0.17794203847354279, "grad_norm": 0.12743769586086273, "learning_rate": 0.0005, "loss": 2.1391, "step": 46750 }, { "epoch": 0.17798010094166547, "grad_norm": 0.12034343928098679, "learning_rate": 0.0005, "loss": 2.131, "step": 46760 }, { "epoch": 0.17801816340978815, "grad_norm": 0.146920844912529, "learning_rate": 0.0005, "loss": 2.1296, "step": 46770 }, { "epoch": 0.17805622587791084, "grad_norm": 0.12294947355985641, "learning_rate": 0.0005, "loss": 2.1376, "step": 46780 }, { "epoch": 0.1780942883460335, "grad_norm": 0.13369952142238617, "learning_rate": 0.0005, "loss": 2.1288, "step": 46790 }, { "epoch": 0.17813235081415618, "grad_norm": 0.12316075712442398, "learning_rate": 0.0005, "loss": 2.1397, "step": 46800 }, { "epoch": 0.17817041328227887, "grad_norm": 0.1283416450023651, "learning_rate": 0.0005, "loss": 2.1346, "step": 46810 }, { "epoch": 0.17820847575040155, "grad_norm": 0.11663859337568283, "learning_rate": 0.0005, "loss": 2.1317, "step": 46820 }, { "epoch": 0.17824653821852424, "grad_norm": 0.1286969780921936, "learning_rate": 0.0005, "loss": 2.1371, "step": 46830 }, { "epoch": 0.17828460068664692, "grad_norm": 0.12117967009544373, "learning_rate": 0.0005, "loss": 2.1359, "step": 46840 }, { "epoch": 0.1783226631547696, "grad_norm": 0.1272825002670288, "learning_rate": 0.0005, "loss": 2.1303, "step": 46850 }, { "epoch": 0.1783607256228923, "grad_norm": 0.12218283116817474, "learning_rate": 0.0005, "loss": 2.1309, "step": 46860 }, { "epoch": 0.17839878809101498, "grad_norm": 0.12377354502677917, "learning_rate": 0.0005, "loss": 2.1587, "step": 46870 }, { "epoch": 0.17843685055913766, "grad_norm": 0.12458238005638123, "learning_rate": 0.0005, "loss": 2.1252, "step": 46880 }, { "epoch": 0.17847491302726035, "grad_norm": 0.12494272738695145, "learning_rate": 0.0005, "loss": 2.1267, "step": 46890 }, { "epoch": 0.17851297549538303, "grad_norm": 0.13676008582115173, "learning_rate": 0.0005, "loss": 2.1283, "step": 46900 }, { "epoch": 0.17855103796350572, "grad_norm": 0.14277496933937073, "learning_rate": 0.0005, "loss": 2.1218, "step": 46910 }, { "epoch": 0.1785891004316284, "grad_norm": 0.12220119684934616, "learning_rate": 0.0005, "loss": 2.1252, "step": 46920 }, { "epoch": 0.17862716289975109, "grad_norm": 0.13440436124801636, "learning_rate": 0.0005, "loss": 2.1328, "step": 46930 }, { "epoch": 0.17866522536787374, "grad_norm": 0.1321924924850464, "learning_rate": 0.0005, "loss": 2.1359, "step": 46940 }, { "epoch": 0.17870328783599643, "grad_norm": 0.13210120797157288, "learning_rate": 0.0005, "loss": 2.1487, "step": 46950 }, { "epoch": 0.1787413503041191, "grad_norm": 0.12004867941141129, "learning_rate": 0.0005, "loss": 2.1285, "step": 46960 }, { "epoch": 0.1787794127722418, "grad_norm": 0.13169914484024048, "learning_rate": 0.0005, "loss": 2.1232, "step": 46970 }, { "epoch": 0.17881747524036448, "grad_norm": 0.12706497311592102, "learning_rate": 0.0005, "loss": 2.133, "step": 46980 }, { "epoch": 0.17885553770848717, "grad_norm": 0.1386055052280426, "learning_rate": 0.0005, "loss": 2.1358, "step": 46990 }, { "epoch": 0.17889360017660985, "grad_norm": 0.1212029978632927, "learning_rate": 0.0005, "loss": 2.1131, "step": 47000 }, { "epoch": 0.17893166264473254, "grad_norm": 0.12467978894710541, "learning_rate": 0.0005, "loss": 2.1351, "step": 47010 }, { "epoch": 0.17896972511285522, "grad_norm": 0.141191303730011, "learning_rate": 0.0005, "loss": 2.1508, "step": 47020 }, { "epoch": 0.1790077875809779, "grad_norm": 0.13441681861877441, "learning_rate": 0.0005, "loss": 2.142, "step": 47030 }, { "epoch": 0.1790458500491006, "grad_norm": 0.12653379142284393, "learning_rate": 0.0005, "loss": 2.1302, "step": 47040 }, { "epoch": 0.17908391251722328, "grad_norm": 0.1331082135438919, "learning_rate": 0.0005, "loss": 2.1312, "step": 47050 }, { "epoch": 0.17912197498534596, "grad_norm": 0.12425190955400467, "learning_rate": 0.0005, "loss": 2.1286, "step": 47060 }, { "epoch": 0.17916003745346865, "grad_norm": 0.12457893788814545, "learning_rate": 0.0005, "loss": 2.1319, "step": 47070 }, { "epoch": 0.1791980999215913, "grad_norm": 0.1274491250514984, "learning_rate": 0.0005, "loss": 2.1284, "step": 47080 }, { "epoch": 0.179236162389714, "grad_norm": 0.12396488338708878, "learning_rate": 0.0005, "loss": 2.1264, "step": 47090 }, { "epoch": 0.17927422485783667, "grad_norm": 0.12364597618579865, "learning_rate": 0.0005, "loss": 2.1279, "step": 47100 }, { "epoch": 0.17931228732595936, "grad_norm": 0.11393121629953384, "learning_rate": 0.0005, "loss": 2.1472, "step": 47110 }, { "epoch": 0.17935034979408204, "grad_norm": 0.14089448750019073, "learning_rate": 0.0005, "loss": 2.1216, "step": 47120 }, { "epoch": 0.17938841226220473, "grad_norm": 0.12793299555778503, "learning_rate": 0.0005, "loss": 2.1297, "step": 47130 }, { "epoch": 0.1794264747303274, "grad_norm": 0.14278213679790497, "learning_rate": 0.0005, "loss": 2.1544, "step": 47140 }, { "epoch": 0.1794645371984501, "grad_norm": 0.12074515223503113, "learning_rate": 0.0005, "loss": 2.125, "step": 47150 }, { "epoch": 0.17950259966657278, "grad_norm": 0.11604276299476624, "learning_rate": 0.0005, "loss": 2.1381, "step": 47160 }, { "epoch": 0.17954066213469547, "grad_norm": 0.12846846878528595, "learning_rate": 0.0005, "loss": 2.1196, "step": 47170 }, { "epoch": 0.17957872460281815, "grad_norm": 0.10897751897573471, "learning_rate": 0.0005, "loss": 2.1344, "step": 47180 }, { "epoch": 0.17961678707094084, "grad_norm": 0.12026036530733109, "learning_rate": 0.0005, "loss": 2.1331, "step": 47190 }, { "epoch": 0.17965484953906352, "grad_norm": 0.12652936577796936, "learning_rate": 0.0005, "loss": 2.1321, "step": 47200 }, { "epoch": 0.1796929120071862, "grad_norm": 0.1351630538702011, "learning_rate": 0.0005, "loss": 2.1254, "step": 47210 }, { "epoch": 0.17973097447530886, "grad_norm": 0.15102262794971466, "learning_rate": 0.0005, "loss": 2.1306, "step": 47220 }, { "epoch": 0.17976903694343155, "grad_norm": 0.1387786865234375, "learning_rate": 0.0005, "loss": 2.1278, "step": 47230 }, { "epoch": 0.17980709941155423, "grad_norm": 0.11889027804136276, "learning_rate": 0.0005, "loss": 2.1259, "step": 47240 }, { "epoch": 0.17984516187967692, "grad_norm": 0.1278916597366333, "learning_rate": 0.0005, "loss": 2.1412, "step": 47250 }, { "epoch": 0.1798832243477996, "grad_norm": 0.13283556699752808, "learning_rate": 0.0005, "loss": 2.1228, "step": 47260 }, { "epoch": 0.1799212868159223, "grad_norm": 0.11985579878091812, "learning_rate": 0.0005, "loss": 2.1388, "step": 47270 }, { "epoch": 0.17995934928404497, "grad_norm": 0.11472861468791962, "learning_rate": 0.0005, "loss": 2.1242, "step": 47280 }, { "epoch": 0.17999741175216766, "grad_norm": 0.11842813342809677, "learning_rate": 0.0005, "loss": 2.1277, "step": 47290 }, { "epoch": 0.18003547422029034, "grad_norm": 0.12179480493068695, "learning_rate": 0.0005, "loss": 2.1311, "step": 47300 }, { "epoch": 0.18007353668841303, "grad_norm": 0.11679831147193909, "learning_rate": 0.0005, "loss": 2.1309, "step": 47310 }, { "epoch": 0.1801115991565357, "grad_norm": 0.12437867373228073, "learning_rate": 0.0005, "loss": 2.1533, "step": 47320 }, { "epoch": 0.1801496616246584, "grad_norm": 0.11317010223865509, "learning_rate": 0.0005, "loss": 2.1368, "step": 47330 }, { "epoch": 0.18018772409278108, "grad_norm": 0.13073226809501648, "learning_rate": 0.0005, "loss": 2.139, "step": 47340 }, { "epoch": 0.18022578656090377, "grad_norm": 0.12761497497558594, "learning_rate": 0.0005, "loss": 2.1213, "step": 47350 }, { "epoch": 0.18026384902902645, "grad_norm": 0.1252940446138382, "learning_rate": 0.0005, "loss": 2.1271, "step": 47360 }, { "epoch": 0.1803019114971491, "grad_norm": 0.12291482836008072, "learning_rate": 0.0005, "loss": 2.138, "step": 47370 }, { "epoch": 0.1803399739652718, "grad_norm": 0.12462043762207031, "learning_rate": 0.0005, "loss": 2.1393, "step": 47380 }, { "epoch": 0.18037803643339448, "grad_norm": 0.13734501600265503, "learning_rate": 0.0005, "loss": 2.117, "step": 47390 }, { "epoch": 0.18041609890151716, "grad_norm": 0.13022781908512115, "learning_rate": 0.0005, "loss": 2.1407, "step": 47400 }, { "epoch": 0.18045416136963985, "grad_norm": 0.13055624067783356, "learning_rate": 0.0005, "loss": 2.1245, "step": 47410 }, { "epoch": 0.18049222383776253, "grad_norm": 0.1153981164097786, "learning_rate": 0.0005, "loss": 2.1245, "step": 47420 }, { "epoch": 0.18053028630588522, "grad_norm": 0.14261126518249512, "learning_rate": 0.0005, "loss": 2.1468, "step": 47430 }, { "epoch": 0.1805683487740079, "grad_norm": 0.15162013471126556, "learning_rate": 0.0005, "loss": 2.1276, "step": 47440 }, { "epoch": 0.1806064112421306, "grad_norm": 0.12845498323440552, "learning_rate": 0.0005, "loss": 2.1388, "step": 47450 }, { "epoch": 0.18064447371025327, "grad_norm": 0.11967799067497253, "learning_rate": 0.0005, "loss": 2.1424, "step": 47460 }, { "epoch": 0.18068253617837596, "grad_norm": 0.12578807771205902, "learning_rate": 0.0005, "loss": 2.1198, "step": 47470 }, { "epoch": 0.18072059864649864, "grad_norm": 0.1274806410074234, "learning_rate": 0.0005, "loss": 2.1378, "step": 47480 }, { "epoch": 0.18075866111462133, "grad_norm": 0.14504271745681763, "learning_rate": 0.0005, "loss": 2.1318, "step": 47490 }, { "epoch": 0.180796723582744, "grad_norm": 0.13173021376132965, "learning_rate": 0.0005, "loss": 2.1268, "step": 47500 }, { "epoch": 0.18083478605086667, "grad_norm": 0.13466207683086395, "learning_rate": 0.0005, "loss": 2.1169, "step": 47510 }, { "epoch": 0.18087284851898935, "grad_norm": 0.1106971949338913, "learning_rate": 0.0005, "loss": 2.1239, "step": 47520 }, { "epoch": 0.18091091098711204, "grad_norm": 0.128569096326828, "learning_rate": 0.0005, "loss": 2.1347, "step": 47530 }, { "epoch": 0.18094897345523472, "grad_norm": 0.13816776871681213, "learning_rate": 0.0005, "loss": 2.1538, "step": 47540 }, { "epoch": 0.1809870359233574, "grad_norm": 0.14278821647167206, "learning_rate": 0.0005, "loss": 2.1346, "step": 47550 }, { "epoch": 0.1810250983914801, "grad_norm": 0.13838042318820953, "learning_rate": 0.0005, "loss": 2.1327, "step": 47560 }, { "epoch": 0.18106316085960278, "grad_norm": 0.1207902655005455, "learning_rate": 0.0005, "loss": 2.1333, "step": 47570 }, { "epoch": 0.18110122332772546, "grad_norm": 0.12243013083934784, "learning_rate": 0.0005, "loss": 2.1375, "step": 47580 }, { "epoch": 0.18113928579584815, "grad_norm": 0.11340376734733582, "learning_rate": 0.0005, "loss": 2.1225, "step": 47590 }, { "epoch": 0.18117734826397083, "grad_norm": 0.12646237015724182, "learning_rate": 0.0005, "loss": 2.1239, "step": 47600 }, { "epoch": 0.18121541073209352, "grad_norm": 0.13434185087680817, "learning_rate": 0.0005, "loss": 2.1437, "step": 47610 }, { "epoch": 0.1812534732002162, "grad_norm": 0.11621993035078049, "learning_rate": 0.0005, "loss": 2.1269, "step": 47620 }, { "epoch": 0.1812915356683389, "grad_norm": 0.12238717824220657, "learning_rate": 0.0005, "loss": 2.131, "step": 47630 }, { "epoch": 0.18132959813646157, "grad_norm": 0.12311246991157532, "learning_rate": 0.0005, "loss": 2.1302, "step": 47640 }, { "epoch": 0.18136766060458423, "grad_norm": 0.12371234595775604, "learning_rate": 0.0005, "loss": 2.13, "step": 47650 }, { "epoch": 0.18140572307270691, "grad_norm": 0.13039273023605347, "learning_rate": 0.0005, "loss": 2.1267, "step": 47660 }, { "epoch": 0.1814437855408296, "grad_norm": 0.12399288266897202, "learning_rate": 0.0005, "loss": 2.125, "step": 47670 }, { "epoch": 0.18148184800895228, "grad_norm": 0.13051196932792664, "learning_rate": 0.0005, "loss": 2.1284, "step": 47680 }, { "epoch": 0.18151991047707497, "grad_norm": 0.11748791486024857, "learning_rate": 0.0005, "loss": 2.1194, "step": 47690 }, { "epoch": 0.18155797294519765, "grad_norm": 0.11903548985719681, "learning_rate": 0.0005, "loss": 2.1301, "step": 47700 }, { "epoch": 0.18159603541332034, "grad_norm": 0.12035155296325684, "learning_rate": 0.0005, "loss": 2.1372, "step": 47710 }, { "epoch": 0.18163409788144302, "grad_norm": 0.11695858836174011, "learning_rate": 0.0005, "loss": 2.1347, "step": 47720 }, { "epoch": 0.1816721603495657, "grad_norm": 0.12408757954835892, "learning_rate": 0.0005, "loss": 2.1357, "step": 47730 }, { "epoch": 0.1817102228176884, "grad_norm": 0.12435764074325562, "learning_rate": 0.0005, "loss": 2.1431, "step": 47740 }, { "epoch": 0.18174828528581108, "grad_norm": 0.12510110437870026, "learning_rate": 0.0005, "loss": 2.1285, "step": 47750 }, { "epoch": 0.18178634775393376, "grad_norm": 0.12769056856632233, "learning_rate": 0.0005, "loss": 2.1338, "step": 47760 }, { "epoch": 0.18182441022205645, "grad_norm": 0.12242080271244049, "learning_rate": 0.0005, "loss": 2.129, "step": 47770 }, { "epoch": 0.18186247269017913, "grad_norm": 0.13732148706912994, "learning_rate": 0.0005, "loss": 2.1438, "step": 47780 }, { "epoch": 0.18190053515830182, "grad_norm": 0.13399755954742432, "learning_rate": 0.0005, "loss": 2.1422, "step": 47790 }, { "epoch": 0.18193859762642448, "grad_norm": 0.12308243662118912, "learning_rate": 0.0005, "loss": 2.1423, "step": 47800 }, { "epoch": 0.18197666009454716, "grad_norm": 0.11698419600725174, "learning_rate": 0.0005, "loss": 2.1341, "step": 47810 }, { "epoch": 0.18201472256266985, "grad_norm": 0.10664495825767517, "learning_rate": 0.0005, "loss": 2.1291, "step": 47820 }, { "epoch": 0.18205278503079253, "grad_norm": 0.1362016648054123, "learning_rate": 0.0005, "loss": 2.1298, "step": 47830 }, { "epoch": 0.18209084749891521, "grad_norm": 0.12378685176372528, "learning_rate": 0.0005, "loss": 2.1292, "step": 47840 }, { "epoch": 0.1821289099670379, "grad_norm": 0.11399336904287338, "learning_rate": 0.0005, "loss": 2.1361, "step": 47850 }, { "epoch": 0.18216697243516058, "grad_norm": 0.13297849893569946, "learning_rate": 0.0005, "loss": 2.1319, "step": 47860 }, { "epoch": 0.18220503490328327, "grad_norm": 0.13696694374084473, "learning_rate": 0.0005, "loss": 2.1336, "step": 47870 }, { "epoch": 0.18224309737140595, "grad_norm": 0.13576148450374603, "learning_rate": 0.0005, "loss": 2.1403, "step": 47880 }, { "epoch": 0.18228115983952864, "grad_norm": 0.12247444689273834, "learning_rate": 0.0005, "loss": 2.1131, "step": 47890 }, { "epoch": 0.18231922230765132, "grad_norm": 0.12061848491430283, "learning_rate": 0.0005, "loss": 2.1139, "step": 47900 }, { "epoch": 0.182357284775774, "grad_norm": 0.1310357004404068, "learning_rate": 0.0005, "loss": 2.1354, "step": 47910 }, { "epoch": 0.1823953472438967, "grad_norm": 0.12328796088695526, "learning_rate": 0.0005, "loss": 2.1339, "step": 47920 }, { "epoch": 0.18243340971201938, "grad_norm": 0.13524876534938812, "learning_rate": 0.0005, "loss": 2.1137, "step": 47930 }, { "epoch": 0.18247147218014204, "grad_norm": 0.13573935627937317, "learning_rate": 0.0005, "loss": 2.1258, "step": 47940 }, { "epoch": 0.18250953464826472, "grad_norm": 0.12921947240829468, "learning_rate": 0.0005, "loss": 2.1355, "step": 47950 }, { "epoch": 0.1825475971163874, "grad_norm": 0.12181194871664047, "learning_rate": 0.0005, "loss": 2.126, "step": 47960 }, { "epoch": 0.1825856595845101, "grad_norm": 0.12479119002819061, "learning_rate": 0.0005, "loss": 2.1371, "step": 47970 }, { "epoch": 0.18262372205263278, "grad_norm": 0.1317525953054428, "learning_rate": 0.0005, "loss": 2.1417, "step": 47980 }, { "epoch": 0.18266178452075546, "grad_norm": 0.12510502338409424, "learning_rate": 0.0005, "loss": 2.1258, "step": 47990 }, { "epoch": 0.18269984698887815, "grad_norm": 0.13090606033802032, "learning_rate": 0.0005, "loss": 2.1355, "step": 48000 }, { "epoch": 0.18273790945700083, "grad_norm": 0.1211482584476471, "learning_rate": 0.0005, "loss": 2.1377, "step": 48010 }, { "epoch": 0.18277597192512351, "grad_norm": 0.12430895119905472, "learning_rate": 0.0005, "loss": 2.1331, "step": 48020 }, { "epoch": 0.1828140343932462, "grad_norm": 0.1278323233127594, "learning_rate": 0.0005, "loss": 2.1359, "step": 48030 }, { "epoch": 0.18285209686136888, "grad_norm": 0.13269633054733276, "learning_rate": 0.0005, "loss": 2.1329, "step": 48040 }, { "epoch": 0.18289015932949157, "grad_norm": 0.1252775341272354, "learning_rate": 0.0005, "loss": 2.1377, "step": 48050 }, { "epoch": 0.18292822179761425, "grad_norm": 0.14520369470119476, "learning_rate": 0.0005, "loss": 2.134, "step": 48060 }, { "epoch": 0.18296628426573694, "grad_norm": 0.1397608369588852, "learning_rate": 0.0005, "loss": 2.1384, "step": 48070 }, { "epoch": 0.18300434673385962, "grad_norm": 0.1267949640750885, "learning_rate": 0.0005, "loss": 2.1344, "step": 48080 }, { "epoch": 0.18304240920198228, "grad_norm": 0.13453710079193115, "learning_rate": 0.0005, "loss": 2.1287, "step": 48090 }, { "epoch": 0.18308047167010497, "grad_norm": 0.12775638699531555, "learning_rate": 0.0005, "loss": 2.131, "step": 48100 }, { "epoch": 0.18311853413822765, "grad_norm": 0.1129986122250557, "learning_rate": 0.0005, "loss": 2.1412, "step": 48110 }, { "epoch": 0.18315659660635034, "grad_norm": 0.11675728112459183, "learning_rate": 0.0005, "loss": 2.1316, "step": 48120 }, { "epoch": 0.18319465907447302, "grad_norm": 0.11513984203338623, "learning_rate": 0.0005, "loss": 2.1249, "step": 48130 }, { "epoch": 0.1832327215425957, "grad_norm": 0.12235622853040695, "learning_rate": 0.0005, "loss": 2.129, "step": 48140 }, { "epoch": 0.1832707840107184, "grad_norm": 0.13701052963733673, "learning_rate": 0.0005, "loss": 2.1198, "step": 48150 }, { "epoch": 0.18330884647884108, "grad_norm": 0.11779025197029114, "learning_rate": 0.0005, "loss": 2.1254, "step": 48160 }, { "epoch": 0.18334690894696376, "grad_norm": 0.11653947830200195, "learning_rate": 0.0005, "loss": 2.1302, "step": 48170 }, { "epoch": 0.18338497141508645, "grad_norm": 0.11426492035388947, "learning_rate": 0.0005, "loss": 2.1253, "step": 48180 }, { "epoch": 0.18342303388320913, "grad_norm": 0.12913554906845093, "learning_rate": 0.0005, "loss": 2.1215, "step": 48190 }, { "epoch": 0.18346109635133181, "grad_norm": 0.11243739724159241, "learning_rate": 0.0005, "loss": 2.1272, "step": 48200 }, { "epoch": 0.1834991588194545, "grad_norm": 0.12019768357276917, "learning_rate": 0.0005, "loss": 2.1478, "step": 48210 }, { "epoch": 0.18353722128757718, "grad_norm": 0.12446115911006927, "learning_rate": 0.0005, "loss": 2.1333, "step": 48220 }, { "epoch": 0.18357528375569984, "grad_norm": 0.12636056542396545, "learning_rate": 0.0005, "loss": 2.1278, "step": 48230 }, { "epoch": 0.18361334622382253, "grad_norm": 0.1382257640361786, "learning_rate": 0.0005, "loss": 2.1227, "step": 48240 }, { "epoch": 0.1836514086919452, "grad_norm": 0.12824708223342896, "learning_rate": 0.0005, "loss": 2.1179, "step": 48250 }, { "epoch": 0.1836894711600679, "grad_norm": 0.11088469624519348, "learning_rate": 0.0005, "loss": 2.1323, "step": 48260 }, { "epoch": 0.18372753362819058, "grad_norm": 0.12594173848628998, "learning_rate": 0.0005, "loss": 2.1227, "step": 48270 }, { "epoch": 0.18376559609631327, "grad_norm": 0.1285485476255417, "learning_rate": 0.0005, "loss": 2.1239, "step": 48280 }, { "epoch": 0.18380365856443595, "grad_norm": 0.13596130907535553, "learning_rate": 0.0005, "loss": 2.129, "step": 48290 }, { "epoch": 0.18384172103255864, "grad_norm": 0.13879016041755676, "learning_rate": 0.0005, "loss": 2.1272, "step": 48300 }, { "epoch": 0.18387978350068132, "grad_norm": 0.12482644617557526, "learning_rate": 0.0005, "loss": 2.1346, "step": 48310 }, { "epoch": 0.183917845968804, "grad_norm": 0.13269701600074768, "learning_rate": 0.0005, "loss": 2.1409, "step": 48320 }, { "epoch": 0.1839559084369267, "grad_norm": 0.1344316005706787, "learning_rate": 0.0005, "loss": 2.1467, "step": 48330 }, { "epoch": 0.18399397090504938, "grad_norm": 0.1299740970134735, "learning_rate": 0.0005, "loss": 2.1424, "step": 48340 }, { "epoch": 0.18403203337317206, "grad_norm": 0.1218443289399147, "learning_rate": 0.0005, "loss": 2.1231, "step": 48350 }, { "epoch": 0.18407009584129475, "grad_norm": 0.12609827518463135, "learning_rate": 0.0005, "loss": 2.1343, "step": 48360 }, { "epoch": 0.1841081583094174, "grad_norm": 0.12949436902999878, "learning_rate": 0.0005, "loss": 2.1122, "step": 48370 }, { "epoch": 0.1841462207775401, "grad_norm": 0.13706724345684052, "learning_rate": 0.0005, "loss": 2.1588, "step": 48380 }, { "epoch": 0.18418428324566277, "grad_norm": 0.11350060999393463, "learning_rate": 0.0005, "loss": 2.1325, "step": 48390 }, { "epoch": 0.18422234571378546, "grad_norm": 0.12369034439325333, "learning_rate": 0.0005, "loss": 2.1297, "step": 48400 }, { "epoch": 0.18426040818190814, "grad_norm": 0.13806772232055664, "learning_rate": 0.0005, "loss": 2.1407, "step": 48410 }, { "epoch": 0.18429847065003083, "grad_norm": 0.13240782916545868, "learning_rate": 0.0005, "loss": 2.1333, "step": 48420 }, { "epoch": 0.1843365331181535, "grad_norm": 0.1262694150209427, "learning_rate": 0.0005, "loss": 2.1217, "step": 48430 }, { "epoch": 0.1843745955862762, "grad_norm": 0.13484366238117218, "learning_rate": 0.0005, "loss": 2.161, "step": 48440 }, { "epoch": 0.18441265805439888, "grad_norm": 0.1304924190044403, "learning_rate": 0.0005, "loss": 2.1394, "step": 48450 }, { "epoch": 0.18445072052252157, "grad_norm": 0.12426736950874329, "learning_rate": 0.0005, "loss": 2.1385, "step": 48460 }, { "epoch": 0.18448878299064425, "grad_norm": 0.13261044025421143, "learning_rate": 0.0005, "loss": 2.1303, "step": 48470 }, { "epoch": 0.18452684545876694, "grad_norm": 0.11962955445051193, "learning_rate": 0.0005, "loss": 2.1344, "step": 48480 }, { "epoch": 0.18456490792688962, "grad_norm": 0.1203152984380722, "learning_rate": 0.0005, "loss": 2.1259, "step": 48490 }, { "epoch": 0.1846029703950123, "grad_norm": 0.12043966352939606, "learning_rate": 0.0005, "loss": 2.1257, "step": 48500 }, { "epoch": 0.184641032863135, "grad_norm": 0.1077461838722229, "learning_rate": 0.0005, "loss": 2.1308, "step": 48510 }, { "epoch": 0.18467909533125765, "grad_norm": 0.125952810049057, "learning_rate": 0.0005, "loss": 2.144, "step": 48520 }, { "epoch": 0.18471715779938033, "grad_norm": 0.11711869388818741, "learning_rate": 0.0005, "loss": 2.1256, "step": 48530 }, { "epoch": 0.18475522026750302, "grad_norm": 0.12230297923088074, "learning_rate": 0.0005, "loss": 2.1317, "step": 48540 }, { "epoch": 0.1847932827356257, "grad_norm": 0.12001660466194153, "learning_rate": 0.0005, "loss": 2.1404, "step": 48550 }, { "epoch": 0.1848313452037484, "grad_norm": 0.12919172644615173, "learning_rate": 0.0005, "loss": 2.1481, "step": 48560 }, { "epoch": 0.18486940767187107, "grad_norm": 0.1313171237707138, "learning_rate": 0.0005, "loss": 2.1337, "step": 48570 }, { "epoch": 0.18490747013999376, "grad_norm": 0.9556818604469299, "learning_rate": 0.0005, "loss": 2.1124, "step": 48580 }, { "epoch": 0.18494553260811644, "grad_norm": 0.24016167223453522, "learning_rate": 0.0005, "loss": 2.1325, "step": 48590 }, { "epoch": 0.18498359507623913, "grad_norm": 0.12441037595272064, "learning_rate": 0.0005, "loss": 2.1284, "step": 48600 }, { "epoch": 0.1850216575443618, "grad_norm": 0.13198134303092957, "learning_rate": 0.0005, "loss": 2.1425, "step": 48610 }, { "epoch": 0.1850597200124845, "grad_norm": 0.11687224358320236, "learning_rate": 0.0005, "loss": 2.1388, "step": 48620 }, { "epoch": 0.18509778248060718, "grad_norm": 0.11632097512483597, "learning_rate": 0.0005, "loss": 2.1314, "step": 48630 }, { "epoch": 0.18513584494872987, "grad_norm": 0.12353020161390305, "learning_rate": 0.0005, "loss": 2.1371, "step": 48640 }, { "epoch": 0.18517390741685255, "grad_norm": 0.13218729197978973, "learning_rate": 0.0005, "loss": 2.1272, "step": 48650 }, { "epoch": 0.1852119698849752, "grad_norm": 0.10913823544979095, "learning_rate": 0.0005, "loss": 2.1371, "step": 48660 }, { "epoch": 0.1852500323530979, "grad_norm": 0.12533710896968842, "learning_rate": 0.0005, "loss": 2.1189, "step": 48670 }, { "epoch": 0.18528809482122058, "grad_norm": 0.10914388298988342, "learning_rate": 0.0005, "loss": 2.1267, "step": 48680 }, { "epoch": 0.18532615728934326, "grad_norm": 0.13747212290763855, "learning_rate": 0.0005, "loss": 2.1313, "step": 48690 }, { "epoch": 0.18536421975746595, "grad_norm": 0.140378937125206, "learning_rate": 0.0005, "loss": 2.1326, "step": 48700 }, { "epoch": 0.18540228222558863, "grad_norm": 0.1217515617609024, "learning_rate": 0.0005, "loss": 2.1266, "step": 48710 }, { "epoch": 0.18544034469371132, "grad_norm": 0.11559165269136429, "learning_rate": 0.0005, "loss": 2.1307, "step": 48720 }, { "epoch": 0.185478407161834, "grad_norm": 0.11101264506578445, "learning_rate": 0.0005, "loss": 2.1329, "step": 48730 }, { "epoch": 0.1855164696299567, "grad_norm": 0.12215148657560349, "learning_rate": 0.0005, "loss": 2.1358, "step": 48740 }, { "epoch": 0.18555453209807937, "grad_norm": 0.12364372611045837, "learning_rate": 0.0005, "loss": 2.1237, "step": 48750 }, { "epoch": 0.18559259456620206, "grad_norm": 0.12270593643188477, "learning_rate": 0.0005, "loss": 2.1238, "step": 48760 }, { "epoch": 0.18563065703432474, "grad_norm": 0.13053959608078003, "learning_rate": 0.0005, "loss": 2.1361, "step": 48770 }, { "epoch": 0.18566871950244743, "grad_norm": 0.11770855635404587, "learning_rate": 0.0005, "loss": 2.1225, "step": 48780 }, { "epoch": 0.1857067819705701, "grad_norm": 0.12644435465335846, "learning_rate": 0.0005, "loss": 2.1259, "step": 48790 }, { "epoch": 0.18574484443869277, "grad_norm": 0.1235184296965599, "learning_rate": 0.0005, "loss": 2.1288, "step": 48800 }, { "epoch": 0.18578290690681545, "grad_norm": 0.12732064723968506, "learning_rate": 0.0005, "loss": 2.1258, "step": 48810 }, { "epoch": 0.18582096937493814, "grad_norm": 0.11984202265739441, "learning_rate": 0.0005, "loss": 2.1173, "step": 48820 }, { "epoch": 0.18585903184306082, "grad_norm": 0.1389389932155609, "learning_rate": 0.0005, "loss": 2.1268, "step": 48830 }, { "epoch": 0.1858970943111835, "grad_norm": 0.118854820728302, "learning_rate": 0.0005, "loss": 2.1302, "step": 48840 }, { "epoch": 0.1859351567793062, "grad_norm": 0.134931281208992, "learning_rate": 0.0005, "loss": 2.1413, "step": 48850 }, { "epoch": 0.18597321924742888, "grad_norm": 0.1198066771030426, "learning_rate": 0.0005, "loss": 2.1423, "step": 48860 }, { "epoch": 0.18601128171555156, "grad_norm": 0.11885160207748413, "learning_rate": 0.0005, "loss": 2.1393, "step": 48870 }, { "epoch": 0.18604934418367425, "grad_norm": 0.11900264024734497, "learning_rate": 0.0005, "loss": 2.1411, "step": 48880 }, { "epoch": 0.18608740665179693, "grad_norm": 0.12022736668586731, "learning_rate": 0.0005, "loss": 2.1359, "step": 48890 }, { "epoch": 0.18612546911991962, "grad_norm": 0.12380018830299377, "learning_rate": 0.0005, "loss": 2.1258, "step": 48900 }, { "epoch": 0.1861635315880423, "grad_norm": 0.12020589411258698, "learning_rate": 0.0005, "loss": 2.1331, "step": 48910 }, { "epoch": 0.186201594056165, "grad_norm": 0.11976821720600128, "learning_rate": 0.0005, "loss": 2.1389, "step": 48920 }, { "epoch": 0.18623965652428767, "grad_norm": 0.11832680553197861, "learning_rate": 0.0005, "loss": 2.1339, "step": 48930 }, { "epoch": 0.18627771899241036, "grad_norm": 0.13924725353717804, "learning_rate": 0.0005, "loss": 2.111, "step": 48940 }, { "epoch": 0.18631578146053301, "grad_norm": 0.13474136590957642, "learning_rate": 0.0005, "loss": 2.128, "step": 48950 }, { "epoch": 0.1863538439286557, "grad_norm": 0.12513698637485504, "learning_rate": 0.0005, "loss": 2.1351, "step": 48960 }, { "epoch": 0.18639190639677838, "grad_norm": 0.13066360354423523, "learning_rate": 0.0005, "loss": 2.1269, "step": 48970 }, { "epoch": 0.18642996886490107, "grad_norm": 0.12704874575138092, "learning_rate": 0.0005, "loss": 2.1366, "step": 48980 }, { "epoch": 0.18646803133302375, "grad_norm": 0.13729922473430634, "learning_rate": 0.0005, "loss": 2.1317, "step": 48990 }, { "epoch": 0.18650609380114644, "grad_norm": 0.1181914284825325, "learning_rate": 0.0005, "loss": 2.1381, "step": 49000 }, { "epoch": 0.18654415626926912, "grad_norm": 0.11445435136556625, "learning_rate": 0.0005, "loss": 2.1299, "step": 49010 }, { "epoch": 0.1865822187373918, "grad_norm": 0.12288684397935867, "learning_rate": 0.0005, "loss": 2.1354, "step": 49020 }, { "epoch": 0.1866202812055145, "grad_norm": 0.14258378744125366, "learning_rate": 0.0005, "loss": 2.1157, "step": 49030 }, { "epoch": 0.18665834367363718, "grad_norm": 0.12620224058628082, "learning_rate": 0.0005, "loss": 2.1516, "step": 49040 }, { "epoch": 0.18669640614175986, "grad_norm": 0.12300539016723633, "learning_rate": 0.0005, "loss": 2.1358, "step": 49050 }, { "epoch": 0.18673446860988255, "grad_norm": 0.12325773388147354, "learning_rate": 0.0005, "loss": 2.1246, "step": 49060 }, { "epoch": 0.18677253107800523, "grad_norm": 0.11611814796924591, "learning_rate": 0.0005, "loss": 2.1202, "step": 49070 }, { "epoch": 0.18681059354612792, "grad_norm": 0.1306041181087494, "learning_rate": 0.0005, "loss": 2.128, "step": 49080 }, { "epoch": 0.18684865601425057, "grad_norm": 0.12352015823125839, "learning_rate": 0.0005, "loss": 2.1381, "step": 49090 }, { "epoch": 0.18688671848237326, "grad_norm": 0.12239983677864075, "learning_rate": 0.0005, "loss": 2.1373, "step": 49100 }, { "epoch": 0.18692478095049594, "grad_norm": 0.12327679991722107, "learning_rate": 0.0005, "loss": 2.1248, "step": 49110 }, { "epoch": 0.18696284341861863, "grad_norm": 0.12638281285762787, "learning_rate": 0.0005, "loss": 2.136, "step": 49120 }, { "epoch": 0.18700090588674131, "grad_norm": 0.11979183554649353, "learning_rate": 0.0005, "loss": 2.1606, "step": 49130 }, { "epoch": 0.187038968354864, "grad_norm": 0.11857368052005768, "learning_rate": 0.0005, "loss": 2.1293, "step": 49140 }, { "epoch": 0.18707703082298668, "grad_norm": 0.11690958589315414, "learning_rate": 0.0005, "loss": 2.1292, "step": 49150 }, { "epoch": 0.18711509329110937, "grad_norm": 0.13325461745262146, "learning_rate": 0.0005, "loss": 2.1565, "step": 49160 }, { "epoch": 0.18715315575923205, "grad_norm": 0.1198049932718277, "learning_rate": 0.0005, "loss": 2.1298, "step": 49170 }, { "epoch": 0.18719121822735474, "grad_norm": 0.12244052439928055, "learning_rate": 0.0005, "loss": 2.1441, "step": 49180 }, { "epoch": 0.18722928069547742, "grad_norm": 0.11625416576862335, "learning_rate": 0.0005, "loss": 2.1308, "step": 49190 }, { "epoch": 0.1872673431636001, "grad_norm": 0.12328100204467773, "learning_rate": 0.0005, "loss": 2.1328, "step": 49200 }, { "epoch": 0.1873054056317228, "grad_norm": 0.1413453072309494, "learning_rate": 0.0005, "loss": 2.1237, "step": 49210 }, { "epoch": 0.18734346809984548, "grad_norm": 0.11608593910932541, "learning_rate": 0.0005, "loss": 2.1308, "step": 49220 }, { "epoch": 0.18738153056796816, "grad_norm": 0.142526313662529, "learning_rate": 0.0005, "loss": 2.1204, "step": 49230 }, { "epoch": 0.18741959303609082, "grad_norm": 0.13891670107841492, "learning_rate": 0.0005, "loss": 2.138, "step": 49240 }, { "epoch": 0.1874576555042135, "grad_norm": 0.119914211332798, "learning_rate": 0.0005, "loss": 2.137, "step": 49250 }, { "epoch": 0.1874957179723362, "grad_norm": 0.12706288695335388, "learning_rate": 0.0005, "loss": 2.1211, "step": 49260 }, { "epoch": 0.18753378044045887, "grad_norm": 0.13240917026996613, "learning_rate": 0.0005, "loss": 2.1241, "step": 49270 }, { "epoch": 0.18757184290858156, "grad_norm": 0.1289173662662506, "learning_rate": 0.0005, "loss": 2.1508, "step": 49280 }, { "epoch": 0.18760990537670424, "grad_norm": 0.110454261302948, "learning_rate": 0.0005, "loss": 2.1393, "step": 49290 }, { "epoch": 0.18764796784482693, "grad_norm": 0.10865352302789688, "learning_rate": 0.0005, "loss": 2.1256, "step": 49300 }, { "epoch": 0.18768603031294961, "grad_norm": 0.11935272812843323, "learning_rate": 0.0005, "loss": 2.1477, "step": 49310 }, { "epoch": 0.1877240927810723, "grad_norm": 0.12708429992198944, "learning_rate": 0.0005, "loss": 2.1261, "step": 49320 }, { "epoch": 0.18776215524919498, "grad_norm": 0.13141992688179016, "learning_rate": 0.0005, "loss": 2.1402, "step": 49330 }, { "epoch": 0.18780021771731767, "grad_norm": 0.12144871056079865, "learning_rate": 0.0005, "loss": 2.1189, "step": 49340 }, { "epoch": 0.18783828018544035, "grad_norm": 0.12627474963665009, "learning_rate": 0.0005, "loss": 2.1348, "step": 49350 }, { "epoch": 0.18787634265356304, "grad_norm": 0.11026687920093536, "learning_rate": 0.0005, "loss": 2.1376, "step": 49360 }, { "epoch": 0.18791440512168572, "grad_norm": 0.12230470031499863, "learning_rate": 0.0005, "loss": 2.1227, "step": 49370 }, { "epoch": 0.18795246758980838, "grad_norm": 0.12970934808254242, "learning_rate": 0.0005, "loss": 2.1304, "step": 49380 }, { "epoch": 0.18799053005793107, "grad_norm": 0.13543622195720673, "learning_rate": 0.0005, "loss": 2.1287, "step": 49390 }, { "epoch": 0.18802859252605375, "grad_norm": 0.11476121097803116, "learning_rate": 0.0005, "loss": 2.1291, "step": 49400 }, { "epoch": 0.18806665499417644, "grad_norm": 0.12133664637804031, "learning_rate": 0.0005, "loss": 2.1158, "step": 49410 }, { "epoch": 0.18810471746229912, "grad_norm": 0.12081196159124374, "learning_rate": 0.0005, "loss": 2.1394, "step": 49420 }, { "epoch": 0.1881427799304218, "grad_norm": 0.12121246755123138, "learning_rate": 0.0005, "loss": 2.1377, "step": 49430 }, { "epoch": 0.1881808423985445, "grad_norm": 0.1321023851633072, "learning_rate": 0.0005, "loss": 2.1339, "step": 49440 }, { "epoch": 0.18821890486666717, "grad_norm": 0.12149399518966675, "learning_rate": 0.0005, "loss": 2.1304, "step": 49450 }, { "epoch": 0.18825696733478986, "grad_norm": 0.12127463519573212, "learning_rate": 0.0005, "loss": 2.1303, "step": 49460 }, { "epoch": 0.18829502980291254, "grad_norm": 0.12894226610660553, "learning_rate": 0.0005, "loss": 2.1347, "step": 49470 }, { "epoch": 0.18833309227103523, "grad_norm": 0.11598140746355057, "learning_rate": 0.0005, "loss": 2.1335, "step": 49480 }, { "epoch": 0.18837115473915791, "grad_norm": 0.11026100814342499, "learning_rate": 0.0005, "loss": 2.1223, "step": 49490 }, { "epoch": 0.1884092172072806, "grad_norm": 0.11874309182167053, "learning_rate": 0.0005, "loss": 2.1258, "step": 49500 }, { "epoch": 0.18844727967540328, "grad_norm": 0.12631499767303467, "learning_rate": 0.0005, "loss": 2.1305, "step": 49510 }, { "epoch": 0.18848534214352594, "grad_norm": 0.1200314313173294, "learning_rate": 0.0005, "loss": 2.1302, "step": 49520 }, { "epoch": 0.18852340461164863, "grad_norm": 0.12178215384483337, "learning_rate": 0.0005, "loss": 2.1252, "step": 49530 }, { "epoch": 0.1885614670797713, "grad_norm": 0.11553493142127991, "learning_rate": 0.0005, "loss": 2.1306, "step": 49540 }, { "epoch": 0.188599529547894, "grad_norm": 0.24640120565891266, "learning_rate": 0.0005, "loss": 2.1243, "step": 49550 }, { "epoch": 0.18863759201601668, "grad_norm": 0.12426994740962982, "learning_rate": 0.0005, "loss": 2.1342, "step": 49560 }, { "epoch": 0.18867565448413937, "grad_norm": 0.13868281245231628, "learning_rate": 0.0005, "loss": 2.1437, "step": 49570 }, { "epoch": 0.18871371695226205, "grad_norm": 0.12489303201436996, "learning_rate": 0.0005, "loss": 2.1391, "step": 49580 }, { "epoch": 0.18875177942038474, "grad_norm": 0.1338043361902237, "learning_rate": 0.0005, "loss": 2.1402, "step": 49590 }, { "epoch": 0.18878984188850742, "grad_norm": 0.13678200542926788, "learning_rate": 0.0005, "loss": 2.1437, "step": 49600 }, { "epoch": 0.1888279043566301, "grad_norm": 0.12890838086605072, "learning_rate": 0.0005, "loss": 2.1372, "step": 49610 }, { "epoch": 0.1888659668247528, "grad_norm": 0.11385848373174667, "learning_rate": 0.0005, "loss": 2.139, "step": 49620 }, { "epoch": 0.18890402929287547, "grad_norm": 0.11929275095462799, "learning_rate": 0.0005, "loss": 2.1457, "step": 49630 }, { "epoch": 0.18894209176099816, "grad_norm": 0.13784067332744598, "learning_rate": 0.0005, "loss": 2.1211, "step": 49640 }, { "epoch": 0.18898015422912084, "grad_norm": 0.12938465178012848, "learning_rate": 0.0005, "loss": 2.1264, "step": 49650 }, { "epoch": 0.18901821669724353, "grad_norm": 0.14009609818458557, "learning_rate": 0.0005, "loss": 2.1328, "step": 49660 }, { "epoch": 0.1890562791653662, "grad_norm": 0.1335345357656479, "learning_rate": 0.0005, "loss": 2.1265, "step": 49670 }, { "epoch": 0.18909434163348887, "grad_norm": 0.12277305871248245, "learning_rate": 0.0005, "loss": 2.1308, "step": 49680 }, { "epoch": 0.18913240410161156, "grad_norm": 0.12641063332557678, "learning_rate": 0.0005, "loss": 2.1311, "step": 49690 }, { "epoch": 0.18917046656973424, "grad_norm": 0.13216422498226166, "learning_rate": 0.0005, "loss": 2.1444, "step": 49700 }, { "epoch": 0.18920852903785693, "grad_norm": 0.1259606033563614, "learning_rate": 0.0005, "loss": 2.124, "step": 49710 }, { "epoch": 0.1892465915059796, "grad_norm": 0.1319742500782013, "learning_rate": 0.0005, "loss": 2.1273, "step": 49720 }, { "epoch": 0.1892846539741023, "grad_norm": 0.14151360094547272, "learning_rate": 0.0005, "loss": 2.1328, "step": 49730 }, { "epoch": 0.18932271644222498, "grad_norm": 0.1352107673883438, "learning_rate": 0.0005, "loss": 2.146, "step": 49740 }, { "epoch": 0.18936077891034767, "grad_norm": 0.11739267408847809, "learning_rate": 0.0005, "loss": 2.1385, "step": 49750 }, { "epoch": 0.18939884137847035, "grad_norm": 0.1501825600862503, "learning_rate": 0.0005, "loss": 2.1296, "step": 49760 }, { "epoch": 0.18943690384659304, "grad_norm": 0.1281415820121765, "learning_rate": 0.0005, "loss": 2.1305, "step": 49770 }, { "epoch": 0.18947496631471572, "grad_norm": 0.12109538912773132, "learning_rate": 0.0005, "loss": 2.1366, "step": 49780 }, { "epoch": 0.1895130287828384, "grad_norm": 0.13439509272575378, "learning_rate": 0.0005, "loss": 2.156, "step": 49790 }, { "epoch": 0.1895510912509611, "grad_norm": 0.11888731271028519, "learning_rate": 0.0005, "loss": 2.135, "step": 49800 }, { "epoch": 0.18958915371908375, "grad_norm": 0.13279883563518524, "learning_rate": 0.0005, "loss": 2.1175, "step": 49810 }, { "epoch": 0.18962721618720643, "grad_norm": 0.11696521192789078, "learning_rate": 0.0005, "loss": 2.1277, "step": 49820 }, { "epoch": 0.18966527865532912, "grad_norm": 0.11308915913105011, "learning_rate": 0.0005, "loss": 2.1298, "step": 49830 }, { "epoch": 0.1897033411234518, "grad_norm": 0.12376297265291214, "learning_rate": 0.0005, "loss": 2.1321, "step": 49840 }, { "epoch": 0.1897414035915745, "grad_norm": 0.14907288551330566, "learning_rate": 0.0005, "loss": 2.1456, "step": 49850 }, { "epoch": 0.18977946605969717, "grad_norm": 0.11262844502925873, "learning_rate": 0.0005, "loss": 2.1287, "step": 49860 }, { "epoch": 0.18981752852781986, "grad_norm": 0.12050331383943558, "learning_rate": 0.0005, "loss": 2.1414, "step": 49870 }, { "epoch": 0.18985559099594254, "grad_norm": 0.12309075146913528, "learning_rate": 0.0005, "loss": 2.113, "step": 49880 }, { "epoch": 0.18989365346406523, "grad_norm": 0.12859241664409637, "learning_rate": 0.0005, "loss": 2.1124, "step": 49890 }, { "epoch": 0.1899317159321879, "grad_norm": 0.13295090198516846, "learning_rate": 0.0005, "loss": 2.1306, "step": 49900 }, { "epoch": 0.1899697784003106, "grad_norm": 0.13653841614723206, "learning_rate": 0.0005, "loss": 2.1398, "step": 49910 }, { "epoch": 0.19000784086843328, "grad_norm": 0.12351825833320618, "learning_rate": 0.0005, "loss": 2.135, "step": 49920 }, { "epoch": 0.19004590333655597, "grad_norm": 0.11759936064481735, "learning_rate": 0.0005, "loss": 2.1284, "step": 49930 }, { "epoch": 0.19008396580467865, "grad_norm": 0.12333490699529648, "learning_rate": 0.0005, "loss": 2.1312, "step": 49940 }, { "epoch": 0.1901220282728013, "grad_norm": 0.12636221945285797, "learning_rate": 0.0005, "loss": 2.1266, "step": 49950 }, { "epoch": 0.190160090740924, "grad_norm": 0.11555564403533936, "learning_rate": 0.0005, "loss": 2.1439, "step": 49960 }, { "epoch": 0.19019815320904668, "grad_norm": 0.11537200212478638, "learning_rate": 0.0005, "loss": 2.136, "step": 49970 }, { "epoch": 0.19023621567716936, "grad_norm": 0.1284588724374771, "learning_rate": 0.0005, "loss": 2.1446, "step": 49980 }, { "epoch": 0.19027427814529205, "grad_norm": 0.151369109749794, "learning_rate": 0.0005, "loss": 2.1327, "step": 49990 }, { "epoch": 0.19031234061341473, "grad_norm": 0.11686275154352188, "learning_rate": 0.0005, "loss": 2.1461, "step": 50000 }, { "epoch": 0.19035040308153742, "grad_norm": 0.10784438997507095, "learning_rate": 0.0005, "loss": 2.1342, "step": 50010 }, { "epoch": 0.1903884655496601, "grad_norm": 0.22661909461021423, "learning_rate": 0.0005, "loss": 2.1315, "step": 50020 }, { "epoch": 0.1904265280177828, "grad_norm": 0.10580966621637344, "learning_rate": 0.0005, "loss": 2.1217, "step": 50030 }, { "epoch": 0.19046459048590547, "grad_norm": 0.12006790190935135, "learning_rate": 0.0005, "loss": 2.1377, "step": 50040 }, { "epoch": 0.19050265295402816, "grad_norm": 0.1274092197418213, "learning_rate": 0.0005, "loss": 2.1348, "step": 50050 }, { "epoch": 0.19054071542215084, "grad_norm": 0.13993249833583832, "learning_rate": 0.0005, "loss": 2.1332, "step": 50060 }, { "epoch": 0.19057877789027353, "grad_norm": 0.13096733391284943, "learning_rate": 0.0005, "loss": 2.1319, "step": 50070 }, { "epoch": 0.1906168403583962, "grad_norm": 0.11933179944753647, "learning_rate": 0.0005, "loss": 2.1427, "step": 50080 }, { "epoch": 0.1906549028265189, "grad_norm": 0.1263686716556549, "learning_rate": 0.0005, "loss": 2.1311, "step": 50090 }, { "epoch": 0.19069296529464155, "grad_norm": 0.12206216901540756, "learning_rate": 0.0005, "loss": 2.1448, "step": 50100 }, { "epoch": 0.19073102776276424, "grad_norm": 0.13494496047496796, "learning_rate": 0.0005, "loss": 2.1366, "step": 50110 }, { "epoch": 0.19076909023088692, "grad_norm": 0.11924020200967789, "learning_rate": 0.0005, "loss": 2.1282, "step": 50120 }, { "epoch": 0.1908071526990096, "grad_norm": 0.13925659656524658, "learning_rate": 0.0005, "loss": 2.1257, "step": 50130 }, { "epoch": 0.1908452151671323, "grad_norm": 0.12809541821479797, "learning_rate": 0.0005, "loss": 2.1378, "step": 50140 }, { "epoch": 0.19088327763525498, "grad_norm": 0.11883596330881119, "learning_rate": 0.0005, "loss": 2.1307, "step": 50150 }, { "epoch": 0.19092134010337766, "grad_norm": 0.11638153344392776, "learning_rate": 0.0005, "loss": 2.1246, "step": 50160 }, { "epoch": 0.19095940257150035, "grad_norm": 0.12264906615018845, "learning_rate": 0.0005, "loss": 2.1291, "step": 50170 }, { "epoch": 0.19099746503962303, "grad_norm": 0.12147228419780731, "learning_rate": 0.0005, "loss": 2.1194, "step": 50180 }, { "epoch": 0.19103552750774572, "grad_norm": 0.11615349352359772, "learning_rate": 0.0005, "loss": 2.1278, "step": 50190 }, { "epoch": 0.1910735899758684, "grad_norm": 0.1336466521024704, "learning_rate": 0.0005, "loss": 2.1222, "step": 50200 }, { "epoch": 0.1911116524439911, "grad_norm": 0.12595194578170776, "learning_rate": 0.0005, "loss": 2.134, "step": 50210 }, { "epoch": 0.19114971491211377, "grad_norm": 0.11743763834238052, "learning_rate": 0.0005, "loss": 2.1313, "step": 50220 }, { "epoch": 0.19118777738023646, "grad_norm": 0.11929907649755478, "learning_rate": 0.0005, "loss": 2.1418, "step": 50230 }, { "epoch": 0.1912258398483591, "grad_norm": 0.11452007293701172, "learning_rate": 0.0005, "loss": 2.1289, "step": 50240 }, { "epoch": 0.1912639023164818, "grad_norm": 0.13159744441509247, "learning_rate": 0.0005, "loss": 2.1356, "step": 50250 }, { "epoch": 0.19130196478460448, "grad_norm": 0.13729321956634521, "learning_rate": 0.0005, "loss": 2.1262, "step": 50260 }, { "epoch": 0.19134002725272717, "grad_norm": 0.1385490447282791, "learning_rate": 0.0005, "loss": 2.1428, "step": 50270 }, { "epoch": 0.19137808972084985, "grad_norm": 0.12463898211717606, "learning_rate": 0.0005, "loss": 2.1292, "step": 50280 }, { "epoch": 0.19141615218897254, "grad_norm": 0.13166537880897522, "learning_rate": 0.0005, "loss": 2.1283, "step": 50290 }, { "epoch": 0.19145421465709522, "grad_norm": 0.1303899586200714, "learning_rate": 0.0005, "loss": 2.1222, "step": 50300 }, { "epoch": 0.1914922771252179, "grad_norm": 0.12919609248638153, "learning_rate": 0.0005, "loss": 2.1142, "step": 50310 }, { "epoch": 0.1915303395933406, "grad_norm": 0.13564112782478333, "learning_rate": 0.0005, "loss": 2.1357, "step": 50320 }, { "epoch": 0.19156840206146328, "grad_norm": 0.12312301993370056, "learning_rate": 0.0005, "loss": 2.141, "step": 50330 }, { "epoch": 0.19160646452958596, "grad_norm": 0.13214318454265594, "learning_rate": 0.0005, "loss": 2.1327, "step": 50340 }, { "epoch": 0.19164452699770865, "grad_norm": 0.12344446033239365, "learning_rate": 0.0005, "loss": 2.1262, "step": 50350 }, { "epoch": 0.19168258946583133, "grad_norm": 0.1164083480834961, "learning_rate": 0.0005, "loss": 2.1427, "step": 50360 }, { "epoch": 0.19172065193395402, "grad_norm": 0.1222231537103653, "learning_rate": 0.0005, "loss": 2.12, "step": 50370 }, { "epoch": 0.1917587144020767, "grad_norm": 0.13423392176628113, "learning_rate": 0.0005, "loss": 2.1452, "step": 50380 }, { "epoch": 0.19179677687019936, "grad_norm": 0.14245694875717163, "learning_rate": 0.0005, "loss": 2.1305, "step": 50390 }, { "epoch": 0.19183483933832204, "grad_norm": 0.1276567131280899, "learning_rate": 0.0005, "loss": 2.1307, "step": 50400 }, { "epoch": 0.19187290180644473, "grad_norm": 0.1121436059474945, "learning_rate": 0.0005, "loss": 2.1191, "step": 50410 }, { "epoch": 0.1919109642745674, "grad_norm": 0.1311924308538437, "learning_rate": 0.0005, "loss": 2.1238, "step": 50420 }, { "epoch": 0.1919490267426901, "grad_norm": 0.12689100205898285, "learning_rate": 0.0005, "loss": 2.1429, "step": 50430 }, { "epoch": 0.19198708921081278, "grad_norm": 0.1223243772983551, "learning_rate": 0.0005, "loss": 2.1402, "step": 50440 }, { "epoch": 0.19202515167893547, "grad_norm": 0.11202087998390198, "learning_rate": 0.0005, "loss": 2.1272, "step": 50450 }, { "epoch": 0.19206321414705815, "grad_norm": 0.12189372628927231, "learning_rate": 0.0005, "loss": 2.1215, "step": 50460 }, { "epoch": 0.19210127661518084, "grad_norm": 0.1238129734992981, "learning_rate": 0.0005, "loss": 2.1257, "step": 50470 }, { "epoch": 0.19213933908330352, "grad_norm": 0.1346205621957779, "learning_rate": 0.0005, "loss": 2.1453, "step": 50480 }, { "epoch": 0.1921774015514262, "grad_norm": 0.11418620496988297, "learning_rate": 0.0005, "loss": 2.132, "step": 50490 }, { "epoch": 0.1922154640195489, "grad_norm": 0.12495341897010803, "learning_rate": 0.0005, "loss": 2.1299, "step": 50500 }, { "epoch": 0.19225352648767158, "grad_norm": 0.11481189727783203, "learning_rate": 0.0005, "loss": 2.1382, "step": 50510 }, { "epoch": 0.19229158895579426, "grad_norm": 0.13317109644412994, "learning_rate": 0.0005, "loss": 2.1398, "step": 50520 }, { "epoch": 0.19232965142391692, "grad_norm": 0.12128578871488571, "learning_rate": 0.0005, "loss": 2.1313, "step": 50530 }, { "epoch": 0.1923677138920396, "grad_norm": 0.12536193430423737, "learning_rate": 0.0005, "loss": 2.138, "step": 50540 }, { "epoch": 0.1924057763601623, "grad_norm": 0.12248624861240387, "learning_rate": 0.0005, "loss": 2.1299, "step": 50550 }, { "epoch": 0.19244383882828497, "grad_norm": 0.11845123022794724, "learning_rate": 0.0005, "loss": 2.1377, "step": 50560 }, { "epoch": 0.19248190129640766, "grad_norm": 0.11631769686937332, "learning_rate": 0.0005, "loss": 2.1302, "step": 50570 }, { "epoch": 0.19251996376453034, "grad_norm": 0.11704082041978836, "learning_rate": 0.0005, "loss": 2.1302, "step": 50580 }, { "epoch": 0.19255802623265303, "grad_norm": 0.11989939212799072, "learning_rate": 0.0005, "loss": 2.1465, "step": 50590 }, { "epoch": 0.1925960887007757, "grad_norm": 0.11196364462375641, "learning_rate": 0.0005, "loss": 2.1315, "step": 50600 }, { "epoch": 0.1926341511688984, "grad_norm": 0.15060563385486603, "learning_rate": 0.0005, "loss": 2.12, "step": 50610 }, { "epoch": 0.19267221363702108, "grad_norm": 0.1247202679514885, "learning_rate": 0.0005, "loss": 2.1388, "step": 50620 }, { "epoch": 0.19271027610514377, "grad_norm": 0.13385535776615143, "learning_rate": 0.0005, "loss": 2.1307, "step": 50630 }, { "epoch": 0.19274833857326645, "grad_norm": 0.12829150259494781, "learning_rate": 0.0005, "loss": 2.1464, "step": 50640 }, { "epoch": 0.19278640104138914, "grad_norm": 0.11384773999452591, "learning_rate": 0.0005, "loss": 2.1336, "step": 50650 }, { "epoch": 0.19282446350951182, "grad_norm": 0.11690258234739304, "learning_rate": 0.0005, "loss": 2.1262, "step": 50660 }, { "epoch": 0.19286252597763448, "grad_norm": 0.12879469990730286, "learning_rate": 0.0005, "loss": 2.132, "step": 50670 }, { "epoch": 0.19290058844575717, "grad_norm": 0.13677574694156647, "learning_rate": 0.0005, "loss": 2.1315, "step": 50680 }, { "epoch": 0.19293865091387985, "grad_norm": 0.11285001039505005, "learning_rate": 0.0005, "loss": 2.1361, "step": 50690 }, { "epoch": 0.19297671338200253, "grad_norm": 0.11710193753242493, "learning_rate": 0.0005, "loss": 2.1132, "step": 50700 }, { "epoch": 0.19301477585012522, "grad_norm": 0.13037873804569244, "learning_rate": 0.0005, "loss": 2.1204, "step": 50710 }, { "epoch": 0.1930528383182479, "grad_norm": 0.11687429249286652, "learning_rate": 0.0005, "loss": 2.1206, "step": 50720 }, { "epoch": 0.1930909007863706, "grad_norm": 0.11817038804292679, "learning_rate": 0.0005, "loss": 2.1167, "step": 50730 }, { "epoch": 0.19312896325449327, "grad_norm": 0.12508970499038696, "learning_rate": 0.0005, "loss": 2.1219, "step": 50740 }, { "epoch": 0.19316702572261596, "grad_norm": 0.11719117313623428, "learning_rate": 0.0005, "loss": 2.1417, "step": 50750 }, { "epoch": 0.19320508819073864, "grad_norm": 0.12354373931884766, "learning_rate": 0.0005, "loss": 2.1255, "step": 50760 }, { "epoch": 0.19324315065886133, "grad_norm": 0.5578530430793762, "learning_rate": 0.0005, "loss": 2.1176, "step": 50770 }, { "epoch": 0.19328121312698401, "grad_norm": 0.13043108582496643, "learning_rate": 0.0005, "loss": 2.1305, "step": 50780 }, { "epoch": 0.1933192755951067, "grad_norm": 0.12195798009634018, "learning_rate": 0.0005, "loss": 2.1492, "step": 50790 }, { "epoch": 0.19335733806322938, "grad_norm": 0.13272693753242493, "learning_rate": 0.0005, "loss": 2.1238, "step": 50800 }, { "epoch": 0.19339540053135207, "grad_norm": 0.1230696439743042, "learning_rate": 0.0005, "loss": 2.123, "step": 50810 }, { "epoch": 0.19343346299947473, "grad_norm": 0.12242135405540466, "learning_rate": 0.0005, "loss": 2.1368, "step": 50820 }, { "epoch": 0.1934715254675974, "grad_norm": 0.11244331300258636, "learning_rate": 0.0005, "loss": 2.1297, "step": 50830 }, { "epoch": 0.1935095879357201, "grad_norm": 0.1284191906452179, "learning_rate": 0.0005, "loss": 2.1366, "step": 50840 }, { "epoch": 0.19354765040384278, "grad_norm": 0.14463888108730316, "learning_rate": 0.0005, "loss": 2.1242, "step": 50850 }, { "epoch": 0.19358571287196547, "grad_norm": 0.12740777432918549, "learning_rate": 0.0005, "loss": 2.1075, "step": 50860 }, { "epoch": 0.19362377534008815, "grad_norm": 0.1244569942355156, "learning_rate": 0.0005, "loss": 2.1224, "step": 50870 }, { "epoch": 0.19366183780821083, "grad_norm": 0.1358763873577118, "learning_rate": 0.0005, "loss": 2.1282, "step": 50880 }, { "epoch": 0.19369990027633352, "grad_norm": 0.13158994913101196, "learning_rate": 0.0005, "loss": 2.1354, "step": 50890 }, { "epoch": 0.1937379627444562, "grad_norm": 0.12264818698167801, "learning_rate": 0.0005, "loss": 2.1286, "step": 50900 }, { "epoch": 0.1937760252125789, "grad_norm": 0.12059519439935684, "learning_rate": 0.0005, "loss": 2.1343, "step": 50910 }, { "epoch": 0.19381408768070157, "grad_norm": 0.12643907964229584, "learning_rate": 0.0005, "loss": 2.1394, "step": 50920 }, { "epoch": 0.19385215014882426, "grad_norm": 0.12796556949615479, "learning_rate": 0.0005, "loss": 2.1383, "step": 50930 }, { "epoch": 0.19389021261694694, "grad_norm": 0.11849892139434814, "learning_rate": 0.0005, "loss": 2.134, "step": 50940 }, { "epoch": 0.19392827508506963, "grad_norm": 0.12396416068077087, "learning_rate": 0.0005, "loss": 2.1149, "step": 50950 }, { "epoch": 0.1939663375531923, "grad_norm": 0.1255057454109192, "learning_rate": 0.0005, "loss": 2.1433, "step": 50960 }, { "epoch": 0.19400440002131497, "grad_norm": 0.11904613673686981, "learning_rate": 0.0005, "loss": 2.1168, "step": 50970 }, { "epoch": 0.19404246248943766, "grad_norm": 0.13274280726909637, "learning_rate": 0.0005, "loss": 2.1324, "step": 50980 }, { "epoch": 0.19408052495756034, "grad_norm": 0.11690010130405426, "learning_rate": 0.0005, "loss": 2.1324, "step": 50990 }, { "epoch": 0.19411858742568303, "grad_norm": 0.12247753888368607, "learning_rate": 0.0005, "loss": 2.1138, "step": 51000 }, { "epoch": 0.1941566498938057, "grad_norm": 0.11586964875459671, "learning_rate": 0.0005, "loss": 2.1377, "step": 51010 }, { "epoch": 0.1941947123619284, "grad_norm": 0.12469108402729034, "learning_rate": 0.0005, "loss": 2.1348, "step": 51020 }, { "epoch": 0.19423277483005108, "grad_norm": 0.14118361473083496, "learning_rate": 0.0005, "loss": 2.1217, "step": 51030 }, { "epoch": 0.19427083729817377, "grad_norm": 0.15017235279083252, "learning_rate": 0.0005, "loss": 2.1334, "step": 51040 }, { "epoch": 0.19430889976629645, "grad_norm": 0.12537063658237457, "learning_rate": 0.0005, "loss": 2.1251, "step": 51050 }, { "epoch": 0.19434696223441913, "grad_norm": 0.13568291068077087, "learning_rate": 0.0005, "loss": 2.1272, "step": 51060 }, { "epoch": 0.19438502470254182, "grad_norm": 0.1382315754890442, "learning_rate": 0.0005, "loss": 2.1238, "step": 51070 }, { "epoch": 0.1944230871706645, "grad_norm": 0.35568517446517944, "learning_rate": 0.0005, "loss": 2.1246, "step": 51080 }, { "epoch": 0.1944611496387872, "grad_norm": 0.11623500287532806, "learning_rate": 0.0005, "loss": 2.1183, "step": 51090 }, { "epoch": 0.19449921210690985, "grad_norm": 0.13279907405376434, "learning_rate": 0.0005, "loss": 2.1277, "step": 51100 }, { "epoch": 0.19453727457503253, "grad_norm": 0.1344604790210724, "learning_rate": 0.0005, "loss": 2.1221, "step": 51110 }, { "epoch": 0.19457533704315522, "grad_norm": 0.12161675095558167, "learning_rate": 0.0005, "loss": 2.1429, "step": 51120 }, { "epoch": 0.1946133995112779, "grad_norm": 0.11104878783226013, "learning_rate": 0.0005, "loss": 2.1392, "step": 51130 }, { "epoch": 0.1946514619794006, "grad_norm": 0.12998434901237488, "learning_rate": 0.0005, "loss": 2.1308, "step": 51140 }, { "epoch": 0.19468952444752327, "grad_norm": 0.12012049555778503, "learning_rate": 0.0005, "loss": 2.135, "step": 51150 }, { "epoch": 0.19472758691564596, "grad_norm": 0.16529619693756104, "learning_rate": 0.0005, "loss": 2.1257, "step": 51160 }, { "epoch": 0.19476564938376864, "grad_norm": 0.11907965689897537, "learning_rate": 0.0005, "loss": 2.123, "step": 51170 }, { "epoch": 0.19480371185189133, "grad_norm": 0.13431844115257263, "learning_rate": 0.0005, "loss": 2.1355, "step": 51180 }, { "epoch": 0.194841774320014, "grad_norm": 0.12098430842161179, "learning_rate": 0.0005, "loss": 2.1242, "step": 51190 }, { "epoch": 0.1948798367881367, "grad_norm": 0.11998952925205231, "learning_rate": 0.0005, "loss": 2.1241, "step": 51200 }, { "epoch": 0.19491789925625938, "grad_norm": 0.12975624203681946, "learning_rate": 0.0005, "loss": 2.1387, "step": 51210 }, { "epoch": 0.19495596172438207, "grad_norm": 0.1254945546388626, "learning_rate": 0.0005, "loss": 2.1415, "step": 51220 }, { "epoch": 0.19499402419250475, "grad_norm": 0.11360818147659302, "learning_rate": 0.0005, "loss": 2.1321, "step": 51230 }, { "epoch": 0.19503208666062744, "grad_norm": 0.12699361145496368, "learning_rate": 0.0005, "loss": 2.1382, "step": 51240 }, { "epoch": 0.1950701491287501, "grad_norm": 0.11986838281154633, "learning_rate": 0.0005, "loss": 2.1277, "step": 51250 }, { "epoch": 0.19510821159687278, "grad_norm": 0.11837062984704971, "learning_rate": 0.0005, "loss": 2.1339, "step": 51260 }, { "epoch": 0.19514627406499546, "grad_norm": 0.1267804652452469, "learning_rate": 0.0005, "loss": 2.1336, "step": 51270 }, { "epoch": 0.19518433653311815, "grad_norm": 0.12151280045509338, "learning_rate": 0.0005, "loss": 2.1211, "step": 51280 }, { "epoch": 0.19522239900124083, "grad_norm": 0.13360044360160828, "learning_rate": 0.0005, "loss": 2.138, "step": 51290 }, { "epoch": 0.19526046146936352, "grad_norm": 0.12029470503330231, "learning_rate": 0.0005, "loss": 2.1386, "step": 51300 }, { "epoch": 0.1952985239374862, "grad_norm": 0.1321459412574768, "learning_rate": 0.0005, "loss": 2.1188, "step": 51310 }, { "epoch": 0.1953365864056089, "grad_norm": 0.12111317366361618, "learning_rate": 0.0005, "loss": 2.1442, "step": 51320 }, { "epoch": 0.19537464887373157, "grad_norm": 0.13475729525089264, "learning_rate": 0.0005, "loss": 2.1346, "step": 51330 }, { "epoch": 0.19541271134185426, "grad_norm": 0.13722528517246246, "learning_rate": 0.0005, "loss": 2.1175, "step": 51340 }, { "epoch": 0.19545077380997694, "grad_norm": 0.12443096190690994, "learning_rate": 0.0005, "loss": 2.1342, "step": 51350 }, { "epoch": 0.19548883627809963, "grad_norm": 0.13265202939510345, "learning_rate": 0.0005, "loss": 2.1271, "step": 51360 }, { "epoch": 0.1955268987462223, "grad_norm": 0.12839211523532867, "learning_rate": 0.0005, "loss": 2.1198, "step": 51370 }, { "epoch": 0.195564961214345, "grad_norm": 0.11944104731082916, "learning_rate": 0.0005, "loss": 2.1279, "step": 51380 }, { "epoch": 0.19560302368246765, "grad_norm": 0.12753385305404663, "learning_rate": 0.0005, "loss": 2.13, "step": 51390 }, { "epoch": 0.19564108615059034, "grad_norm": 0.12177974730730057, "learning_rate": 0.0005, "loss": 2.1327, "step": 51400 }, { "epoch": 0.19567914861871302, "grad_norm": 0.12434303760528564, "learning_rate": 0.0005, "loss": 2.1232, "step": 51410 }, { "epoch": 0.1957172110868357, "grad_norm": 0.13650812208652496, "learning_rate": 0.0005, "loss": 2.1349, "step": 51420 }, { "epoch": 0.1957552735549584, "grad_norm": 0.11836228519678116, "learning_rate": 0.0005, "loss": 2.1356, "step": 51430 }, { "epoch": 0.19579333602308108, "grad_norm": 0.1358971744775772, "learning_rate": 0.0005, "loss": 2.1442, "step": 51440 }, { "epoch": 0.19583139849120376, "grad_norm": 0.1255125254392624, "learning_rate": 0.0005, "loss": 2.1368, "step": 51450 }, { "epoch": 0.19586946095932645, "grad_norm": 0.12064887583255768, "learning_rate": 0.0005, "loss": 2.123, "step": 51460 }, { "epoch": 0.19590752342744913, "grad_norm": 0.13757894933223724, "learning_rate": 0.0005, "loss": 2.1235, "step": 51470 }, { "epoch": 0.19594558589557182, "grad_norm": 0.12057624012231827, "learning_rate": 0.0005, "loss": 2.1379, "step": 51480 }, { "epoch": 0.1959836483636945, "grad_norm": 0.12316569685935974, "learning_rate": 0.0005, "loss": 2.128, "step": 51490 }, { "epoch": 0.1960217108318172, "grad_norm": 0.13517151772975922, "learning_rate": 0.0005, "loss": 2.125, "step": 51500 }, { "epoch": 0.19605977329993987, "grad_norm": 0.12520772218704224, "learning_rate": 0.0005, "loss": 2.1343, "step": 51510 }, { "epoch": 0.19609783576806256, "grad_norm": 0.13712680339813232, "learning_rate": 0.0005, "loss": 2.1234, "step": 51520 }, { "epoch": 0.19613589823618524, "grad_norm": 0.1230507493019104, "learning_rate": 0.0005, "loss": 2.1159, "step": 51530 }, { "epoch": 0.1961739607043079, "grad_norm": 0.12265963107347488, "learning_rate": 0.0005, "loss": 2.1313, "step": 51540 }, { "epoch": 0.19621202317243058, "grad_norm": 0.1328250914812088, "learning_rate": 0.0005, "loss": 2.1404, "step": 51550 }, { "epoch": 0.19625008564055327, "grad_norm": 0.13123562932014465, "learning_rate": 0.0005, "loss": 2.1101, "step": 51560 }, { "epoch": 0.19628814810867595, "grad_norm": 0.12056384980678558, "learning_rate": 0.0005, "loss": 2.128, "step": 51570 }, { "epoch": 0.19632621057679864, "grad_norm": 0.12020045518875122, "learning_rate": 0.0005, "loss": 2.1548, "step": 51580 }, { "epoch": 0.19636427304492132, "grad_norm": 0.11768834292888641, "learning_rate": 0.0005, "loss": 2.1352, "step": 51590 }, { "epoch": 0.196402335513044, "grad_norm": 0.12291178852319717, "learning_rate": 0.0005, "loss": 2.1215, "step": 51600 }, { "epoch": 0.1964403979811667, "grad_norm": 0.12086016684770584, "learning_rate": 0.0005, "loss": 2.1344, "step": 51610 }, { "epoch": 0.19647846044928938, "grad_norm": 0.12200043350458145, "learning_rate": 0.0005, "loss": 2.135, "step": 51620 }, { "epoch": 0.19651652291741206, "grad_norm": 0.1366575062274933, "learning_rate": 0.0005, "loss": 2.1251, "step": 51630 }, { "epoch": 0.19655458538553475, "grad_norm": 0.1327793151140213, "learning_rate": 0.0005, "loss": 2.1308, "step": 51640 }, { "epoch": 0.19659264785365743, "grad_norm": 0.1226876825094223, "learning_rate": 0.0005, "loss": 2.1255, "step": 51650 }, { "epoch": 0.19663071032178012, "grad_norm": 0.11945180594921112, "learning_rate": 0.0005, "loss": 2.1417, "step": 51660 }, { "epoch": 0.1966687727899028, "grad_norm": 0.12915171682834625, "learning_rate": 0.0005, "loss": 2.1343, "step": 51670 }, { "epoch": 0.19670683525802546, "grad_norm": 0.12176530808210373, "learning_rate": 0.0005, "loss": 2.1359, "step": 51680 }, { "epoch": 0.19674489772614814, "grad_norm": 0.11432671546936035, "learning_rate": 0.0005, "loss": 2.1316, "step": 51690 }, { "epoch": 0.19678296019427083, "grad_norm": 0.12373737245798111, "learning_rate": 0.0005, "loss": 2.1377, "step": 51700 }, { "epoch": 0.1968210226623935, "grad_norm": 0.12407080084085464, "learning_rate": 0.0005, "loss": 2.1308, "step": 51710 }, { "epoch": 0.1968590851305162, "grad_norm": 0.11970575898885727, "learning_rate": 0.0005, "loss": 2.13, "step": 51720 }, { "epoch": 0.19689714759863888, "grad_norm": 0.13847972452640533, "learning_rate": 0.0005, "loss": 2.1225, "step": 51730 }, { "epoch": 0.19693521006676157, "grad_norm": 0.13606782257556915, "learning_rate": 0.0005, "loss": 2.1276, "step": 51740 }, { "epoch": 0.19697327253488425, "grad_norm": 0.11516030877828598, "learning_rate": 0.0005, "loss": 2.1331, "step": 51750 }, { "epoch": 0.19701133500300694, "grad_norm": 0.13698327541351318, "learning_rate": 0.0005, "loss": 2.133, "step": 51760 }, { "epoch": 0.19704939747112962, "grad_norm": 0.12343499809503555, "learning_rate": 0.0005, "loss": 2.1175, "step": 51770 }, { "epoch": 0.1970874599392523, "grad_norm": 0.11765126883983612, "learning_rate": 0.0005, "loss": 2.1062, "step": 51780 }, { "epoch": 0.197125522407375, "grad_norm": 0.1384083479642868, "learning_rate": 0.0005, "loss": 2.1212, "step": 51790 }, { "epoch": 0.19716358487549768, "grad_norm": 0.12069667130708694, "learning_rate": 0.0005, "loss": 2.1239, "step": 51800 }, { "epoch": 0.19720164734362036, "grad_norm": 0.1360340118408203, "learning_rate": 0.0005, "loss": 2.1423, "step": 51810 }, { "epoch": 0.19723970981174302, "grad_norm": 0.13412491977214813, "learning_rate": 0.0005, "loss": 2.125, "step": 51820 }, { "epoch": 0.1972777722798657, "grad_norm": 0.14328962564468384, "learning_rate": 0.0005, "loss": 2.1234, "step": 51830 }, { "epoch": 0.1973158347479884, "grad_norm": 0.12440038472414017, "learning_rate": 0.0005, "loss": 2.1273, "step": 51840 }, { "epoch": 0.19735389721611107, "grad_norm": 0.11765279620885849, "learning_rate": 0.0005, "loss": 2.1358, "step": 51850 }, { "epoch": 0.19739195968423376, "grad_norm": 0.12797802686691284, "learning_rate": 0.0005, "loss": 2.1406, "step": 51860 }, { "epoch": 0.19743002215235644, "grad_norm": 0.12895552814006805, "learning_rate": 0.0005, "loss": 2.1275, "step": 51870 }, { "epoch": 0.19746808462047913, "grad_norm": 0.11671297252178192, "learning_rate": 0.0005, "loss": 2.1375, "step": 51880 }, { "epoch": 0.1975061470886018, "grad_norm": 0.13894277811050415, "learning_rate": 0.0005, "loss": 2.1386, "step": 51890 }, { "epoch": 0.1975442095567245, "grad_norm": 0.13398738205432892, "learning_rate": 0.0005, "loss": 2.136, "step": 51900 }, { "epoch": 0.19758227202484718, "grad_norm": 0.12182886153459549, "learning_rate": 0.0005, "loss": 2.116, "step": 51910 }, { "epoch": 0.19762033449296987, "grad_norm": 0.11361069977283478, "learning_rate": 0.0005, "loss": 2.1331, "step": 51920 }, { "epoch": 0.19765839696109255, "grad_norm": 0.12386829406023026, "learning_rate": 0.0005, "loss": 2.1343, "step": 51930 }, { "epoch": 0.19769645942921524, "grad_norm": 0.13164442777633667, "learning_rate": 0.0005, "loss": 2.1334, "step": 51940 }, { "epoch": 0.19773452189733792, "grad_norm": 0.11578863859176636, "learning_rate": 0.0005, "loss": 2.1328, "step": 51950 }, { "epoch": 0.1977725843654606, "grad_norm": 0.1270083636045456, "learning_rate": 0.0005, "loss": 2.1089, "step": 51960 }, { "epoch": 0.19781064683358326, "grad_norm": 0.11406195908784866, "learning_rate": 0.0005, "loss": 2.1441, "step": 51970 }, { "epoch": 0.19784870930170595, "grad_norm": 0.12531724572181702, "learning_rate": 0.0005, "loss": 2.1355, "step": 51980 }, { "epoch": 0.19788677176982863, "grad_norm": 0.12408492714166641, "learning_rate": 0.0005, "loss": 2.1328, "step": 51990 }, { "epoch": 0.19792483423795132, "grad_norm": 0.12840083241462708, "learning_rate": 0.0005, "loss": 2.1505, "step": 52000 }, { "epoch": 0.197962896706074, "grad_norm": 0.1234857514500618, "learning_rate": 0.0005, "loss": 2.1399, "step": 52010 }, { "epoch": 0.1980009591741967, "grad_norm": 0.12177139520645142, "learning_rate": 0.0005, "loss": 2.1337, "step": 52020 }, { "epoch": 0.19803902164231937, "grad_norm": 0.13745243847370148, "learning_rate": 0.0005, "loss": 2.1414, "step": 52030 }, { "epoch": 0.19807708411044206, "grad_norm": 0.12879960238933563, "learning_rate": 0.0005, "loss": 2.1492, "step": 52040 }, { "epoch": 0.19811514657856474, "grad_norm": 0.12872251868247986, "learning_rate": 0.0005, "loss": 2.1369, "step": 52050 }, { "epoch": 0.19815320904668743, "grad_norm": 0.11028557270765305, "learning_rate": 0.0005, "loss": 2.1333, "step": 52060 }, { "epoch": 0.1981912715148101, "grad_norm": 0.12214988470077515, "learning_rate": 0.0005, "loss": 2.144, "step": 52070 }, { "epoch": 0.1982293339829328, "grad_norm": 0.12096168845891953, "learning_rate": 0.0005, "loss": 2.1354, "step": 52080 }, { "epoch": 0.19826739645105548, "grad_norm": 0.12791207432746887, "learning_rate": 0.0005, "loss": 2.1248, "step": 52090 }, { "epoch": 0.19830545891917817, "grad_norm": 0.12044400721788406, "learning_rate": 0.0005, "loss": 2.1274, "step": 52100 }, { "epoch": 0.19834352138730083, "grad_norm": 0.128586083650589, "learning_rate": 0.0005, "loss": 2.11, "step": 52110 }, { "epoch": 0.1983815838554235, "grad_norm": 0.12912602722644806, "learning_rate": 0.0005, "loss": 2.1275, "step": 52120 }, { "epoch": 0.1984196463235462, "grad_norm": 0.11891495436429977, "learning_rate": 0.0005, "loss": 2.1213, "step": 52130 }, { "epoch": 0.19845770879166888, "grad_norm": 0.16164763271808624, "learning_rate": 0.0005, "loss": 2.1334, "step": 52140 }, { "epoch": 0.19849577125979156, "grad_norm": 0.12841705977916718, "learning_rate": 0.0005, "loss": 2.1312, "step": 52150 }, { "epoch": 0.19853383372791425, "grad_norm": 0.12408839911222458, "learning_rate": 0.0005, "loss": 2.1269, "step": 52160 }, { "epoch": 0.19857189619603693, "grad_norm": 0.1209225282073021, "learning_rate": 0.0005, "loss": 2.1371, "step": 52170 }, { "epoch": 0.19860995866415962, "grad_norm": 0.1380387544631958, "learning_rate": 0.0005, "loss": 2.1324, "step": 52180 }, { "epoch": 0.1986480211322823, "grad_norm": 0.1275915503501892, "learning_rate": 0.0005, "loss": 2.1382, "step": 52190 }, { "epoch": 0.198686083600405, "grad_norm": 0.11722587794065475, "learning_rate": 0.0005, "loss": 2.125, "step": 52200 }, { "epoch": 0.19872414606852767, "grad_norm": 0.12306281924247742, "learning_rate": 0.0005, "loss": 2.1341, "step": 52210 }, { "epoch": 0.19876220853665036, "grad_norm": 0.1213437169790268, "learning_rate": 0.0005, "loss": 2.1269, "step": 52220 }, { "epoch": 0.19880027100477304, "grad_norm": 0.12615956366062164, "learning_rate": 0.0005, "loss": 2.1302, "step": 52230 }, { "epoch": 0.19883833347289573, "grad_norm": 0.11721881479024887, "learning_rate": 0.0005, "loss": 2.1315, "step": 52240 }, { "epoch": 0.19887639594101839, "grad_norm": 0.11572463065385818, "learning_rate": 0.0005, "loss": 2.1314, "step": 52250 }, { "epoch": 0.19891445840914107, "grad_norm": 0.12876826524734497, "learning_rate": 0.0005, "loss": 2.1245, "step": 52260 }, { "epoch": 0.19895252087726376, "grad_norm": 0.12575292587280273, "learning_rate": 0.0005, "loss": 2.1317, "step": 52270 }, { "epoch": 0.19899058334538644, "grad_norm": 0.12764307856559753, "learning_rate": 0.0005, "loss": 2.134, "step": 52280 }, { "epoch": 0.19902864581350913, "grad_norm": 0.1270405650138855, "learning_rate": 0.0005, "loss": 2.1227, "step": 52290 }, { "epoch": 0.1990667082816318, "grad_norm": 0.13285189867019653, "learning_rate": 0.0005, "loss": 2.1318, "step": 52300 }, { "epoch": 0.1991047707497545, "grad_norm": 0.11400274932384491, "learning_rate": 0.0005, "loss": 2.1503, "step": 52310 }, { "epoch": 0.19914283321787718, "grad_norm": 0.1264936625957489, "learning_rate": 0.0005, "loss": 2.1457, "step": 52320 }, { "epoch": 0.19918089568599986, "grad_norm": 0.13964684307575226, "learning_rate": 0.0005, "loss": 2.1124, "step": 52330 }, { "epoch": 0.19921895815412255, "grad_norm": 0.1267102211713791, "learning_rate": 0.0005, "loss": 2.1339, "step": 52340 }, { "epoch": 0.19925702062224523, "grad_norm": 0.11472469568252563, "learning_rate": 0.0005, "loss": 2.1358, "step": 52350 }, { "epoch": 0.19929508309036792, "grad_norm": 0.11693333089351654, "learning_rate": 0.0005, "loss": 2.1335, "step": 52360 }, { "epoch": 0.1993331455584906, "grad_norm": 0.1316034495830536, "learning_rate": 0.0005, "loss": 2.1274, "step": 52370 }, { "epoch": 0.1993712080266133, "grad_norm": 0.12253102660179138, "learning_rate": 0.0005, "loss": 2.1316, "step": 52380 }, { "epoch": 0.19940927049473597, "grad_norm": 0.11671502143144608, "learning_rate": 0.0005, "loss": 2.1509, "step": 52390 }, { "epoch": 0.19944733296285863, "grad_norm": 0.11247535049915314, "learning_rate": 0.0005, "loss": 2.1463, "step": 52400 }, { "epoch": 0.19948539543098132, "grad_norm": 0.11922860145568848, "learning_rate": 0.0005, "loss": 2.1409, "step": 52410 }, { "epoch": 0.199523457899104, "grad_norm": 0.1103215217590332, "learning_rate": 0.0005, "loss": 2.1383, "step": 52420 }, { "epoch": 0.19956152036722669, "grad_norm": 0.16089947521686554, "learning_rate": 0.0005, "loss": 2.1388, "step": 52430 }, { "epoch": 0.19959958283534937, "grad_norm": 0.1370892971754074, "learning_rate": 0.0005, "loss": 2.1206, "step": 52440 }, { "epoch": 0.19963764530347206, "grad_norm": 0.13198357820510864, "learning_rate": 0.0005, "loss": 2.132, "step": 52450 }, { "epoch": 0.19967570777159474, "grad_norm": 0.11394302546977997, "learning_rate": 0.0005, "loss": 2.1271, "step": 52460 }, { "epoch": 0.19971377023971743, "grad_norm": 0.11629348993301392, "learning_rate": 0.0005, "loss": 2.1339, "step": 52470 }, { "epoch": 0.1997518327078401, "grad_norm": 0.11404618620872498, "learning_rate": 0.0005, "loss": 2.1303, "step": 52480 }, { "epoch": 0.1997898951759628, "grad_norm": 0.13269637525081635, "learning_rate": 0.0005, "loss": 2.1294, "step": 52490 }, { "epoch": 0.19982795764408548, "grad_norm": 0.12785989046096802, "learning_rate": 0.0005, "loss": 2.1339, "step": 52500 }, { "epoch": 0.19986602011220816, "grad_norm": 0.12473263591527939, "learning_rate": 0.0005, "loss": 2.1319, "step": 52510 }, { "epoch": 0.19990408258033085, "grad_norm": 0.12538190186023712, "learning_rate": 0.0005, "loss": 2.1316, "step": 52520 }, { "epoch": 0.19994214504845353, "grad_norm": 0.1268540769815445, "learning_rate": 0.0005, "loss": 2.1445, "step": 52530 }, { "epoch": 0.1999802075165762, "grad_norm": 0.1344127058982849, "learning_rate": 0.0005, "loss": 2.1502, "step": 52540 }, { "epoch": 0.20001826998469888, "grad_norm": 0.12185916304588318, "learning_rate": 0.0005, "loss": 2.1356, "step": 52550 }, { "epoch": 0.20005633245282156, "grad_norm": 0.12213872373104095, "learning_rate": 0.0005, "loss": 2.1174, "step": 52560 }, { "epoch": 0.20009439492094425, "grad_norm": 0.12694446742534637, "learning_rate": 0.0005, "loss": 2.1414, "step": 52570 }, { "epoch": 0.20013245738906693, "grad_norm": 0.13906416296958923, "learning_rate": 0.0005, "loss": 2.1343, "step": 52580 }, { "epoch": 0.20017051985718962, "grad_norm": 0.13742320239543915, "learning_rate": 0.0005, "loss": 2.1264, "step": 52590 }, { "epoch": 0.2002085823253123, "grad_norm": 0.13378769159317017, "learning_rate": 0.0005, "loss": 2.127, "step": 52600 }, { "epoch": 0.20024664479343499, "grad_norm": 0.13367900252342224, "learning_rate": 0.0005, "loss": 2.1334, "step": 52610 }, { "epoch": 0.20028470726155767, "grad_norm": 0.13418786227703094, "learning_rate": 0.0005, "loss": 2.129, "step": 52620 }, { "epoch": 0.20032276972968036, "grad_norm": 0.12620803713798523, "learning_rate": 0.0005, "loss": 2.1501, "step": 52630 }, { "epoch": 0.20036083219780304, "grad_norm": 0.12302321195602417, "learning_rate": 0.0005, "loss": 2.1295, "step": 52640 }, { "epoch": 0.20039889466592573, "grad_norm": 0.11489461362361908, "learning_rate": 0.0005, "loss": 2.1233, "step": 52650 }, { "epoch": 0.2004369571340484, "grad_norm": 0.12589262425899506, "learning_rate": 0.0005, "loss": 2.1553, "step": 52660 }, { "epoch": 0.2004750196021711, "grad_norm": 0.12370242923498154, "learning_rate": 0.0005, "loss": 2.1337, "step": 52670 }, { "epoch": 0.20051308207029378, "grad_norm": 0.12273893505334854, "learning_rate": 0.0005, "loss": 2.131, "step": 52680 }, { "epoch": 0.20055114453841644, "grad_norm": 0.12193376570940018, "learning_rate": 0.0005, "loss": 2.1227, "step": 52690 }, { "epoch": 0.20058920700653912, "grad_norm": 0.125262051820755, "learning_rate": 0.0005, "loss": 2.1293, "step": 52700 }, { "epoch": 0.2006272694746618, "grad_norm": 0.13586099445819855, "learning_rate": 0.0005, "loss": 2.127, "step": 52710 }, { "epoch": 0.2006653319427845, "grad_norm": 0.11950060725212097, "learning_rate": 0.0005, "loss": 2.1454, "step": 52720 }, { "epoch": 0.20070339441090718, "grad_norm": 0.1289469301700592, "learning_rate": 0.0005, "loss": 2.122, "step": 52730 }, { "epoch": 0.20074145687902986, "grad_norm": 0.12563686072826385, "learning_rate": 0.0005, "loss": 2.1371, "step": 52740 }, { "epoch": 0.20077951934715255, "grad_norm": 0.11492034047842026, "learning_rate": 0.0005, "loss": 2.1386, "step": 52750 }, { "epoch": 0.20081758181527523, "grad_norm": 0.12467867881059647, "learning_rate": 0.0005, "loss": 2.1217, "step": 52760 }, { "epoch": 0.20085564428339792, "grad_norm": 0.1249057725071907, "learning_rate": 0.0005, "loss": 2.1252, "step": 52770 }, { "epoch": 0.2008937067515206, "grad_norm": 0.13855883479118347, "learning_rate": 0.0005, "loss": 2.1318, "step": 52780 }, { "epoch": 0.20093176921964329, "grad_norm": 0.12252828478813171, "learning_rate": 0.0005, "loss": 2.1297, "step": 52790 }, { "epoch": 0.20096983168776597, "grad_norm": 0.11606127768754959, "learning_rate": 0.0005, "loss": 2.1307, "step": 52800 }, { "epoch": 0.20100789415588866, "grad_norm": 0.1138739287853241, "learning_rate": 0.0005, "loss": 2.1256, "step": 52810 }, { "epoch": 0.20104595662401134, "grad_norm": 0.14339643716812134, "learning_rate": 0.0005, "loss": 2.1283, "step": 52820 }, { "epoch": 0.201084019092134, "grad_norm": 0.13310876488685608, "learning_rate": 0.0005, "loss": 2.1401, "step": 52830 }, { "epoch": 0.20112208156025668, "grad_norm": 0.11275873333215714, "learning_rate": 0.0005, "loss": 2.1375, "step": 52840 }, { "epoch": 0.20116014402837937, "grad_norm": 0.11566092818975449, "learning_rate": 0.0005, "loss": 2.1203, "step": 52850 }, { "epoch": 0.20119820649650205, "grad_norm": 0.14016412198543549, "learning_rate": 0.0005, "loss": 2.1348, "step": 52860 }, { "epoch": 0.20123626896462474, "grad_norm": 0.12313041090965271, "learning_rate": 0.0005, "loss": 2.1131, "step": 52870 }, { "epoch": 0.20127433143274742, "grad_norm": 0.12930551171302795, "learning_rate": 0.0005, "loss": 2.1241, "step": 52880 }, { "epoch": 0.2013123939008701, "grad_norm": 0.13301874697208405, "learning_rate": 0.0005, "loss": 2.1374, "step": 52890 }, { "epoch": 0.2013504563689928, "grad_norm": 0.11824183166027069, "learning_rate": 0.0005, "loss": 2.1415, "step": 52900 }, { "epoch": 0.20138851883711548, "grad_norm": 0.1287565529346466, "learning_rate": 0.0005, "loss": 2.1277, "step": 52910 }, { "epoch": 0.20142658130523816, "grad_norm": 0.13923634588718414, "learning_rate": 0.0005, "loss": 2.1251, "step": 52920 }, { "epoch": 0.20146464377336085, "grad_norm": 0.134573295712471, "learning_rate": 0.0005, "loss": 2.1216, "step": 52930 }, { "epoch": 0.20150270624148353, "grad_norm": 0.1330741047859192, "learning_rate": 0.0005, "loss": 2.1266, "step": 52940 }, { "epoch": 0.20154076870960622, "grad_norm": 0.12362990528345108, "learning_rate": 0.0005, "loss": 2.1305, "step": 52950 }, { "epoch": 0.2015788311777289, "grad_norm": 0.11643882095813751, "learning_rate": 0.0005, "loss": 2.1298, "step": 52960 }, { "epoch": 0.20161689364585156, "grad_norm": 0.13288746774196625, "learning_rate": 0.0005, "loss": 2.1367, "step": 52970 }, { "epoch": 0.20165495611397424, "grad_norm": 0.11521294713020325, "learning_rate": 0.0005, "loss": 2.1305, "step": 52980 }, { "epoch": 0.20169301858209693, "grad_norm": 0.1399589329957962, "learning_rate": 0.0005, "loss": 2.1432, "step": 52990 }, { "epoch": 0.2017310810502196, "grad_norm": 0.12761197984218597, "learning_rate": 0.0005, "loss": 2.124, "step": 53000 }, { "epoch": 0.2017691435183423, "grad_norm": 0.14741909503936768, "learning_rate": 0.0005, "loss": 2.1266, "step": 53010 }, { "epoch": 0.20180720598646498, "grad_norm": 0.12950895726680756, "learning_rate": 0.0005, "loss": 2.1236, "step": 53020 }, { "epoch": 0.20184526845458767, "grad_norm": 0.1153949499130249, "learning_rate": 0.0005, "loss": 2.126, "step": 53030 }, { "epoch": 0.20188333092271035, "grad_norm": 0.1166260614991188, "learning_rate": 0.0005, "loss": 2.1328, "step": 53040 }, { "epoch": 0.20192139339083304, "grad_norm": 0.11685400456190109, "learning_rate": 0.0005, "loss": 2.1309, "step": 53050 }, { "epoch": 0.20195945585895572, "grad_norm": 0.1260029524564743, "learning_rate": 0.0005, "loss": 2.1241, "step": 53060 }, { "epoch": 0.2019975183270784, "grad_norm": 0.11610860377550125, "learning_rate": 0.0005, "loss": 2.141, "step": 53070 }, { "epoch": 0.2020355807952011, "grad_norm": 0.11834888905286789, "learning_rate": 0.0005, "loss": 2.1316, "step": 53080 }, { "epoch": 0.20207364326332378, "grad_norm": 0.1214648187160492, "learning_rate": 0.0005, "loss": 2.1294, "step": 53090 }, { "epoch": 0.20211170573144646, "grad_norm": 0.12263604253530502, "learning_rate": 0.0005, "loss": 2.1214, "step": 53100 }, { "epoch": 0.20214976819956915, "grad_norm": 0.11759454756975174, "learning_rate": 0.0005, "loss": 2.1251, "step": 53110 }, { "epoch": 0.2021878306676918, "grad_norm": 0.1269487589597702, "learning_rate": 0.0005, "loss": 2.1293, "step": 53120 }, { "epoch": 0.2022258931358145, "grad_norm": 0.13687825202941895, "learning_rate": 0.0005, "loss": 2.1333, "step": 53130 }, { "epoch": 0.20226395560393717, "grad_norm": 0.13485699892044067, "learning_rate": 0.0005, "loss": 2.1369, "step": 53140 }, { "epoch": 0.20230201807205986, "grad_norm": 0.12800122797489166, "learning_rate": 0.0005, "loss": 2.1404, "step": 53150 }, { "epoch": 0.20234008054018254, "grad_norm": 0.12188751995563507, "learning_rate": 0.0005, "loss": 2.1416, "step": 53160 }, { "epoch": 0.20237814300830523, "grad_norm": 0.11998622864484787, "learning_rate": 0.0005, "loss": 2.1371, "step": 53170 }, { "epoch": 0.2024162054764279, "grad_norm": 0.12778060138225555, "learning_rate": 0.0005, "loss": 2.1198, "step": 53180 }, { "epoch": 0.2024542679445506, "grad_norm": 0.12257424741983414, "learning_rate": 0.0005, "loss": 2.1448, "step": 53190 }, { "epoch": 0.20249233041267328, "grad_norm": 0.1922028511762619, "learning_rate": 0.0005, "loss": 2.1369, "step": 53200 }, { "epoch": 0.20253039288079597, "grad_norm": 0.11676789075136185, "learning_rate": 0.0005, "loss": 2.1322, "step": 53210 }, { "epoch": 0.20256845534891865, "grad_norm": 0.12324552983045578, "learning_rate": 0.0005, "loss": 2.1328, "step": 53220 }, { "epoch": 0.20260651781704134, "grad_norm": 0.12745507061481476, "learning_rate": 0.0005, "loss": 2.1402, "step": 53230 }, { "epoch": 0.20264458028516402, "grad_norm": 0.13323475420475006, "learning_rate": 0.0005, "loss": 2.1402, "step": 53240 }, { "epoch": 0.2026826427532867, "grad_norm": 0.13235697150230408, "learning_rate": 0.0005, "loss": 2.1445, "step": 53250 }, { "epoch": 0.20272070522140936, "grad_norm": 0.126156285405159, "learning_rate": 0.0005, "loss": 2.1347, "step": 53260 }, { "epoch": 0.20275876768953205, "grad_norm": 0.12333841621875763, "learning_rate": 0.0005, "loss": 2.1318, "step": 53270 }, { "epoch": 0.20279683015765473, "grad_norm": 0.12026585638523102, "learning_rate": 0.0005, "loss": 2.131, "step": 53280 }, { "epoch": 0.20283489262577742, "grad_norm": 0.13235647976398468, "learning_rate": 0.0005, "loss": 2.1329, "step": 53290 }, { "epoch": 0.2028729550939001, "grad_norm": 0.11956822127103806, "learning_rate": 0.0005, "loss": 2.1305, "step": 53300 }, { "epoch": 0.2029110175620228, "grad_norm": 0.12753432989120483, "learning_rate": 0.0005, "loss": 2.1277, "step": 53310 }, { "epoch": 0.20294908003014547, "grad_norm": 0.11608036607503891, "learning_rate": 0.0005, "loss": 2.1353, "step": 53320 }, { "epoch": 0.20298714249826816, "grad_norm": 0.1314079910516739, "learning_rate": 0.0005, "loss": 2.1281, "step": 53330 }, { "epoch": 0.20302520496639084, "grad_norm": 0.11957383155822754, "learning_rate": 0.0005, "loss": 2.1416, "step": 53340 }, { "epoch": 0.20306326743451353, "grad_norm": 0.14062124490737915, "learning_rate": 0.0005, "loss": 2.1506, "step": 53350 }, { "epoch": 0.2031013299026362, "grad_norm": 0.12807902693748474, "learning_rate": 0.0005, "loss": 2.1436, "step": 53360 }, { "epoch": 0.2031393923707589, "grad_norm": 0.1215902715921402, "learning_rate": 0.0005, "loss": 2.1318, "step": 53370 }, { "epoch": 0.20317745483888158, "grad_norm": 0.12466635555028915, "learning_rate": 0.0005, "loss": 2.1237, "step": 53380 }, { "epoch": 0.20321551730700427, "grad_norm": 0.12310461699962616, "learning_rate": 0.0005, "loss": 2.1397, "step": 53390 }, { "epoch": 0.20325357977512692, "grad_norm": 0.13518789410591125, "learning_rate": 0.0005, "loss": 2.132, "step": 53400 }, { "epoch": 0.2032916422432496, "grad_norm": 0.13395272195339203, "learning_rate": 0.0005, "loss": 2.1046, "step": 53410 }, { "epoch": 0.2033297047113723, "grad_norm": 0.12013889849185944, "learning_rate": 0.0005, "loss": 2.1338, "step": 53420 }, { "epoch": 0.20336776717949498, "grad_norm": 0.12862953543663025, "learning_rate": 0.0005, "loss": 2.135, "step": 53430 }, { "epoch": 0.20340582964761766, "grad_norm": 0.119823157787323, "learning_rate": 0.0005, "loss": 2.1258, "step": 53440 }, { "epoch": 0.20344389211574035, "grad_norm": 0.12746182084083557, "learning_rate": 0.0005, "loss": 2.133, "step": 53450 }, { "epoch": 0.20348195458386303, "grad_norm": 0.12012957781553268, "learning_rate": 0.0005, "loss": 2.1299, "step": 53460 }, { "epoch": 0.20352001705198572, "grad_norm": 0.12806500494480133, "learning_rate": 0.0005, "loss": 2.1273, "step": 53470 }, { "epoch": 0.2035580795201084, "grad_norm": 0.12169921398162842, "learning_rate": 0.0005, "loss": 2.1254, "step": 53480 }, { "epoch": 0.2035961419882311, "grad_norm": 0.5924624800682068, "learning_rate": 0.0005, "loss": 2.1444, "step": 53490 }, { "epoch": 0.20363420445635377, "grad_norm": 0.1756318360567093, "learning_rate": 0.0005, "loss": 2.1477, "step": 53500 }, { "epoch": 0.20367226692447646, "grad_norm": 0.14018510282039642, "learning_rate": 0.0005, "loss": 2.1425, "step": 53510 }, { "epoch": 0.20371032939259914, "grad_norm": 0.12534722685813904, "learning_rate": 0.0005, "loss": 2.1346, "step": 53520 }, { "epoch": 0.20374839186072183, "grad_norm": 0.1105349212884903, "learning_rate": 0.0005, "loss": 2.1251, "step": 53530 }, { "epoch": 0.2037864543288445, "grad_norm": 0.11550001800060272, "learning_rate": 0.0005, "loss": 2.1156, "step": 53540 }, { "epoch": 0.20382451679696717, "grad_norm": 0.12112603336572647, "learning_rate": 0.0005, "loss": 2.1179, "step": 53550 }, { "epoch": 0.20386257926508985, "grad_norm": 0.12003546953201294, "learning_rate": 0.0005, "loss": 2.1291, "step": 53560 }, { "epoch": 0.20390064173321254, "grad_norm": 0.13228186964988708, "learning_rate": 0.0005, "loss": 2.1254, "step": 53570 }, { "epoch": 0.20393870420133522, "grad_norm": 0.12017644941806793, "learning_rate": 0.0005, "loss": 2.1275, "step": 53580 }, { "epoch": 0.2039767666694579, "grad_norm": 0.12018509954214096, "learning_rate": 0.0005, "loss": 2.1234, "step": 53590 }, { "epoch": 0.2040148291375806, "grad_norm": 0.12411874532699585, "learning_rate": 0.0005, "loss": 2.1357, "step": 53600 }, { "epoch": 0.20405289160570328, "grad_norm": 0.1323518604040146, "learning_rate": 0.0005, "loss": 2.1477, "step": 53610 }, { "epoch": 0.20409095407382596, "grad_norm": 0.12746353447437286, "learning_rate": 0.0005, "loss": 2.1399, "step": 53620 }, { "epoch": 0.20412901654194865, "grad_norm": 0.11377382278442383, "learning_rate": 0.0005, "loss": 2.1376, "step": 53630 }, { "epoch": 0.20416707901007133, "grad_norm": 0.1344747096300125, "learning_rate": 0.0005, "loss": 2.1277, "step": 53640 }, { "epoch": 0.20420514147819402, "grad_norm": 0.12653642892837524, "learning_rate": 0.0005, "loss": 2.1357, "step": 53650 }, { "epoch": 0.2042432039463167, "grad_norm": 0.13320209085941315, "learning_rate": 0.0005, "loss": 2.1412, "step": 53660 }, { "epoch": 0.2042812664144394, "grad_norm": 0.12339047342538834, "learning_rate": 0.0005, "loss": 2.1335, "step": 53670 }, { "epoch": 0.20431932888256207, "grad_norm": 0.1247817873954773, "learning_rate": 0.0005, "loss": 2.1501, "step": 53680 }, { "epoch": 0.20435739135068473, "grad_norm": 0.11579610407352448, "learning_rate": 0.0005, "loss": 2.1519, "step": 53690 }, { "epoch": 0.20439545381880742, "grad_norm": 0.11140631884336472, "learning_rate": 0.0005, "loss": 2.1308, "step": 53700 }, { "epoch": 0.2044335162869301, "grad_norm": 0.11983554810285568, "learning_rate": 0.0005, "loss": 2.1353, "step": 53710 }, { "epoch": 0.20447157875505279, "grad_norm": 0.11999595910310745, "learning_rate": 0.0005, "loss": 2.1353, "step": 53720 }, { "epoch": 0.20450964122317547, "grad_norm": 0.11656316369771957, "learning_rate": 0.0005, "loss": 2.1404, "step": 53730 }, { "epoch": 0.20454770369129815, "grad_norm": 0.1222524419426918, "learning_rate": 0.0005, "loss": 2.1379, "step": 53740 }, { "epoch": 0.20458576615942084, "grad_norm": 0.1305672526359558, "learning_rate": 0.0005, "loss": 2.1388, "step": 53750 }, { "epoch": 0.20462382862754352, "grad_norm": 0.13974280655384064, "learning_rate": 0.0005, "loss": 2.1359, "step": 53760 }, { "epoch": 0.2046618910956662, "grad_norm": 0.11578554660081863, "learning_rate": 0.0005, "loss": 2.1114, "step": 53770 }, { "epoch": 0.2046999535637889, "grad_norm": 0.12131907045841217, "learning_rate": 0.0005, "loss": 2.127, "step": 53780 }, { "epoch": 0.20473801603191158, "grad_norm": 0.36936134099960327, "learning_rate": 0.0005, "loss": 2.1329, "step": 53790 }, { "epoch": 0.20477607850003426, "grad_norm": 0.11620636284351349, "learning_rate": 0.0005, "loss": 2.1319, "step": 53800 }, { "epoch": 0.20481414096815695, "grad_norm": 0.15168805420398712, "learning_rate": 0.0005, "loss": 2.1277, "step": 53810 }, { "epoch": 0.20485220343627963, "grad_norm": 0.1311609297990799, "learning_rate": 0.0005, "loss": 2.1341, "step": 53820 }, { "epoch": 0.20489026590440232, "grad_norm": 0.11689729988574982, "learning_rate": 0.0005, "loss": 2.1341, "step": 53830 }, { "epoch": 0.20492832837252498, "grad_norm": 0.12733341753482819, "learning_rate": 0.0005, "loss": 2.1518, "step": 53840 }, { "epoch": 0.20496639084064766, "grad_norm": 0.13116632401943207, "learning_rate": 0.0005, "loss": 2.1264, "step": 53850 }, { "epoch": 0.20500445330877035, "grad_norm": 0.13507050275802612, "learning_rate": 0.0005, "loss": 2.1254, "step": 53860 }, { "epoch": 0.20504251577689303, "grad_norm": 0.14825035631656647, "learning_rate": 0.0005, "loss": 2.1486, "step": 53870 }, { "epoch": 0.20508057824501572, "grad_norm": 0.1258191466331482, "learning_rate": 0.0005, "loss": 2.1242, "step": 53880 }, { "epoch": 0.2051186407131384, "grad_norm": 0.12025585025548935, "learning_rate": 0.0005, "loss": 2.1262, "step": 53890 }, { "epoch": 0.20515670318126109, "grad_norm": 0.11997051537036896, "learning_rate": 0.0005, "loss": 2.1335, "step": 53900 }, { "epoch": 0.20519476564938377, "grad_norm": 0.1282651275396347, "learning_rate": 0.0005, "loss": 2.1287, "step": 53910 }, { "epoch": 0.20523282811750646, "grad_norm": 0.11547555774450302, "learning_rate": 0.0005, "loss": 2.1247, "step": 53920 }, { "epoch": 0.20527089058562914, "grad_norm": 0.1203666478395462, "learning_rate": 0.0005, "loss": 2.1295, "step": 53930 }, { "epoch": 0.20530895305375182, "grad_norm": 0.12874288856983185, "learning_rate": 0.0005, "loss": 2.1196, "step": 53940 }, { "epoch": 0.2053470155218745, "grad_norm": 0.10989463329315186, "learning_rate": 0.0005, "loss": 2.1269, "step": 53950 }, { "epoch": 0.2053850779899972, "grad_norm": 0.11719923466444016, "learning_rate": 0.0005, "loss": 2.1328, "step": 53960 }, { "epoch": 0.20542314045811988, "grad_norm": 0.14622372388839722, "learning_rate": 0.0005, "loss": 2.1304, "step": 53970 }, { "epoch": 0.20546120292624254, "grad_norm": 0.12204207479953766, "learning_rate": 0.0005, "loss": 2.1293, "step": 53980 }, { "epoch": 0.20549926539436522, "grad_norm": 0.11967600882053375, "learning_rate": 0.0005, "loss": 2.1247, "step": 53990 }, { "epoch": 0.2055373278624879, "grad_norm": 0.13717426359653473, "learning_rate": 0.0005, "loss": 2.1311, "step": 54000 }, { "epoch": 0.2055753903306106, "grad_norm": 0.12481812387704849, "learning_rate": 0.0005, "loss": 2.13, "step": 54010 }, { "epoch": 0.20561345279873328, "grad_norm": 0.12184108048677444, "learning_rate": 0.0005, "loss": 2.1223, "step": 54020 }, { "epoch": 0.20565151526685596, "grad_norm": 0.1144619882106781, "learning_rate": 0.0005, "loss": 2.1322, "step": 54030 }, { "epoch": 0.20568957773497865, "grad_norm": 0.12227953970432281, "learning_rate": 0.0005, "loss": 2.1404, "step": 54040 }, { "epoch": 0.20572764020310133, "grad_norm": 0.12673652172088623, "learning_rate": 0.0005, "loss": 2.1079, "step": 54050 }, { "epoch": 0.20576570267122402, "grad_norm": 0.12035863101482391, "learning_rate": 0.0005, "loss": 2.138, "step": 54060 }, { "epoch": 0.2058037651393467, "grad_norm": 0.1242937445640564, "learning_rate": 0.0005, "loss": 2.1468, "step": 54070 }, { "epoch": 0.20584182760746939, "grad_norm": 0.1221139058470726, "learning_rate": 0.0005, "loss": 2.1362, "step": 54080 }, { "epoch": 0.20587989007559207, "grad_norm": 0.12572316825389862, "learning_rate": 0.0005, "loss": 2.1196, "step": 54090 }, { "epoch": 0.20591795254371476, "grad_norm": 0.10956557840108871, "learning_rate": 0.0005, "loss": 2.1414, "step": 54100 }, { "epoch": 0.20595601501183744, "grad_norm": 0.12266967445611954, "learning_rate": 0.0005, "loss": 2.1358, "step": 54110 }, { "epoch": 0.2059940774799601, "grad_norm": 0.12685400247573853, "learning_rate": 0.0005, "loss": 2.1494, "step": 54120 }, { "epoch": 0.20603213994808278, "grad_norm": 0.12227378040552139, "learning_rate": 0.0005, "loss": 2.1251, "step": 54130 }, { "epoch": 0.20607020241620547, "grad_norm": 0.11356621235609055, "learning_rate": 0.0005, "loss": 2.1365, "step": 54140 }, { "epoch": 0.20610826488432815, "grad_norm": 0.12191888689994812, "learning_rate": 0.0005, "loss": 2.1311, "step": 54150 }, { "epoch": 0.20614632735245084, "grad_norm": 0.14179301261901855, "learning_rate": 0.0005, "loss": 2.1271, "step": 54160 }, { "epoch": 0.20618438982057352, "grad_norm": 0.13185860216617584, "learning_rate": 0.0005, "loss": 2.1361, "step": 54170 }, { "epoch": 0.2062224522886962, "grad_norm": 0.12983562052249908, "learning_rate": 0.0005, "loss": 2.1345, "step": 54180 }, { "epoch": 0.2062605147568189, "grad_norm": 0.13673518598079681, "learning_rate": 0.0005, "loss": 2.1347, "step": 54190 }, { "epoch": 0.20629857722494158, "grad_norm": 0.13771027326583862, "learning_rate": 0.0005, "loss": 2.1257, "step": 54200 }, { "epoch": 0.20633663969306426, "grad_norm": 0.12526799738407135, "learning_rate": 0.0005, "loss": 2.1365, "step": 54210 }, { "epoch": 0.20637470216118695, "grad_norm": 0.11866194754838943, "learning_rate": 0.0005, "loss": 2.1321, "step": 54220 }, { "epoch": 0.20641276462930963, "grad_norm": 0.11927735805511475, "learning_rate": 0.0005, "loss": 2.1309, "step": 54230 }, { "epoch": 0.20645082709743232, "grad_norm": 0.11958098411560059, "learning_rate": 0.0005, "loss": 2.1246, "step": 54240 }, { "epoch": 0.206488889565555, "grad_norm": 0.11478394269943237, "learning_rate": 0.0005, "loss": 2.1323, "step": 54250 }, { "epoch": 0.20652695203367769, "grad_norm": 0.13040174543857574, "learning_rate": 0.0005, "loss": 2.1435, "step": 54260 }, { "epoch": 0.20656501450180034, "grad_norm": 0.13240981101989746, "learning_rate": 0.0005, "loss": 2.125, "step": 54270 }, { "epoch": 0.20660307696992303, "grad_norm": 0.11602520942687988, "learning_rate": 0.0005, "loss": 2.1252, "step": 54280 }, { "epoch": 0.2066411394380457, "grad_norm": 0.13566020131111145, "learning_rate": 0.0005, "loss": 2.1265, "step": 54290 }, { "epoch": 0.2066792019061684, "grad_norm": 0.1175849512219429, "learning_rate": 0.0005, "loss": 2.1247, "step": 54300 }, { "epoch": 0.20671726437429108, "grad_norm": 0.12190808355808258, "learning_rate": 0.0005, "loss": 2.1389, "step": 54310 }, { "epoch": 0.20675532684241377, "grad_norm": 0.11281128227710724, "learning_rate": 0.0005, "loss": 2.1386, "step": 54320 }, { "epoch": 0.20679338931053645, "grad_norm": 0.11461440473794937, "learning_rate": 0.0005, "loss": 2.1184, "step": 54330 }, { "epoch": 0.20683145177865914, "grad_norm": 0.121035136282444, "learning_rate": 0.0005, "loss": 2.1286, "step": 54340 }, { "epoch": 0.20686951424678182, "grad_norm": 0.12157423049211502, "learning_rate": 0.0005, "loss": 2.1393, "step": 54350 }, { "epoch": 0.2069075767149045, "grad_norm": 0.11936475336551666, "learning_rate": 0.0005, "loss": 2.1363, "step": 54360 }, { "epoch": 0.2069456391830272, "grad_norm": 0.1261560320854187, "learning_rate": 0.0005, "loss": 2.1168, "step": 54370 }, { "epoch": 0.20698370165114988, "grad_norm": 0.12038817256689072, "learning_rate": 0.0005, "loss": 2.1343, "step": 54380 }, { "epoch": 0.20702176411927256, "grad_norm": 0.12115965038537979, "learning_rate": 0.0005, "loss": 2.1452, "step": 54390 }, { "epoch": 0.20705982658739525, "grad_norm": 0.1181049793958664, "learning_rate": 0.0005, "loss": 2.1262, "step": 54400 }, { "epoch": 0.2070978890555179, "grad_norm": 0.11807470768690109, "learning_rate": 0.0005, "loss": 2.1409, "step": 54410 }, { "epoch": 0.2071359515236406, "grad_norm": 0.1191549152135849, "learning_rate": 0.0005, "loss": 2.1295, "step": 54420 }, { "epoch": 0.20717401399176327, "grad_norm": 0.11723997443914413, "learning_rate": 0.0005, "loss": 2.1336, "step": 54430 }, { "epoch": 0.20721207645988596, "grad_norm": 0.13964317739009857, "learning_rate": 0.0005, "loss": 2.1388, "step": 54440 }, { "epoch": 0.20725013892800864, "grad_norm": 0.12053508311510086, "learning_rate": 0.0005, "loss": 2.1345, "step": 54450 }, { "epoch": 0.20728820139613133, "grad_norm": 0.1290549635887146, "learning_rate": 0.0005, "loss": 2.145, "step": 54460 }, { "epoch": 0.207326263864254, "grad_norm": 0.11613258719444275, "learning_rate": 0.0005, "loss": 2.1324, "step": 54470 }, { "epoch": 0.2073643263323767, "grad_norm": 0.1176428496837616, "learning_rate": 0.0005, "loss": 2.1247, "step": 54480 }, { "epoch": 0.20740238880049938, "grad_norm": 0.12306077033281326, "learning_rate": 0.0005, "loss": 2.1226, "step": 54490 }, { "epoch": 0.20744045126862207, "grad_norm": 0.12678076326847076, "learning_rate": 0.0005, "loss": 2.1185, "step": 54500 }, { "epoch": 0.20747851373674475, "grad_norm": 0.13372944295406342, "learning_rate": 0.0005, "loss": 2.1275, "step": 54510 }, { "epoch": 0.20751657620486744, "grad_norm": 0.1182679682970047, "learning_rate": 0.0005, "loss": 2.1238, "step": 54520 }, { "epoch": 0.20755463867299012, "grad_norm": 0.11917141079902649, "learning_rate": 0.0005, "loss": 2.137, "step": 54530 }, { "epoch": 0.2075927011411128, "grad_norm": 0.12174999713897705, "learning_rate": 0.0005, "loss": 2.1427, "step": 54540 }, { "epoch": 0.20763076360923546, "grad_norm": 0.12785743176937103, "learning_rate": 0.0005, "loss": 2.1392, "step": 54550 }, { "epoch": 0.20766882607735815, "grad_norm": 0.13062579929828644, "learning_rate": 0.0005, "loss": 2.142, "step": 54560 }, { "epoch": 0.20770688854548083, "grad_norm": 0.1259687840938568, "learning_rate": 0.0005, "loss": 2.1442, "step": 54570 }, { "epoch": 0.20774495101360352, "grad_norm": 0.1330173909664154, "learning_rate": 0.0005, "loss": 2.1379, "step": 54580 }, { "epoch": 0.2077830134817262, "grad_norm": 0.12119072675704956, "learning_rate": 0.0005, "loss": 2.1318, "step": 54590 }, { "epoch": 0.2078210759498489, "grad_norm": 0.1250128597021103, "learning_rate": 0.0005, "loss": 2.1318, "step": 54600 }, { "epoch": 0.20785913841797157, "grad_norm": 0.12661468982696533, "learning_rate": 0.0005, "loss": 2.1338, "step": 54610 }, { "epoch": 0.20789720088609426, "grad_norm": 0.13943052291870117, "learning_rate": 0.0005, "loss": 2.1462, "step": 54620 }, { "epoch": 0.20793526335421694, "grad_norm": 0.1279287338256836, "learning_rate": 0.0005, "loss": 2.1299, "step": 54630 }, { "epoch": 0.20797332582233963, "grad_norm": 0.1283564567565918, "learning_rate": 0.0005, "loss": 2.1266, "step": 54640 }, { "epoch": 0.2080113882904623, "grad_norm": 0.12714558839797974, "learning_rate": 0.0005, "loss": 2.1364, "step": 54650 }, { "epoch": 0.208049450758585, "grad_norm": 0.12611687183380127, "learning_rate": 0.0005, "loss": 2.1301, "step": 54660 }, { "epoch": 0.20808751322670768, "grad_norm": 0.11924757808446884, "learning_rate": 0.0005, "loss": 2.1395, "step": 54670 }, { "epoch": 0.20812557569483037, "grad_norm": 0.11965058743953705, "learning_rate": 0.0005, "loss": 2.1342, "step": 54680 }, { "epoch": 0.20816363816295305, "grad_norm": 0.12703274190425873, "learning_rate": 0.0005, "loss": 2.1228, "step": 54690 }, { "epoch": 0.2082017006310757, "grad_norm": 0.10818231105804443, "learning_rate": 0.0005, "loss": 2.1182, "step": 54700 }, { "epoch": 0.2082397630991984, "grad_norm": 0.12397830188274384, "learning_rate": 0.0005, "loss": 2.1323, "step": 54710 }, { "epoch": 0.20827782556732108, "grad_norm": 0.11835771799087524, "learning_rate": 0.0005, "loss": 2.1308, "step": 54720 }, { "epoch": 0.20831588803544376, "grad_norm": 0.12970468401908875, "learning_rate": 0.0005, "loss": 2.1217, "step": 54730 }, { "epoch": 0.20835395050356645, "grad_norm": 0.1369071751832962, "learning_rate": 0.0005, "loss": 2.1162, "step": 54740 }, { "epoch": 0.20839201297168913, "grad_norm": 0.13071531057357788, "learning_rate": 0.0005, "loss": 2.1323, "step": 54750 }, { "epoch": 0.20843007543981182, "grad_norm": 0.12456535547971725, "learning_rate": 0.0005, "loss": 2.1221, "step": 54760 }, { "epoch": 0.2084681379079345, "grad_norm": 0.12861059606075287, "learning_rate": 0.0005, "loss": 2.1204, "step": 54770 }, { "epoch": 0.2085062003760572, "grad_norm": 0.12247523665428162, "learning_rate": 0.0005, "loss": 2.1233, "step": 54780 }, { "epoch": 0.20854426284417987, "grad_norm": 0.12303100526332855, "learning_rate": 0.0005, "loss": 2.1372, "step": 54790 }, { "epoch": 0.20858232531230256, "grad_norm": 0.13596375286579132, "learning_rate": 0.0005, "loss": 2.1302, "step": 54800 }, { "epoch": 0.20862038778042524, "grad_norm": 0.12867873907089233, "learning_rate": 0.0005, "loss": 2.1433, "step": 54810 }, { "epoch": 0.20865845024854793, "grad_norm": 0.12008437514305115, "learning_rate": 0.0005, "loss": 2.1352, "step": 54820 }, { "epoch": 0.2086965127166706, "grad_norm": 0.12185460329055786, "learning_rate": 0.0005, "loss": 2.1394, "step": 54830 }, { "epoch": 0.20873457518479327, "grad_norm": 0.12056107074022293, "learning_rate": 0.0005, "loss": 2.1454, "step": 54840 }, { "epoch": 0.20877263765291595, "grad_norm": 0.11947619169950485, "learning_rate": 0.0005, "loss": 2.1325, "step": 54850 }, { "epoch": 0.20881070012103864, "grad_norm": 0.14452539384365082, "learning_rate": 0.0005, "loss": 2.1376, "step": 54860 }, { "epoch": 0.20884876258916132, "grad_norm": 0.12263192236423492, "learning_rate": 0.0005, "loss": 2.1244, "step": 54870 }, { "epoch": 0.208886825057284, "grad_norm": 0.11858075857162476, "learning_rate": 0.0005, "loss": 2.1363, "step": 54880 }, { "epoch": 0.2089248875254067, "grad_norm": 0.12804926931858063, "learning_rate": 0.0005, "loss": 2.1296, "step": 54890 }, { "epoch": 0.20896294999352938, "grad_norm": 0.14871400594711304, "learning_rate": 0.0005, "loss": 2.1307, "step": 54900 }, { "epoch": 0.20900101246165206, "grad_norm": 0.12267819792032242, "learning_rate": 0.0005, "loss": 2.1301, "step": 54910 }, { "epoch": 0.20903907492977475, "grad_norm": 0.12310918420553207, "learning_rate": 0.0005, "loss": 2.1388, "step": 54920 }, { "epoch": 0.20907713739789743, "grad_norm": 0.12073167413473129, "learning_rate": 0.0005, "loss": 2.1404, "step": 54930 }, { "epoch": 0.20911519986602012, "grad_norm": 0.11783216148614883, "learning_rate": 0.0005, "loss": 2.121, "step": 54940 }, { "epoch": 0.2091532623341428, "grad_norm": 0.12339416891336441, "learning_rate": 0.0005, "loss": 2.1349, "step": 54950 }, { "epoch": 0.2091913248022655, "grad_norm": 0.14555498957633972, "learning_rate": 0.0005, "loss": 2.1269, "step": 54960 }, { "epoch": 0.20922938727038817, "grad_norm": 0.1503671109676361, "learning_rate": 0.0005, "loss": 2.129, "step": 54970 }, { "epoch": 0.20926744973851086, "grad_norm": 0.12491462379693985, "learning_rate": 0.0005, "loss": 2.148, "step": 54980 }, { "epoch": 0.20930551220663351, "grad_norm": 0.117496557533741, "learning_rate": 0.0005, "loss": 2.1292, "step": 54990 }, { "epoch": 0.2093435746747562, "grad_norm": 0.1343589425086975, "learning_rate": 0.0005, "loss": 2.1333, "step": 55000 }, { "epoch": 0.20938163714287888, "grad_norm": 0.12166984379291534, "learning_rate": 0.0005, "loss": 2.1285, "step": 55010 }, { "epoch": 0.20941969961100157, "grad_norm": 0.1231997087597847, "learning_rate": 0.0005, "loss": 2.138, "step": 55020 }, { "epoch": 0.20945776207912425, "grad_norm": 0.11930809915065765, "learning_rate": 0.0005, "loss": 2.1458, "step": 55030 }, { "epoch": 0.20949582454724694, "grad_norm": 0.13089969754219055, "learning_rate": 0.0005, "loss": 2.1331, "step": 55040 }, { "epoch": 0.20953388701536962, "grad_norm": 0.12502965331077576, "learning_rate": 0.0005, "loss": 2.122, "step": 55050 }, { "epoch": 0.2095719494834923, "grad_norm": 0.12578997015953064, "learning_rate": 0.0005, "loss": 2.1239, "step": 55060 }, { "epoch": 0.209610011951615, "grad_norm": 0.1373293399810791, "learning_rate": 0.0005, "loss": 2.1513, "step": 55070 }, { "epoch": 0.20964807441973768, "grad_norm": 0.11725837737321854, "learning_rate": 0.0005, "loss": 2.1403, "step": 55080 }, { "epoch": 0.20968613688786036, "grad_norm": 0.12207408994436264, "learning_rate": 0.0005, "loss": 2.1257, "step": 55090 }, { "epoch": 0.20972419935598305, "grad_norm": 0.13774704933166504, "learning_rate": 0.0005, "loss": 2.1229, "step": 55100 }, { "epoch": 0.20976226182410573, "grad_norm": 0.13758708536624908, "learning_rate": 0.0005, "loss": 2.1293, "step": 55110 }, { "epoch": 0.20980032429222842, "grad_norm": 0.1205608993768692, "learning_rate": 0.0005, "loss": 2.1346, "step": 55120 }, { "epoch": 0.20983838676035108, "grad_norm": 0.1296151876449585, "learning_rate": 0.0005, "loss": 2.1128, "step": 55130 }, { "epoch": 0.20987644922847376, "grad_norm": 0.1482541412115097, "learning_rate": 0.0005, "loss": 2.1304, "step": 55140 }, { "epoch": 0.20991451169659645, "grad_norm": 0.1319926232099533, "learning_rate": 0.0005, "loss": 2.1397, "step": 55150 }, { "epoch": 0.20995257416471913, "grad_norm": 0.12207752466201782, "learning_rate": 0.0005, "loss": 2.1404, "step": 55160 }, { "epoch": 0.20999063663284182, "grad_norm": 0.13193126022815704, "learning_rate": 0.0005, "loss": 2.1246, "step": 55170 }, { "epoch": 0.2100286991009645, "grad_norm": 0.11937710642814636, "learning_rate": 0.0005, "loss": 2.1251, "step": 55180 }, { "epoch": 0.21006676156908718, "grad_norm": 0.12013963609933853, "learning_rate": 0.0005, "loss": 2.1285, "step": 55190 }, { "epoch": 0.21010482403720987, "grad_norm": 0.12295087426900864, "learning_rate": 0.0005, "loss": 2.1359, "step": 55200 }, { "epoch": 0.21014288650533255, "grad_norm": 0.1291741281747818, "learning_rate": 0.0005, "loss": 2.1131, "step": 55210 }, { "epoch": 0.21018094897345524, "grad_norm": 0.14405788481235504, "learning_rate": 0.0005, "loss": 2.1376, "step": 55220 }, { "epoch": 0.21021901144157792, "grad_norm": 0.12718753516674042, "learning_rate": 0.0005, "loss": 2.117, "step": 55230 }, { "epoch": 0.2102570739097006, "grad_norm": 0.12040422111749649, "learning_rate": 0.0005, "loss": 2.1362, "step": 55240 }, { "epoch": 0.2102951363778233, "grad_norm": 0.11796865612268448, "learning_rate": 0.0005, "loss": 2.1173, "step": 55250 }, { "epoch": 0.21033319884594598, "grad_norm": 0.11834219098091125, "learning_rate": 0.0005, "loss": 2.1266, "step": 55260 }, { "epoch": 0.21037126131406864, "grad_norm": 0.12916326522827148, "learning_rate": 0.0005, "loss": 2.1309, "step": 55270 }, { "epoch": 0.21040932378219132, "grad_norm": 0.12509098649024963, "learning_rate": 0.0005, "loss": 2.1292, "step": 55280 }, { "epoch": 0.210447386250314, "grad_norm": 0.14400167763233185, "learning_rate": 0.0005, "loss": 2.1096, "step": 55290 }, { "epoch": 0.2104854487184367, "grad_norm": 0.11268658936023712, "learning_rate": 0.0005, "loss": 2.1228, "step": 55300 }, { "epoch": 0.21052351118655938, "grad_norm": 0.1184752807021141, "learning_rate": 0.0005, "loss": 2.1405, "step": 55310 }, { "epoch": 0.21056157365468206, "grad_norm": 0.12232375144958496, "learning_rate": 0.0005, "loss": 2.1203, "step": 55320 }, { "epoch": 0.21059963612280475, "grad_norm": 0.1253381222486496, "learning_rate": 0.0005, "loss": 2.1246, "step": 55330 }, { "epoch": 0.21063769859092743, "grad_norm": 0.1177864670753479, "learning_rate": 0.0005, "loss": 2.1212, "step": 55340 }, { "epoch": 0.21067576105905012, "grad_norm": 0.12369973212480545, "learning_rate": 0.0005, "loss": 2.1305, "step": 55350 }, { "epoch": 0.2107138235271728, "grad_norm": 0.12825240194797516, "learning_rate": 0.0005, "loss": 2.1353, "step": 55360 }, { "epoch": 0.21075188599529548, "grad_norm": 0.12207330763339996, "learning_rate": 0.0005, "loss": 2.114, "step": 55370 }, { "epoch": 0.21078994846341817, "grad_norm": 0.1274675577878952, "learning_rate": 0.0005, "loss": 2.1447, "step": 55380 }, { "epoch": 0.21082801093154085, "grad_norm": 0.14211657643318176, "learning_rate": 0.0005, "loss": 2.1188, "step": 55390 }, { "epoch": 0.21086607339966354, "grad_norm": 0.14069093763828278, "learning_rate": 0.0005, "loss": 2.132, "step": 55400 }, { "epoch": 0.21090413586778622, "grad_norm": 0.12487372756004333, "learning_rate": 0.0005, "loss": 2.1329, "step": 55410 }, { "epoch": 0.21094219833590888, "grad_norm": 0.12757901847362518, "learning_rate": 0.0005, "loss": 2.1341, "step": 55420 }, { "epoch": 0.21098026080403157, "grad_norm": 0.14222578704357147, "learning_rate": 0.0005, "loss": 2.1362, "step": 55430 }, { "epoch": 0.21101832327215425, "grad_norm": 0.12456969171762466, "learning_rate": 0.0005, "loss": 2.1261, "step": 55440 }, { "epoch": 0.21105638574027694, "grad_norm": 0.1276063323020935, "learning_rate": 0.0005, "loss": 2.1235, "step": 55450 }, { "epoch": 0.21109444820839962, "grad_norm": 0.14173118770122528, "learning_rate": 0.0005, "loss": 2.1182, "step": 55460 }, { "epoch": 0.2111325106765223, "grad_norm": 0.13104242086410522, "learning_rate": 0.0005, "loss": 2.1176, "step": 55470 }, { "epoch": 0.211170573144645, "grad_norm": 0.31636613607406616, "learning_rate": 0.0005, "loss": 2.1288, "step": 55480 }, { "epoch": 0.21120863561276768, "grad_norm": 0.11914330720901489, "learning_rate": 0.0005, "loss": 2.1302, "step": 55490 }, { "epoch": 0.21124669808089036, "grad_norm": 0.12945348024368286, "learning_rate": 0.0005, "loss": 2.1201, "step": 55500 }, { "epoch": 0.21128476054901305, "grad_norm": 0.13458223640918732, "learning_rate": 0.0005, "loss": 2.1469, "step": 55510 }, { "epoch": 0.21132282301713573, "grad_norm": 0.11938342452049255, "learning_rate": 0.0005, "loss": 2.1371, "step": 55520 }, { "epoch": 0.21136088548525842, "grad_norm": 0.12643754482269287, "learning_rate": 0.0005, "loss": 2.1282, "step": 55530 }, { "epoch": 0.2113989479533811, "grad_norm": 0.11904884874820709, "learning_rate": 0.0005, "loss": 2.1452, "step": 55540 }, { "epoch": 0.21143701042150378, "grad_norm": 0.12168022245168686, "learning_rate": 0.0005, "loss": 2.1273, "step": 55550 }, { "epoch": 0.21147507288962644, "grad_norm": 0.11386290192604065, "learning_rate": 0.0005, "loss": 2.1257, "step": 55560 }, { "epoch": 0.21151313535774913, "grad_norm": 0.12103776633739471, "learning_rate": 0.0005, "loss": 2.1098, "step": 55570 }, { "epoch": 0.2115511978258718, "grad_norm": 0.12591663002967834, "learning_rate": 0.0005, "loss": 2.1239, "step": 55580 }, { "epoch": 0.2115892602939945, "grad_norm": 0.17933526635169983, "learning_rate": 0.0005, "loss": 2.1389, "step": 55590 }, { "epoch": 0.21162732276211718, "grad_norm": 0.11258309334516525, "learning_rate": 0.0005, "loss": 2.1297, "step": 55600 }, { "epoch": 0.21166538523023987, "grad_norm": 0.12344492226839066, "learning_rate": 0.0005, "loss": 2.139, "step": 55610 }, { "epoch": 0.21170344769836255, "grad_norm": 0.13639947772026062, "learning_rate": 0.0005, "loss": 2.128, "step": 55620 }, { "epoch": 0.21174151016648524, "grad_norm": 0.12072647362947464, "learning_rate": 0.0005, "loss": 2.1342, "step": 55630 }, { "epoch": 0.21177957263460792, "grad_norm": 0.12417701631784439, "learning_rate": 0.0005, "loss": 2.1212, "step": 55640 }, { "epoch": 0.2118176351027306, "grad_norm": 0.12251461297273636, "learning_rate": 0.0005, "loss": 2.1297, "step": 55650 }, { "epoch": 0.2118556975708533, "grad_norm": 0.13269966840744019, "learning_rate": 0.0005, "loss": 2.1364, "step": 55660 }, { "epoch": 0.21189376003897598, "grad_norm": 0.12810970842838287, "learning_rate": 0.0005, "loss": 2.1414, "step": 55670 }, { "epoch": 0.21193182250709866, "grad_norm": 0.12790927290916443, "learning_rate": 0.0005, "loss": 2.1127, "step": 55680 }, { "epoch": 0.21196988497522135, "grad_norm": 0.11479417979717255, "learning_rate": 0.0005, "loss": 2.1267, "step": 55690 }, { "epoch": 0.212007947443344, "grad_norm": 0.1199273020029068, "learning_rate": 0.0005, "loss": 2.1304, "step": 55700 }, { "epoch": 0.2120460099114667, "grad_norm": 0.12614107131958008, "learning_rate": 0.0005, "loss": 2.1378, "step": 55710 }, { "epoch": 0.21208407237958937, "grad_norm": 0.13167840242385864, "learning_rate": 0.0005, "loss": 2.1398, "step": 55720 }, { "epoch": 0.21212213484771206, "grad_norm": 0.13410525023937225, "learning_rate": 0.0005, "loss": 2.1376, "step": 55730 }, { "epoch": 0.21216019731583474, "grad_norm": 0.11915973573923111, "learning_rate": 0.0005, "loss": 2.1359, "step": 55740 }, { "epoch": 0.21219825978395743, "grad_norm": 0.11454334855079651, "learning_rate": 0.0005, "loss": 2.1438, "step": 55750 }, { "epoch": 0.2122363222520801, "grad_norm": 0.11463811993598938, "learning_rate": 0.0005, "loss": 2.1367, "step": 55760 }, { "epoch": 0.2122743847202028, "grad_norm": 0.12144269049167633, "learning_rate": 0.0005, "loss": 2.1434, "step": 55770 }, { "epoch": 0.21231244718832548, "grad_norm": 0.1233169436454773, "learning_rate": 0.0005, "loss": 2.143, "step": 55780 }, { "epoch": 0.21235050965644817, "grad_norm": 0.12609365582466125, "learning_rate": 0.0005, "loss": 2.1371, "step": 55790 }, { "epoch": 0.21238857212457085, "grad_norm": 0.12684963643550873, "learning_rate": 0.0005, "loss": 2.1135, "step": 55800 }, { "epoch": 0.21242663459269354, "grad_norm": 0.11530327051877975, "learning_rate": 0.0005, "loss": 2.1194, "step": 55810 }, { "epoch": 0.21246469706081622, "grad_norm": 0.1189451664686203, "learning_rate": 0.0005, "loss": 2.1395, "step": 55820 }, { "epoch": 0.2125027595289389, "grad_norm": 0.12495563924312592, "learning_rate": 0.0005, "loss": 2.1307, "step": 55830 }, { "epoch": 0.2125408219970616, "grad_norm": 0.1268143653869629, "learning_rate": 0.0005, "loss": 2.1425, "step": 55840 }, { "epoch": 0.21257888446518425, "grad_norm": 0.12778593599796295, "learning_rate": 0.0005, "loss": 2.1201, "step": 55850 }, { "epoch": 0.21261694693330693, "grad_norm": 0.12761004269123077, "learning_rate": 0.0005, "loss": 2.1505, "step": 55860 }, { "epoch": 0.21265500940142962, "grad_norm": 0.11692184954881668, "learning_rate": 0.0005, "loss": 2.129, "step": 55870 }, { "epoch": 0.2126930718695523, "grad_norm": 0.13846901059150696, "learning_rate": 0.0005, "loss": 2.1159, "step": 55880 }, { "epoch": 0.212731134337675, "grad_norm": 0.11994395405054092, "learning_rate": 0.0005, "loss": 2.1297, "step": 55890 }, { "epoch": 0.21276919680579767, "grad_norm": 0.11813051998615265, "learning_rate": 0.0005, "loss": 2.1181, "step": 55900 }, { "epoch": 0.21280725927392036, "grad_norm": 0.11680085957050323, "learning_rate": 0.0005, "loss": 2.1302, "step": 55910 }, { "epoch": 0.21284532174204304, "grad_norm": 0.1264512985944748, "learning_rate": 0.0005, "loss": 2.1313, "step": 55920 }, { "epoch": 0.21288338421016573, "grad_norm": 0.12074155360460281, "learning_rate": 0.0005, "loss": 2.1334, "step": 55930 }, { "epoch": 0.2129214466782884, "grad_norm": 0.11390884965658188, "learning_rate": 0.0005, "loss": 2.1441, "step": 55940 }, { "epoch": 0.2129595091464111, "grad_norm": 0.10812906920909882, "learning_rate": 0.0005, "loss": 2.1203, "step": 55950 }, { "epoch": 0.21299757161453378, "grad_norm": 0.11598875373601913, "learning_rate": 0.0005, "loss": 2.1331, "step": 55960 }, { "epoch": 0.21303563408265647, "grad_norm": 0.14625869691371918, "learning_rate": 0.0005, "loss": 2.1328, "step": 55970 }, { "epoch": 0.21307369655077915, "grad_norm": 0.13654451072216034, "learning_rate": 0.0005, "loss": 2.1447, "step": 55980 }, { "epoch": 0.2131117590189018, "grad_norm": 0.12408126890659332, "learning_rate": 0.0005, "loss": 2.145, "step": 55990 }, { "epoch": 0.2131498214870245, "grad_norm": 0.12511536478996277, "learning_rate": 0.0005, "loss": 2.1173, "step": 56000 }, { "epoch": 0.21318788395514718, "grad_norm": 0.1340360790491104, "learning_rate": 0.0005, "loss": 2.1498, "step": 56010 }, { "epoch": 0.21322594642326986, "grad_norm": 0.13497580587863922, "learning_rate": 0.0005, "loss": 2.1304, "step": 56020 }, { "epoch": 0.21326400889139255, "grad_norm": 0.12230490148067474, "learning_rate": 0.0005, "loss": 2.1138, "step": 56030 }, { "epoch": 0.21330207135951523, "grad_norm": 0.11444181203842163, "learning_rate": 0.0005, "loss": 2.1269, "step": 56040 }, { "epoch": 0.21334013382763792, "grad_norm": 0.12098285555839539, "learning_rate": 0.0005, "loss": 2.1266, "step": 56050 }, { "epoch": 0.2133781962957606, "grad_norm": 0.13039463758468628, "learning_rate": 0.0005, "loss": 2.142, "step": 56060 }, { "epoch": 0.2134162587638833, "grad_norm": 0.12801070511341095, "learning_rate": 0.0005, "loss": 2.1291, "step": 56070 }, { "epoch": 0.21345432123200597, "grad_norm": 0.12597905099391937, "learning_rate": 0.0005, "loss": 2.124, "step": 56080 }, { "epoch": 0.21349238370012866, "grad_norm": 0.1314854919910431, "learning_rate": 0.0005, "loss": 2.1433, "step": 56090 }, { "epoch": 0.21353044616825134, "grad_norm": 0.11816215515136719, "learning_rate": 0.0005, "loss": 2.1349, "step": 56100 }, { "epoch": 0.21356850863637403, "grad_norm": 0.1357698142528534, "learning_rate": 0.0005, "loss": 2.1182, "step": 56110 }, { "epoch": 0.2136065711044967, "grad_norm": 0.13428688049316406, "learning_rate": 0.0005, "loss": 2.1394, "step": 56120 }, { "epoch": 0.2136446335726194, "grad_norm": 0.1258360743522644, "learning_rate": 0.0005, "loss": 2.1288, "step": 56130 }, { "epoch": 0.21368269604074205, "grad_norm": 0.12718361616134644, "learning_rate": 0.0005, "loss": 2.1336, "step": 56140 }, { "epoch": 0.21372075850886474, "grad_norm": 0.1284942328929901, "learning_rate": 0.0005, "loss": 2.1336, "step": 56150 }, { "epoch": 0.21375882097698742, "grad_norm": 0.12394726276397705, "learning_rate": 0.0005, "loss": 2.1317, "step": 56160 }, { "epoch": 0.2137968834451101, "grad_norm": 0.10978233814239502, "learning_rate": 0.0005, "loss": 2.1207, "step": 56170 }, { "epoch": 0.2138349459132328, "grad_norm": 0.1203223243355751, "learning_rate": 0.0005, "loss": 2.1431, "step": 56180 }, { "epoch": 0.21387300838135548, "grad_norm": 0.12922045588493347, "learning_rate": 0.0005, "loss": 2.1373, "step": 56190 }, { "epoch": 0.21391107084947816, "grad_norm": 0.12522399425506592, "learning_rate": 0.0005, "loss": 2.1338, "step": 56200 }, { "epoch": 0.21394913331760085, "grad_norm": 0.12170863896608353, "learning_rate": 0.0005, "loss": 2.1265, "step": 56210 }, { "epoch": 0.21398719578572353, "grad_norm": 0.1256794035434723, "learning_rate": 0.0005, "loss": 2.1367, "step": 56220 }, { "epoch": 0.21402525825384622, "grad_norm": 0.13030460476875305, "learning_rate": 0.0005, "loss": 2.1272, "step": 56230 }, { "epoch": 0.2140633207219689, "grad_norm": 0.11182166635990143, "learning_rate": 0.0005, "loss": 2.1392, "step": 56240 }, { "epoch": 0.2141013831900916, "grad_norm": 0.12376523017883301, "learning_rate": 0.0005, "loss": 2.1342, "step": 56250 }, { "epoch": 0.21413944565821427, "grad_norm": 0.1058717891573906, "learning_rate": 0.0005, "loss": 2.1368, "step": 56260 }, { "epoch": 0.21417750812633696, "grad_norm": 0.11659777164459229, "learning_rate": 0.0005, "loss": 2.1354, "step": 56270 }, { "epoch": 0.21421557059445961, "grad_norm": 0.12966254353523254, "learning_rate": 0.0005, "loss": 2.1243, "step": 56280 }, { "epoch": 0.2142536330625823, "grad_norm": 0.12180996686220169, "learning_rate": 0.0005, "loss": 2.1324, "step": 56290 }, { "epoch": 0.21429169553070498, "grad_norm": 0.1273222416639328, "learning_rate": 0.0005, "loss": 2.1176, "step": 56300 }, { "epoch": 0.21432975799882767, "grad_norm": 0.12970444560050964, "learning_rate": 0.0005, "loss": 2.1282, "step": 56310 }, { "epoch": 0.21436782046695035, "grad_norm": 0.11615218967199326, "learning_rate": 0.0005, "loss": 2.123, "step": 56320 }, { "epoch": 0.21440588293507304, "grad_norm": 0.13962231576442719, "learning_rate": 0.0005, "loss": 2.1351, "step": 56330 }, { "epoch": 0.21444394540319572, "grad_norm": 0.13303855061531067, "learning_rate": 0.0005, "loss": 2.1289, "step": 56340 }, { "epoch": 0.2144820078713184, "grad_norm": 0.12693250179290771, "learning_rate": 0.0005, "loss": 2.1295, "step": 56350 }, { "epoch": 0.2145200703394411, "grad_norm": 0.11924530565738678, "learning_rate": 0.0005, "loss": 2.1403, "step": 56360 }, { "epoch": 0.21455813280756378, "grad_norm": 0.12394033372402191, "learning_rate": 0.0005, "loss": 2.1369, "step": 56370 }, { "epoch": 0.21459619527568646, "grad_norm": 0.12687383592128754, "learning_rate": 0.0005, "loss": 2.1363, "step": 56380 }, { "epoch": 0.21463425774380915, "grad_norm": 0.1161162480711937, "learning_rate": 0.0005, "loss": 2.1356, "step": 56390 }, { "epoch": 0.21467232021193183, "grad_norm": 0.12959563732147217, "learning_rate": 0.0005, "loss": 2.1299, "step": 56400 }, { "epoch": 0.21471038268005452, "grad_norm": 0.1229131892323494, "learning_rate": 0.0005, "loss": 2.1319, "step": 56410 }, { "epoch": 0.21474844514817718, "grad_norm": 0.12190618366003036, "learning_rate": 0.0005, "loss": 2.1139, "step": 56420 }, { "epoch": 0.21478650761629986, "grad_norm": 0.11919743567705154, "learning_rate": 0.0005, "loss": 2.13, "step": 56430 }, { "epoch": 0.21482457008442254, "grad_norm": 0.12470883876085281, "learning_rate": 0.0005, "loss": 2.1408, "step": 56440 }, { "epoch": 0.21486263255254523, "grad_norm": 0.12248383462429047, "learning_rate": 0.0005, "loss": 2.138, "step": 56450 }, { "epoch": 0.21490069502066791, "grad_norm": 0.12124987691640854, "learning_rate": 0.0005, "loss": 2.1407, "step": 56460 }, { "epoch": 0.2149387574887906, "grad_norm": 0.13539837300777435, "learning_rate": 0.0005, "loss": 2.1318, "step": 56470 }, { "epoch": 0.21497681995691328, "grad_norm": 0.12916968762874603, "learning_rate": 0.0005, "loss": 2.1375, "step": 56480 }, { "epoch": 0.21501488242503597, "grad_norm": 0.11934902518987656, "learning_rate": 0.0005, "loss": 2.1204, "step": 56490 }, { "epoch": 0.21505294489315865, "grad_norm": 0.1366918981075287, "learning_rate": 0.0005, "loss": 2.1333, "step": 56500 }, { "epoch": 0.21509100736128134, "grad_norm": 0.11489017307758331, "learning_rate": 0.0005, "loss": 2.1223, "step": 56510 }, { "epoch": 0.21512906982940402, "grad_norm": 0.1359739601612091, "learning_rate": 0.0005, "loss": 2.1234, "step": 56520 }, { "epoch": 0.2151671322975267, "grad_norm": 0.11837109923362732, "learning_rate": 0.0005, "loss": 2.1305, "step": 56530 }, { "epoch": 0.2152051947656494, "grad_norm": 0.11647136509418488, "learning_rate": 0.0005, "loss": 2.1193, "step": 56540 }, { "epoch": 0.21524325723377208, "grad_norm": 0.13424059748649597, "learning_rate": 0.0005, "loss": 2.1292, "step": 56550 }, { "epoch": 0.21528131970189476, "grad_norm": 0.12664872407913208, "learning_rate": 0.0005, "loss": 2.1136, "step": 56560 }, { "epoch": 0.21531938217001742, "grad_norm": 0.12171950936317444, "learning_rate": 0.0005, "loss": 2.1375, "step": 56570 }, { "epoch": 0.2153574446381401, "grad_norm": 0.12074828892946243, "learning_rate": 0.0005, "loss": 2.1271, "step": 56580 }, { "epoch": 0.2153955071062628, "grad_norm": 0.1294257789850235, "learning_rate": 0.0005, "loss": 2.1247, "step": 56590 }, { "epoch": 0.21543356957438548, "grad_norm": 0.11204027384519577, "learning_rate": 0.0005, "loss": 2.1196, "step": 56600 }, { "epoch": 0.21547163204250816, "grad_norm": 0.12621349096298218, "learning_rate": 0.0005, "loss": 2.1311, "step": 56610 }, { "epoch": 0.21550969451063084, "grad_norm": 0.13478349149227142, "learning_rate": 0.0005, "loss": 2.1275, "step": 56620 }, { "epoch": 0.21554775697875353, "grad_norm": 0.11155591160058975, "learning_rate": 0.0005, "loss": 2.1175, "step": 56630 }, { "epoch": 0.21558581944687621, "grad_norm": 0.12927065789699554, "learning_rate": 0.0005, "loss": 2.1373, "step": 56640 }, { "epoch": 0.2156238819149989, "grad_norm": 0.11920293420553207, "learning_rate": 0.0005, "loss": 2.1124, "step": 56650 }, { "epoch": 0.21566194438312158, "grad_norm": 0.12005160003900528, "learning_rate": 0.0005, "loss": 2.1294, "step": 56660 }, { "epoch": 0.21570000685124427, "grad_norm": 0.1394878476858139, "learning_rate": 0.0005, "loss": 2.129, "step": 56670 }, { "epoch": 0.21573806931936695, "grad_norm": 0.12196838855743408, "learning_rate": 0.0005, "loss": 2.127, "step": 56680 }, { "epoch": 0.21577613178748964, "grad_norm": 0.11932896822690964, "learning_rate": 0.0005, "loss": 2.1297, "step": 56690 }, { "epoch": 0.21581419425561232, "grad_norm": 0.12393977493047714, "learning_rate": 0.0005, "loss": 2.1286, "step": 56700 }, { "epoch": 0.21585225672373498, "grad_norm": 0.13766314089298248, "learning_rate": 0.0005, "loss": 2.1223, "step": 56710 }, { "epoch": 0.21589031919185767, "grad_norm": 0.13553282618522644, "learning_rate": 0.0005, "loss": 2.1437, "step": 56720 }, { "epoch": 0.21592838165998035, "grad_norm": 0.18950755894184113, "learning_rate": 0.0005, "loss": 2.1284, "step": 56730 }, { "epoch": 0.21596644412810304, "grad_norm": 0.12270800769329071, "learning_rate": 0.0005, "loss": 2.1231, "step": 56740 }, { "epoch": 0.21600450659622572, "grad_norm": 0.11453820019960403, "learning_rate": 0.0005, "loss": 2.1551, "step": 56750 }, { "epoch": 0.2160425690643484, "grad_norm": 0.13194343447685242, "learning_rate": 0.0005, "loss": 2.124, "step": 56760 }, { "epoch": 0.2160806315324711, "grad_norm": 0.1288219690322876, "learning_rate": 0.0005, "loss": 2.1435, "step": 56770 }, { "epoch": 0.21611869400059378, "grad_norm": 0.12786085903644562, "learning_rate": 0.0005, "loss": 2.1344, "step": 56780 }, { "epoch": 0.21615675646871646, "grad_norm": 0.1349632740020752, "learning_rate": 0.0005, "loss": 2.1192, "step": 56790 }, { "epoch": 0.21619481893683914, "grad_norm": 0.12566514313220978, "learning_rate": 0.0005, "loss": 2.1361, "step": 56800 }, { "epoch": 0.21623288140496183, "grad_norm": 0.11484618484973907, "learning_rate": 0.0005, "loss": 2.1302, "step": 56810 }, { "epoch": 0.21627094387308451, "grad_norm": 0.12926548719406128, "learning_rate": 0.0005, "loss": 2.1232, "step": 56820 }, { "epoch": 0.2163090063412072, "grad_norm": 0.143005833029747, "learning_rate": 0.0005, "loss": 2.1409, "step": 56830 }, { "epoch": 0.21634706880932988, "grad_norm": 0.11763927340507507, "learning_rate": 0.0005, "loss": 2.1252, "step": 56840 }, { "epoch": 0.21638513127745254, "grad_norm": 0.14074474573135376, "learning_rate": 0.0005, "loss": 2.1358, "step": 56850 }, { "epoch": 0.21642319374557523, "grad_norm": 0.13238893449306488, "learning_rate": 0.0005, "loss": 2.13, "step": 56860 }, { "epoch": 0.2164612562136979, "grad_norm": 0.11847522109746933, "learning_rate": 0.0005, "loss": 2.1235, "step": 56870 }, { "epoch": 0.2164993186818206, "grad_norm": 0.13169977068901062, "learning_rate": 0.0005, "loss": 2.1505, "step": 56880 }, { "epoch": 0.21653738114994328, "grad_norm": 0.12500376999378204, "learning_rate": 0.0005, "loss": 2.128, "step": 56890 }, { "epoch": 0.21657544361806597, "grad_norm": 0.1372148096561432, "learning_rate": 0.0005, "loss": 2.1219, "step": 56900 }, { "epoch": 0.21661350608618865, "grad_norm": 0.13714422285556793, "learning_rate": 0.0005, "loss": 2.1253, "step": 56910 }, { "epoch": 0.21665156855431134, "grad_norm": 0.12532460689544678, "learning_rate": 0.0005, "loss": 2.1398, "step": 56920 }, { "epoch": 0.21668963102243402, "grad_norm": 0.45631900429725647, "learning_rate": 0.0005, "loss": 2.1333, "step": 56930 }, { "epoch": 0.2167276934905567, "grad_norm": 0.13041189312934875, "learning_rate": 0.0005, "loss": 2.1262, "step": 56940 }, { "epoch": 0.2167657559586794, "grad_norm": 0.1290024071931839, "learning_rate": 0.0005, "loss": 2.1292, "step": 56950 }, { "epoch": 0.21680381842680208, "grad_norm": 0.12570427358150482, "learning_rate": 0.0005, "loss": 2.1297, "step": 56960 }, { "epoch": 0.21684188089492476, "grad_norm": 0.12115761637687683, "learning_rate": 0.0005, "loss": 2.1312, "step": 56970 }, { "epoch": 0.21687994336304744, "grad_norm": 0.1134210154414177, "learning_rate": 0.0005, "loss": 2.1349, "step": 56980 }, { "epoch": 0.21691800583117013, "grad_norm": 0.12490539252758026, "learning_rate": 0.0005, "loss": 2.1326, "step": 56990 }, { "epoch": 0.2169560682992928, "grad_norm": 0.11384977400302887, "learning_rate": 0.0005, "loss": 2.1082, "step": 57000 }, { "epoch": 0.21699413076741547, "grad_norm": 0.12341322749853134, "learning_rate": 0.0005, "loss": 2.1339, "step": 57010 }, { "epoch": 0.21703219323553816, "grad_norm": 0.11756706982851028, "learning_rate": 0.0005, "loss": 2.1289, "step": 57020 }, { "epoch": 0.21707025570366084, "grad_norm": 0.11976876109838486, "learning_rate": 0.0005, "loss": 2.1333, "step": 57030 }, { "epoch": 0.21710831817178353, "grad_norm": 0.12024601548910141, "learning_rate": 0.0005, "loss": 2.1338, "step": 57040 }, { "epoch": 0.2171463806399062, "grad_norm": 0.11507564783096313, "learning_rate": 0.0005, "loss": 2.1188, "step": 57050 }, { "epoch": 0.2171844431080289, "grad_norm": 0.14740623533725739, "learning_rate": 0.0005, "loss": 2.1284, "step": 57060 }, { "epoch": 0.21722250557615158, "grad_norm": 0.12662845849990845, "learning_rate": 0.0005, "loss": 2.1357, "step": 57070 }, { "epoch": 0.21726056804427427, "grad_norm": 0.1305474191904068, "learning_rate": 0.0005, "loss": 2.1359, "step": 57080 }, { "epoch": 0.21729863051239695, "grad_norm": 0.1467132717370987, "learning_rate": 0.0005, "loss": 2.131, "step": 57090 }, { "epoch": 0.21733669298051964, "grad_norm": 0.13564428687095642, "learning_rate": 0.0005, "loss": 2.1317, "step": 57100 }, { "epoch": 0.21737475544864232, "grad_norm": 0.1273927390575409, "learning_rate": 0.0005, "loss": 2.1306, "step": 57110 }, { "epoch": 0.217412817916765, "grad_norm": 0.14611203968524933, "learning_rate": 0.0005, "loss": 2.1242, "step": 57120 }, { "epoch": 0.2174508803848877, "grad_norm": 0.1306157112121582, "learning_rate": 0.0005, "loss": 2.1303, "step": 57130 }, { "epoch": 0.21748894285301035, "grad_norm": 0.12513276934623718, "learning_rate": 0.0005, "loss": 2.1399, "step": 57140 }, { "epoch": 0.21752700532113303, "grad_norm": 0.11249308288097382, "learning_rate": 0.0005, "loss": 2.1121, "step": 57150 }, { "epoch": 0.21756506778925572, "grad_norm": 0.12884452939033508, "learning_rate": 0.0005, "loss": 2.1305, "step": 57160 }, { "epoch": 0.2176031302573784, "grad_norm": 0.1316312849521637, "learning_rate": 0.0005, "loss": 2.1288, "step": 57170 }, { "epoch": 0.2176411927255011, "grad_norm": 0.1316588968038559, "learning_rate": 0.0005, "loss": 2.1192, "step": 57180 }, { "epoch": 0.21767925519362377, "grad_norm": 0.12284323573112488, "learning_rate": 0.0005, "loss": 2.1337, "step": 57190 }, { "epoch": 0.21771731766174646, "grad_norm": 0.11931144446134567, "learning_rate": 0.0005, "loss": 2.1244, "step": 57200 }, { "epoch": 0.21775538012986914, "grad_norm": 0.12624037265777588, "learning_rate": 0.0005, "loss": 2.1358, "step": 57210 }, { "epoch": 0.21779344259799183, "grad_norm": 0.12465736269950867, "learning_rate": 0.0005, "loss": 2.1274, "step": 57220 }, { "epoch": 0.2178315050661145, "grad_norm": 0.12285862863063812, "learning_rate": 0.0005, "loss": 2.1264, "step": 57230 }, { "epoch": 0.2178695675342372, "grad_norm": 0.13169236481189728, "learning_rate": 0.0005, "loss": 2.1297, "step": 57240 }, { "epoch": 0.21790763000235988, "grad_norm": 0.1352112889289856, "learning_rate": 0.0005, "loss": 2.1286, "step": 57250 }, { "epoch": 0.21794569247048257, "grad_norm": 0.11984165757894516, "learning_rate": 0.0005, "loss": 2.1192, "step": 57260 }, { "epoch": 0.21798375493860525, "grad_norm": 0.12893593311309814, "learning_rate": 0.0005, "loss": 2.1332, "step": 57270 }, { "epoch": 0.2180218174067279, "grad_norm": 0.11700885742902756, "learning_rate": 0.0005, "loss": 2.1416, "step": 57280 }, { "epoch": 0.2180598798748506, "grad_norm": 0.24958999454975128, "learning_rate": 0.0005, "loss": 2.1432, "step": 57290 }, { "epoch": 0.21809794234297328, "grad_norm": 0.13076789677143097, "learning_rate": 0.0005, "loss": 2.1346, "step": 57300 }, { "epoch": 0.21813600481109596, "grad_norm": 0.12193410843610764, "learning_rate": 0.0005, "loss": 2.132, "step": 57310 }, { "epoch": 0.21817406727921865, "grad_norm": 0.1376117765903473, "learning_rate": 0.0005, "loss": 2.1288, "step": 57320 }, { "epoch": 0.21821212974734133, "grad_norm": 0.12163210660219193, "learning_rate": 0.0005, "loss": 2.141, "step": 57330 }, { "epoch": 0.21825019221546402, "grad_norm": 0.1255846470594406, "learning_rate": 0.0005, "loss": 2.1254, "step": 57340 }, { "epoch": 0.2182882546835867, "grad_norm": 0.13697326183319092, "learning_rate": 0.0005, "loss": 2.132, "step": 57350 }, { "epoch": 0.2183263171517094, "grad_norm": 0.12124402076005936, "learning_rate": 0.0005, "loss": 2.1457, "step": 57360 }, { "epoch": 0.21836437961983207, "grad_norm": 0.12365327030420303, "learning_rate": 0.0005, "loss": 2.1302, "step": 57370 }, { "epoch": 0.21840244208795476, "grad_norm": 0.11128167808055878, "learning_rate": 0.0005, "loss": 2.1158, "step": 57380 }, { "epoch": 0.21844050455607744, "grad_norm": 0.12737533450126648, "learning_rate": 0.0005, "loss": 2.1304, "step": 57390 }, { "epoch": 0.21847856702420013, "grad_norm": 0.1185334324836731, "learning_rate": 0.0005, "loss": 2.1426, "step": 57400 }, { "epoch": 0.2185166294923228, "grad_norm": 0.12620334327220917, "learning_rate": 0.0005, "loss": 2.1447, "step": 57410 }, { "epoch": 0.2185546919604455, "grad_norm": 0.11594204604625702, "learning_rate": 0.0005, "loss": 2.1211, "step": 57420 }, { "epoch": 0.21859275442856815, "grad_norm": 0.1219063401222229, "learning_rate": 0.0005, "loss": 2.1314, "step": 57430 }, { "epoch": 0.21863081689669084, "grad_norm": 0.11989148706197739, "learning_rate": 0.0005, "loss": 2.1399, "step": 57440 }, { "epoch": 0.21866887936481352, "grad_norm": 0.12347909063100815, "learning_rate": 0.0005, "loss": 2.1448, "step": 57450 }, { "epoch": 0.2187069418329362, "grad_norm": 0.13595686852931976, "learning_rate": 0.0005, "loss": 2.1327, "step": 57460 }, { "epoch": 0.2187450043010589, "grad_norm": 0.12859046459197998, "learning_rate": 0.0005, "loss": 2.1277, "step": 57470 }, { "epoch": 0.21878306676918158, "grad_norm": 0.12548702955245972, "learning_rate": 0.0005, "loss": 2.1392, "step": 57480 }, { "epoch": 0.21882112923730426, "grad_norm": 0.13332229852676392, "learning_rate": 0.0005, "loss": 2.1228, "step": 57490 }, { "epoch": 0.21885919170542695, "grad_norm": 0.1241702064871788, "learning_rate": 0.0005, "loss": 2.1245, "step": 57500 }, { "epoch": 0.21889725417354963, "grad_norm": 0.1415553241968155, "learning_rate": 0.0005, "loss": 2.1338, "step": 57510 }, { "epoch": 0.21893531664167232, "grad_norm": 0.151481032371521, "learning_rate": 0.0005, "loss": 2.1283, "step": 57520 }, { "epoch": 0.218973379109795, "grad_norm": 0.13601690530776978, "learning_rate": 0.0005, "loss": 2.1361, "step": 57530 }, { "epoch": 0.2190114415779177, "grad_norm": 0.13995510339736938, "learning_rate": 0.0005, "loss": 2.1309, "step": 57540 }, { "epoch": 0.21904950404604037, "grad_norm": 0.1266234964132309, "learning_rate": 0.0005, "loss": 2.1249, "step": 57550 }, { "epoch": 0.21908756651416306, "grad_norm": 0.11919340491294861, "learning_rate": 0.0005, "loss": 2.1196, "step": 57560 }, { "epoch": 0.21912562898228571, "grad_norm": 0.1267729252576828, "learning_rate": 0.0005, "loss": 2.1253, "step": 57570 }, { "epoch": 0.2191636914504084, "grad_norm": 0.11670149862766266, "learning_rate": 0.0005, "loss": 2.1289, "step": 57580 }, { "epoch": 0.21920175391853108, "grad_norm": 0.11498446017503738, "learning_rate": 0.0005, "loss": 2.1362, "step": 57590 }, { "epoch": 0.21923981638665377, "grad_norm": 0.1346731334924698, "learning_rate": 0.0005, "loss": 2.134, "step": 57600 }, { "epoch": 0.21927787885477645, "grad_norm": 0.12346021831035614, "learning_rate": 0.0005, "loss": 2.128, "step": 57610 }, { "epoch": 0.21931594132289914, "grad_norm": 0.11403003334999084, "learning_rate": 0.0005, "loss": 2.1431, "step": 57620 }, { "epoch": 0.21935400379102182, "grad_norm": 0.11895783990621567, "learning_rate": 0.0005, "loss": 2.1333, "step": 57630 }, { "epoch": 0.2193920662591445, "grad_norm": 0.13433951139450073, "learning_rate": 0.0005, "loss": 2.1418, "step": 57640 }, { "epoch": 0.2194301287272672, "grad_norm": 0.13205264508724213, "learning_rate": 0.0005, "loss": 2.1197, "step": 57650 }, { "epoch": 0.21946819119538988, "grad_norm": 0.13134273886680603, "learning_rate": 0.0005, "loss": 2.1391, "step": 57660 }, { "epoch": 0.21950625366351256, "grad_norm": 0.12045015394687653, "learning_rate": 0.0005, "loss": 2.1336, "step": 57670 }, { "epoch": 0.21954431613163525, "grad_norm": 0.1349083036184311, "learning_rate": 0.0005, "loss": 2.1303, "step": 57680 }, { "epoch": 0.21958237859975793, "grad_norm": 0.13863080739974976, "learning_rate": 0.0005, "loss": 2.1202, "step": 57690 }, { "epoch": 0.21962044106788062, "grad_norm": 0.13695912063121796, "learning_rate": 0.0005, "loss": 2.1294, "step": 57700 }, { "epoch": 0.2196585035360033, "grad_norm": 0.12278943508863449, "learning_rate": 0.0005, "loss": 2.1411, "step": 57710 }, { "epoch": 0.21969656600412596, "grad_norm": 0.1360238492488861, "learning_rate": 0.0005, "loss": 2.1343, "step": 57720 }, { "epoch": 0.21973462847224864, "grad_norm": 0.14207656681537628, "learning_rate": 0.0005, "loss": 2.1477, "step": 57730 }, { "epoch": 0.21977269094037133, "grad_norm": 0.13055188953876495, "learning_rate": 0.0005, "loss": 2.1302, "step": 57740 }, { "epoch": 0.21981075340849401, "grad_norm": 0.12562668323516846, "learning_rate": 0.0005, "loss": 2.1335, "step": 57750 }, { "epoch": 0.2198488158766167, "grad_norm": 0.12057308107614517, "learning_rate": 0.0005, "loss": 2.1252, "step": 57760 }, { "epoch": 0.21988687834473938, "grad_norm": 0.14905333518981934, "learning_rate": 0.0005, "loss": 2.1262, "step": 57770 }, { "epoch": 0.21992494081286207, "grad_norm": 0.14193713665008545, "learning_rate": 0.0005, "loss": 2.1335, "step": 57780 }, { "epoch": 0.21996300328098475, "grad_norm": 0.1211729571223259, "learning_rate": 0.0005, "loss": 2.1305, "step": 57790 }, { "epoch": 0.22000106574910744, "grad_norm": 0.12316634505987167, "learning_rate": 0.0005, "loss": 2.1308, "step": 57800 }, { "epoch": 0.22003912821723012, "grad_norm": 0.11444377154111862, "learning_rate": 0.0005, "loss": 2.1345, "step": 57810 }, { "epoch": 0.2200771906853528, "grad_norm": 0.1265476793050766, "learning_rate": 0.0005, "loss": 2.1383, "step": 57820 }, { "epoch": 0.2201152531534755, "grad_norm": 0.12637244164943695, "learning_rate": 0.0005, "loss": 2.1271, "step": 57830 }, { "epoch": 0.22015331562159818, "grad_norm": 0.11900036782026291, "learning_rate": 0.0005, "loss": 2.1343, "step": 57840 }, { "epoch": 0.22019137808972086, "grad_norm": 0.1138107106089592, "learning_rate": 0.0005, "loss": 2.1416, "step": 57850 }, { "epoch": 0.22022944055784352, "grad_norm": 0.11928112804889679, "learning_rate": 0.0005, "loss": 2.1467, "step": 57860 }, { "epoch": 0.2202675030259662, "grad_norm": 0.12178890407085419, "learning_rate": 0.0005, "loss": 2.1371, "step": 57870 }, { "epoch": 0.2203055654940889, "grad_norm": 0.12526173889636993, "learning_rate": 0.0005, "loss": 2.1054, "step": 57880 }, { "epoch": 0.22034362796221157, "grad_norm": 0.11737733334302902, "learning_rate": 0.0005, "loss": 2.1396, "step": 57890 }, { "epoch": 0.22038169043033426, "grad_norm": 0.12308654934167862, "learning_rate": 0.0005, "loss": 2.1249, "step": 57900 }, { "epoch": 0.22041975289845694, "grad_norm": 0.1160750612616539, "learning_rate": 0.0005, "loss": 2.1295, "step": 57910 }, { "epoch": 0.22045781536657963, "grad_norm": 0.11439873278141022, "learning_rate": 0.0005, "loss": 2.14, "step": 57920 }, { "epoch": 0.22049587783470231, "grad_norm": 0.12854161858558655, "learning_rate": 0.0005, "loss": 2.1406, "step": 57930 }, { "epoch": 0.220533940302825, "grad_norm": 0.11376269161701202, "learning_rate": 0.0005, "loss": 2.1242, "step": 57940 }, { "epoch": 0.22057200277094768, "grad_norm": 0.11966376006603241, "learning_rate": 0.0005, "loss": 2.1457, "step": 57950 }, { "epoch": 0.22061006523907037, "grad_norm": 0.12348086386919022, "learning_rate": 0.0005, "loss": 2.1228, "step": 57960 }, { "epoch": 0.22064812770719305, "grad_norm": 0.11511658877134323, "learning_rate": 0.0005, "loss": 2.1294, "step": 57970 }, { "epoch": 0.22068619017531574, "grad_norm": 0.12527251243591309, "learning_rate": 0.0005, "loss": 2.1393, "step": 57980 }, { "epoch": 0.22072425264343842, "grad_norm": 0.120834581553936, "learning_rate": 0.0005, "loss": 2.1307, "step": 57990 }, { "epoch": 0.22076231511156108, "grad_norm": 0.12000294774770737, "learning_rate": 0.0005, "loss": 2.1321, "step": 58000 }, { "epoch": 0.22080037757968377, "grad_norm": 0.1237744390964508, "learning_rate": 0.0005, "loss": 2.1267, "step": 58010 }, { "epoch": 0.22083844004780645, "grad_norm": 0.132208913564682, "learning_rate": 0.0005, "loss": 2.1381, "step": 58020 }, { "epoch": 0.22087650251592914, "grad_norm": 0.12867999076843262, "learning_rate": 0.0005, "loss": 2.1393, "step": 58030 }, { "epoch": 0.22091456498405182, "grad_norm": 0.12405936419963837, "learning_rate": 0.0005, "loss": 2.1379, "step": 58040 }, { "epoch": 0.2209526274521745, "grad_norm": 0.12567295134067535, "learning_rate": 0.0005, "loss": 2.1247, "step": 58050 }, { "epoch": 0.2209906899202972, "grad_norm": 0.1235484927892685, "learning_rate": 0.0005, "loss": 2.126, "step": 58060 }, { "epoch": 0.22102875238841987, "grad_norm": 0.13820816576480865, "learning_rate": 0.0005, "loss": 2.1299, "step": 58070 }, { "epoch": 0.22106681485654256, "grad_norm": 0.12887045741081238, "learning_rate": 0.0005, "loss": 2.1367, "step": 58080 }, { "epoch": 0.22110487732466524, "grad_norm": 0.13162380456924438, "learning_rate": 0.0005, "loss": 2.1419, "step": 58090 }, { "epoch": 0.22114293979278793, "grad_norm": 0.11963611096143723, "learning_rate": 0.0005, "loss": 2.1083, "step": 58100 }, { "epoch": 0.22118100226091061, "grad_norm": 0.11864755302667618, "learning_rate": 0.0005, "loss": 2.1344, "step": 58110 }, { "epoch": 0.2212190647290333, "grad_norm": 0.11773239076137543, "learning_rate": 0.0005, "loss": 2.126, "step": 58120 }, { "epoch": 0.22125712719715598, "grad_norm": 0.135623961687088, "learning_rate": 0.0005, "loss": 2.1335, "step": 58130 }, { "epoch": 0.22129518966527867, "grad_norm": 0.12874767184257507, "learning_rate": 0.0005, "loss": 2.1267, "step": 58140 }, { "epoch": 0.22133325213340133, "grad_norm": 0.1876586377620697, "learning_rate": 0.0005, "loss": 2.1246, "step": 58150 }, { "epoch": 0.221371314601524, "grad_norm": 0.12583576142787933, "learning_rate": 0.0005, "loss": 2.136, "step": 58160 }, { "epoch": 0.2214093770696467, "grad_norm": 0.13214285671710968, "learning_rate": 0.0005, "loss": 2.1138, "step": 58170 }, { "epoch": 0.22144743953776938, "grad_norm": 0.12951025366783142, "learning_rate": 0.0005, "loss": 2.1343, "step": 58180 }, { "epoch": 0.22148550200589207, "grad_norm": 0.13020943105220795, "learning_rate": 0.0005, "loss": 2.1322, "step": 58190 }, { "epoch": 0.22152356447401475, "grad_norm": 0.11596996337175369, "learning_rate": 0.0005, "loss": 2.1243, "step": 58200 }, { "epoch": 0.22156162694213744, "grad_norm": 0.12256282567977905, "learning_rate": 0.0005, "loss": 2.1418, "step": 58210 }, { "epoch": 0.22159968941026012, "grad_norm": 0.11792580038309097, "learning_rate": 0.0005, "loss": 2.1272, "step": 58220 }, { "epoch": 0.2216377518783828, "grad_norm": 0.11020893603563309, "learning_rate": 0.0005, "loss": 2.1259, "step": 58230 }, { "epoch": 0.2216758143465055, "grad_norm": 0.12330953776836395, "learning_rate": 0.0005, "loss": 2.1313, "step": 58240 }, { "epoch": 0.22171387681462817, "grad_norm": 0.14022402465343475, "learning_rate": 0.0005, "loss": 2.1342, "step": 58250 }, { "epoch": 0.22175193928275086, "grad_norm": 0.129679337143898, "learning_rate": 0.0005, "loss": 2.1409, "step": 58260 }, { "epoch": 0.22179000175087354, "grad_norm": 0.12865565717220306, "learning_rate": 0.0005, "loss": 2.1401, "step": 58270 }, { "epoch": 0.22182806421899623, "grad_norm": 0.13360846042633057, "learning_rate": 0.0005, "loss": 2.1174, "step": 58280 }, { "epoch": 0.2218661266871189, "grad_norm": 0.12509751319885254, "learning_rate": 0.0005, "loss": 2.1267, "step": 58290 }, { "epoch": 0.22190418915524157, "grad_norm": 0.127641499042511, "learning_rate": 0.0005, "loss": 2.1223, "step": 58300 }, { "epoch": 0.22194225162336426, "grad_norm": 0.11071023344993591, "learning_rate": 0.0005, "loss": 2.1355, "step": 58310 }, { "epoch": 0.22198031409148694, "grad_norm": 0.11509322375059128, "learning_rate": 0.0005, "loss": 2.1218, "step": 58320 }, { "epoch": 0.22201837655960963, "grad_norm": 0.11422469466924667, "learning_rate": 0.0005, "loss": 2.1308, "step": 58330 }, { "epoch": 0.2220564390277323, "grad_norm": 0.1354234218597412, "learning_rate": 0.0005, "loss": 2.1508, "step": 58340 }, { "epoch": 0.222094501495855, "grad_norm": 0.1264037936925888, "learning_rate": 0.0005, "loss": 2.1439, "step": 58350 }, { "epoch": 0.22213256396397768, "grad_norm": 0.12463847547769547, "learning_rate": 0.0005, "loss": 2.1435, "step": 58360 }, { "epoch": 0.22217062643210037, "grad_norm": 0.11444063484668732, "learning_rate": 0.0005, "loss": 2.1339, "step": 58370 }, { "epoch": 0.22220868890022305, "grad_norm": 0.12904338538646698, "learning_rate": 0.0005, "loss": 2.1466, "step": 58380 }, { "epoch": 0.22224675136834574, "grad_norm": 0.1209927424788475, "learning_rate": 0.0005, "loss": 2.1276, "step": 58390 }, { "epoch": 0.22228481383646842, "grad_norm": 0.14710427820682526, "learning_rate": 0.0005, "loss": 2.1266, "step": 58400 }, { "epoch": 0.2223228763045911, "grad_norm": 0.12501521408557892, "learning_rate": 0.0005, "loss": 2.1267, "step": 58410 }, { "epoch": 0.2223609387727138, "grad_norm": 0.11510436236858368, "learning_rate": 0.0005, "loss": 2.1175, "step": 58420 }, { "epoch": 0.22239900124083645, "grad_norm": 0.12269311398267746, "learning_rate": 0.0005, "loss": 2.1183, "step": 58430 }, { "epoch": 0.22243706370895913, "grad_norm": 0.12804774940013885, "learning_rate": 0.0005, "loss": 2.1221, "step": 58440 }, { "epoch": 0.22247512617708182, "grad_norm": 0.12788155674934387, "learning_rate": 0.0005, "loss": 2.1245, "step": 58450 }, { "epoch": 0.2225131886452045, "grad_norm": 0.12186574190855026, "learning_rate": 0.0005, "loss": 2.1391, "step": 58460 }, { "epoch": 0.2225512511133272, "grad_norm": 0.13440869748592377, "learning_rate": 0.0005, "loss": 2.1328, "step": 58470 }, { "epoch": 0.22258931358144987, "grad_norm": 0.1120695099234581, "learning_rate": 0.0005, "loss": 2.1371, "step": 58480 }, { "epoch": 0.22262737604957256, "grad_norm": 0.12774385511875153, "learning_rate": 0.0005, "loss": 2.1365, "step": 58490 }, { "epoch": 0.22266543851769524, "grad_norm": 0.12559452652931213, "learning_rate": 0.0005, "loss": 2.1286, "step": 58500 }, { "epoch": 0.22270350098581793, "grad_norm": 0.1218034029006958, "learning_rate": 0.0005, "loss": 2.1422, "step": 58510 }, { "epoch": 0.2227415634539406, "grad_norm": 0.12162283062934875, "learning_rate": 0.0005, "loss": 2.127, "step": 58520 }, { "epoch": 0.2227796259220633, "grad_norm": 0.13595223426818848, "learning_rate": 0.0005, "loss": 2.1275, "step": 58530 }, { "epoch": 0.22281768839018598, "grad_norm": 0.14094847440719604, "learning_rate": 0.0005, "loss": 2.1365, "step": 58540 }, { "epoch": 0.22285575085830867, "grad_norm": 0.14060798287391663, "learning_rate": 0.0005, "loss": 2.1268, "step": 58550 }, { "epoch": 0.22289381332643135, "grad_norm": 0.1249428391456604, "learning_rate": 0.0005, "loss": 2.131, "step": 58560 }, { "epoch": 0.22293187579455404, "grad_norm": 0.12495312839746475, "learning_rate": 0.0005, "loss": 2.1449, "step": 58570 }, { "epoch": 0.2229699382626767, "grad_norm": 0.12324405461549759, "learning_rate": 0.0005, "loss": 2.1359, "step": 58580 }, { "epoch": 0.22300800073079938, "grad_norm": 0.12775039672851562, "learning_rate": 0.0005, "loss": 2.1304, "step": 58590 }, { "epoch": 0.22304606319892206, "grad_norm": 0.12679044902324677, "learning_rate": 0.0005, "loss": 2.1269, "step": 58600 }, { "epoch": 0.22308412566704475, "grad_norm": 0.13023437559604645, "learning_rate": 0.0005, "loss": 2.1264, "step": 58610 }, { "epoch": 0.22312218813516743, "grad_norm": 0.13190847635269165, "learning_rate": 0.0005, "loss": 2.1143, "step": 58620 }, { "epoch": 0.22316025060329012, "grad_norm": 0.13421021401882172, "learning_rate": 0.0005, "loss": 2.129, "step": 58630 }, { "epoch": 0.2231983130714128, "grad_norm": 0.12466822564601898, "learning_rate": 0.0005, "loss": 2.1299, "step": 58640 }, { "epoch": 0.2232363755395355, "grad_norm": 0.12487687915563583, "learning_rate": 0.0005, "loss": 2.1367, "step": 58650 }, { "epoch": 0.22327443800765817, "grad_norm": 0.13558396697044373, "learning_rate": 0.0005, "loss": 2.1147, "step": 58660 }, { "epoch": 0.22331250047578086, "grad_norm": 0.12072444707155228, "learning_rate": 0.0005, "loss": 2.1265, "step": 58670 }, { "epoch": 0.22335056294390354, "grad_norm": 0.12258957326412201, "learning_rate": 0.0005, "loss": 2.1404, "step": 58680 }, { "epoch": 0.22338862541202623, "grad_norm": 0.12579022347927094, "learning_rate": 0.0005, "loss": 2.1241, "step": 58690 }, { "epoch": 0.2234266878801489, "grad_norm": 0.13216014206409454, "learning_rate": 0.0005, "loss": 2.1421, "step": 58700 }, { "epoch": 0.2234647503482716, "grad_norm": 0.12116879969835281, "learning_rate": 0.0005, "loss": 2.1339, "step": 58710 }, { "epoch": 0.22350281281639425, "grad_norm": 0.13866692781448364, "learning_rate": 0.0005, "loss": 2.1318, "step": 58720 }, { "epoch": 0.22354087528451694, "grad_norm": 0.13507381081581116, "learning_rate": 0.0005, "loss": 2.1346, "step": 58730 }, { "epoch": 0.22357893775263962, "grad_norm": 0.13417589664459229, "learning_rate": 0.0005, "loss": 2.1478, "step": 58740 }, { "epoch": 0.2236170002207623, "grad_norm": 0.12745191156864166, "learning_rate": 0.0005, "loss": 2.1183, "step": 58750 }, { "epoch": 0.223655062688885, "grad_norm": 0.13191528618335724, "learning_rate": 0.0005, "loss": 2.1224, "step": 58760 }, { "epoch": 0.22369312515700768, "grad_norm": 0.12790720164775848, "learning_rate": 0.0005, "loss": 2.1342, "step": 58770 }, { "epoch": 0.22373118762513036, "grad_norm": 0.1307389736175537, "learning_rate": 0.0005, "loss": 2.1261, "step": 58780 }, { "epoch": 0.22376925009325305, "grad_norm": 0.11681444197893143, "learning_rate": 0.0005, "loss": 2.122, "step": 58790 }, { "epoch": 0.22380731256137573, "grad_norm": 0.1257818192243576, "learning_rate": 0.0005, "loss": 2.1298, "step": 58800 }, { "epoch": 0.22384537502949842, "grad_norm": 0.12122656404972076, "learning_rate": 0.0005, "loss": 2.1252, "step": 58810 }, { "epoch": 0.2238834374976211, "grad_norm": 0.11498332023620605, "learning_rate": 0.0005, "loss": 2.1318, "step": 58820 }, { "epoch": 0.2239214999657438, "grad_norm": 0.12811604142189026, "learning_rate": 0.0005, "loss": 2.1269, "step": 58830 }, { "epoch": 0.22395956243386647, "grad_norm": 0.1314505636692047, "learning_rate": 0.0005, "loss": 2.1275, "step": 58840 }, { "epoch": 0.22399762490198916, "grad_norm": 0.12542065978050232, "learning_rate": 0.0005, "loss": 2.1295, "step": 58850 }, { "epoch": 0.22403568737011184, "grad_norm": 0.12298982590436935, "learning_rate": 0.0005, "loss": 2.1282, "step": 58860 }, { "epoch": 0.2240737498382345, "grad_norm": 0.12842786312103271, "learning_rate": 0.0005, "loss": 2.1226, "step": 58870 }, { "epoch": 0.22411181230635718, "grad_norm": 0.12505269050598145, "learning_rate": 0.0005, "loss": 2.1275, "step": 58880 }, { "epoch": 0.22414987477447987, "grad_norm": 0.13934390246868134, "learning_rate": 0.0005, "loss": 2.1335, "step": 58890 }, { "epoch": 0.22418793724260255, "grad_norm": 0.13097402453422546, "learning_rate": 0.0005, "loss": 2.1416, "step": 58900 }, { "epoch": 0.22422599971072524, "grad_norm": 0.1333012878894806, "learning_rate": 0.0005, "loss": 2.1388, "step": 58910 }, { "epoch": 0.22426406217884792, "grad_norm": 0.12261933088302612, "learning_rate": 0.0005, "loss": 2.1211, "step": 58920 }, { "epoch": 0.2243021246469706, "grad_norm": 0.13171008229255676, "learning_rate": 0.0005, "loss": 2.122, "step": 58930 }, { "epoch": 0.2243401871150933, "grad_norm": 0.12285832315683365, "learning_rate": 0.0005, "loss": 2.1278, "step": 58940 }, { "epoch": 0.22437824958321598, "grad_norm": 0.11739574372768402, "learning_rate": 0.0005, "loss": 2.1422, "step": 58950 }, { "epoch": 0.22441631205133866, "grad_norm": 0.12524369359016418, "learning_rate": 0.0005, "loss": 2.1373, "step": 58960 }, { "epoch": 0.22445437451946135, "grad_norm": 0.1167164146900177, "learning_rate": 0.0005, "loss": 2.1228, "step": 58970 }, { "epoch": 0.22449243698758403, "grad_norm": 0.12820903956890106, "learning_rate": 0.0005, "loss": 2.1294, "step": 58980 }, { "epoch": 0.22453049945570672, "grad_norm": 0.1264885663986206, "learning_rate": 0.0005, "loss": 2.1054, "step": 58990 }, { "epoch": 0.2245685619238294, "grad_norm": 0.12076272070407867, "learning_rate": 0.0005, "loss": 2.1398, "step": 59000 }, { "epoch": 0.22460662439195206, "grad_norm": 0.12537932395935059, "learning_rate": 0.0005, "loss": 2.1323, "step": 59010 }, { "epoch": 0.22464468686007474, "grad_norm": 0.1189693734049797, "learning_rate": 0.0005, "loss": 2.1138, "step": 59020 }, { "epoch": 0.22468274932819743, "grad_norm": 0.12680798768997192, "learning_rate": 0.0005, "loss": 2.129, "step": 59030 }, { "epoch": 0.2247208117963201, "grad_norm": 0.12540461122989655, "learning_rate": 0.0005, "loss": 2.1357, "step": 59040 }, { "epoch": 0.2247588742644428, "grad_norm": 0.12299077957868576, "learning_rate": 0.0005, "loss": 2.1311, "step": 59050 }, { "epoch": 0.22479693673256548, "grad_norm": 0.12139592319726944, "learning_rate": 0.0005, "loss": 2.1332, "step": 59060 }, { "epoch": 0.22483499920068817, "grad_norm": 0.12649716436862946, "learning_rate": 0.0005, "loss": 2.1331, "step": 59070 }, { "epoch": 0.22487306166881085, "grad_norm": 0.12666192650794983, "learning_rate": 0.0005, "loss": 2.1309, "step": 59080 }, { "epoch": 0.22491112413693354, "grad_norm": 0.11255883425474167, "learning_rate": 0.0005, "loss": 2.1384, "step": 59090 }, { "epoch": 0.22494918660505622, "grad_norm": 0.13696226477622986, "learning_rate": 0.0005, "loss": 2.1038, "step": 59100 }, { "epoch": 0.2249872490731789, "grad_norm": 0.12781773507595062, "learning_rate": 0.0005, "loss": 2.1307, "step": 59110 }, { "epoch": 0.2250253115413016, "grad_norm": 0.13813403248786926, "learning_rate": 0.0005, "loss": 2.1142, "step": 59120 }, { "epoch": 0.22506337400942428, "grad_norm": 0.12762613594532013, "learning_rate": 0.0005, "loss": 2.1258, "step": 59130 }, { "epoch": 0.22510143647754696, "grad_norm": 0.13078367710113525, "learning_rate": 0.0005, "loss": 2.1256, "step": 59140 }, { "epoch": 0.22513949894566962, "grad_norm": 0.11561431735754013, "learning_rate": 0.0005, "loss": 2.1502, "step": 59150 }, { "epoch": 0.2251775614137923, "grad_norm": 0.12774159014225006, "learning_rate": 0.0005, "loss": 2.1326, "step": 59160 }, { "epoch": 0.225215623881915, "grad_norm": 0.12542405724525452, "learning_rate": 0.0005, "loss": 2.1274, "step": 59170 }, { "epoch": 0.22525368635003767, "grad_norm": 0.1185683012008667, "learning_rate": 0.0005, "loss": 2.1222, "step": 59180 }, { "epoch": 0.22529174881816036, "grad_norm": 0.13364127278327942, "learning_rate": 0.0005, "loss": 2.1253, "step": 59190 }, { "epoch": 0.22532981128628304, "grad_norm": 0.14398641884326935, "learning_rate": 0.0005, "loss": 2.1357, "step": 59200 }, { "epoch": 0.22536787375440573, "grad_norm": 0.1276194155216217, "learning_rate": 0.0005, "loss": 2.1135, "step": 59210 }, { "epoch": 0.2254059362225284, "grad_norm": 0.11286783963441849, "learning_rate": 0.0005, "loss": 2.1267, "step": 59220 }, { "epoch": 0.2254439986906511, "grad_norm": 0.13871227204799652, "learning_rate": 0.0005, "loss": 2.1298, "step": 59230 }, { "epoch": 0.22548206115877378, "grad_norm": 0.11784695833921432, "learning_rate": 0.0005, "loss": 2.1365, "step": 59240 }, { "epoch": 0.22552012362689647, "grad_norm": 0.10824364423751831, "learning_rate": 0.0005, "loss": 2.134, "step": 59250 }, { "epoch": 0.22555818609501915, "grad_norm": 0.12316665053367615, "learning_rate": 0.0005, "loss": 2.1387, "step": 59260 }, { "epoch": 0.22559624856314184, "grad_norm": 0.12541303038597107, "learning_rate": 0.0005, "loss": 2.1366, "step": 59270 }, { "epoch": 0.22563431103126452, "grad_norm": 0.11731971800327301, "learning_rate": 0.0005, "loss": 2.1219, "step": 59280 }, { "epoch": 0.2256723734993872, "grad_norm": 0.13416261970996857, "learning_rate": 0.0005, "loss": 2.1301, "step": 59290 }, { "epoch": 0.22571043596750986, "grad_norm": 0.1211361289024353, "learning_rate": 0.0005, "loss": 2.1337, "step": 59300 }, { "epoch": 0.22574849843563255, "grad_norm": 0.11899378895759583, "learning_rate": 0.0005, "loss": 2.1163, "step": 59310 }, { "epoch": 0.22578656090375523, "grad_norm": 0.12704631686210632, "learning_rate": 0.0005, "loss": 2.125, "step": 59320 }, { "epoch": 0.22582462337187792, "grad_norm": 0.13148818910121918, "learning_rate": 0.0005, "loss": 2.1398, "step": 59330 }, { "epoch": 0.2258626858400006, "grad_norm": 0.12430557608604431, "learning_rate": 0.0005, "loss": 2.1426, "step": 59340 }, { "epoch": 0.2259007483081233, "grad_norm": 0.11459135264158249, "learning_rate": 0.0005, "loss": 2.1163, "step": 59350 }, { "epoch": 0.22593881077624597, "grad_norm": 0.13341468572616577, "learning_rate": 0.0005, "loss": 2.1218, "step": 59360 }, { "epoch": 0.22597687324436866, "grad_norm": 0.1272948682308197, "learning_rate": 0.0005, "loss": 2.133, "step": 59370 }, { "epoch": 0.22601493571249134, "grad_norm": 0.13307124376296997, "learning_rate": 0.0005, "loss": 2.1384, "step": 59380 }, { "epoch": 0.22605299818061403, "grad_norm": 0.12040016055107117, "learning_rate": 0.0005, "loss": 2.1237, "step": 59390 }, { "epoch": 0.2260910606487367, "grad_norm": 0.13233061134815216, "learning_rate": 0.0005, "loss": 2.1494, "step": 59400 }, { "epoch": 0.2261291231168594, "grad_norm": 0.12807966768741608, "learning_rate": 0.0005, "loss": 2.1348, "step": 59410 }, { "epoch": 0.22616718558498208, "grad_norm": 0.13461320102214813, "learning_rate": 0.0005, "loss": 2.1213, "step": 59420 }, { "epoch": 0.22620524805310477, "grad_norm": 0.13165999948978424, "learning_rate": 0.0005, "loss": 2.1316, "step": 59430 }, { "epoch": 0.22624331052122743, "grad_norm": 0.13798262178897858, "learning_rate": 0.0005, "loss": 2.1313, "step": 59440 }, { "epoch": 0.2262813729893501, "grad_norm": 0.11773668974637985, "learning_rate": 0.0005, "loss": 2.1357, "step": 59450 }, { "epoch": 0.2263194354574728, "grad_norm": 0.12125545740127563, "learning_rate": 0.0005, "loss": 2.1304, "step": 59460 }, { "epoch": 0.22635749792559548, "grad_norm": 0.13386575877666473, "learning_rate": 0.0005, "loss": 2.1359, "step": 59470 }, { "epoch": 0.22639556039371816, "grad_norm": 0.14520864188671112, "learning_rate": 0.0005, "loss": 2.1349, "step": 59480 }, { "epoch": 0.22643362286184085, "grad_norm": 0.1348080039024353, "learning_rate": 0.0005, "loss": 2.1312, "step": 59490 }, { "epoch": 0.22647168532996353, "grad_norm": 0.1281449943780899, "learning_rate": 0.0005, "loss": 2.1309, "step": 59500 }, { "epoch": 0.22650974779808622, "grad_norm": 0.11811570823192596, "learning_rate": 0.0005, "loss": 2.1231, "step": 59510 }, { "epoch": 0.2265478102662089, "grad_norm": 0.128734290599823, "learning_rate": 0.0005, "loss": 2.1147, "step": 59520 }, { "epoch": 0.2265858727343316, "grad_norm": 0.1225765272974968, "learning_rate": 0.0005, "loss": 2.1377, "step": 59530 }, { "epoch": 0.22662393520245427, "grad_norm": 0.12594637274742126, "learning_rate": 0.0005, "loss": 2.1384, "step": 59540 }, { "epoch": 0.22666199767057696, "grad_norm": 0.10884319245815277, "learning_rate": 0.0005, "loss": 2.1407, "step": 59550 }, { "epoch": 0.22670006013869964, "grad_norm": 0.1423255354166031, "learning_rate": 0.0005, "loss": 2.116, "step": 59560 }, { "epoch": 0.22673812260682233, "grad_norm": 0.12744948267936707, "learning_rate": 0.0005, "loss": 2.1417, "step": 59570 }, { "epoch": 0.22677618507494499, "grad_norm": 0.12403052300214767, "learning_rate": 0.0005, "loss": 2.15, "step": 59580 }, { "epoch": 0.22681424754306767, "grad_norm": 0.12405093014240265, "learning_rate": 0.0005, "loss": 2.1342, "step": 59590 }, { "epoch": 0.22685231001119036, "grad_norm": 0.12157925963401794, "learning_rate": 0.0005, "loss": 2.1143, "step": 59600 }, { "epoch": 0.22689037247931304, "grad_norm": 0.13508589565753937, "learning_rate": 0.0005, "loss": 2.1227, "step": 59610 }, { "epoch": 0.22692843494743573, "grad_norm": 0.13147228956222534, "learning_rate": 0.0005, "loss": 2.1307, "step": 59620 }, { "epoch": 0.2269664974155584, "grad_norm": 0.12281271070241928, "learning_rate": 0.0005, "loss": 2.1258, "step": 59630 }, { "epoch": 0.2270045598836811, "grad_norm": 0.11494458466768265, "learning_rate": 0.0005, "loss": 2.1573, "step": 59640 }, { "epoch": 0.22704262235180378, "grad_norm": 0.12068134546279907, "learning_rate": 0.0005, "loss": 2.1186, "step": 59650 }, { "epoch": 0.22708068481992646, "grad_norm": 0.1246868148446083, "learning_rate": 0.0005, "loss": 2.1183, "step": 59660 }, { "epoch": 0.22711874728804915, "grad_norm": 0.1122557520866394, "learning_rate": 0.0005, "loss": 2.1461, "step": 59670 }, { "epoch": 0.22715680975617183, "grad_norm": 0.1379440277814865, "learning_rate": 0.0005, "loss": 2.1258, "step": 59680 }, { "epoch": 0.22719487222429452, "grad_norm": 0.11093917489051819, "learning_rate": 0.0005, "loss": 2.1365, "step": 59690 }, { "epoch": 0.2272329346924172, "grad_norm": 0.13247358798980713, "learning_rate": 0.0005, "loss": 2.1226, "step": 59700 }, { "epoch": 0.2272709971605399, "grad_norm": 0.11783503741025925, "learning_rate": 0.0005, "loss": 2.1187, "step": 59710 }, { "epoch": 0.22730905962866257, "grad_norm": 0.13418060541152954, "learning_rate": 0.0005, "loss": 2.1245, "step": 59720 }, { "epoch": 0.22734712209678523, "grad_norm": 0.11433655768632889, "learning_rate": 0.0005, "loss": 2.1164, "step": 59730 }, { "epoch": 0.22738518456490792, "grad_norm": 0.13238011300563812, "learning_rate": 0.0005, "loss": 2.126, "step": 59740 }, { "epoch": 0.2274232470330306, "grad_norm": 0.13886448740959167, "learning_rate": 0.0005, "loss": 2.1379, "step": 59750 }, { "epoch": 0.22746130950115329, "grad_norm": 0.12896914780139923, "learning_rate": 0.0005, "loss": 2.1358, "step": 59760 }, { "epoch": 0.22749937196927597, "grad_norm": 0.13450363278388977, "learning_rate": 0.0005, "loss": 2.1166, "step": 59770 }, { "epoch": 0.22753743443739866, "grad_norm": 0.1338476538658142, "learning_rate": 0.0005, "loss": 2.1369, "step": 59780 }, { "epoch": 0.22757549690552134, "grad_norm": 0.1098618283867836, "learning_rate": 0.0005, "loss": 2.1193, "step": 59790 }, { "epoch": 0.22761355937364403, "grad_norm": 0.11677566915750504, "learning_rate": 0.0005, "loss": 2.1398, "step": 59800 }, { "epoch": 0.2276516218417667, "grad_norm": 0.11757448315620422, "learning_rate": 0.0005, "loss": 2.1292, "step": 59810 }, { "epoch": 0.2276896843098894, "grad_norm": 0.12055247277021408, "learning_rate": 0.0005, "loss": 2.1387, "step": 59820 }, { "epoch": 0.22772774677801208, "grad_norm": 0.12822188436985016, "learning_rate": 0.0005, "loss": 2.1446, "step": 59830 }, { "epoch": 0.22776580924613476, "grad_norm": 0.1212920993566513, "learning_rate": 0.0005, "loss": 2.1275, "step": 59840 }, { "epoch": 0.22780387171425745, "grad_norm": 0.1271173506975174, "learning_rate": 0.0005, "loss": 2.13, "step": 59850 }, { "epoch": 0.22784193418238013, "grad_norm": 0.12114972621202469, "learning_rate": 0.0005, "loss": 2.1279, "step": 59860 }, { "epoch": 0.2278799966505028, "grad_norm": 0.12875624001026154, "learning_rate": 0.0005, "loss": 2.1367, "step": 59870 }, { "epoch": 0.22791805911862548, "grad_norm": 0.1280161440372467, "learning_rate": 0.0005, "loss": 2.13, "step": 59880 }, { "epoch": 0.22795612158674816, "grad_norm": 0.12320836633443832, "learning_rate": 0.0005, "loss": 2.1292, "step": 59890 }, { "epoch": 0.22799418405487085, "grad_norm": 0.1314483880996704, "learning_rate": 0.0005, "loss": 2.1357, "step": 59900 }, { "epoch": 0.22803224652299353, "grad_norm": 0.13151994347572327, "learning_rate": 0.0005, "loss": 2.1344, "step": 59910 }, { "epoch": 0.22807030899111622, "grad_norm": 0.13985653221607208, "learning_rate": 0.0005, "loss": 2.1281, "step": 59920 }, { "epoch": 0.2281083714592389, "grad_norm": 0.12865976989269257, "learning_rate": 0.0005, "loss": 2.121, "step": 59930 }, { "epoch": 0.22814643392736159, "grad_norm": 0.12414994090795517, "learning_rate": 0.0005, "loss": 2.1468, "step": 59940 }, { "epoch": 0.22818449639548427, "grad_norm": 0.13476936519145966, "learning_rate": 0.0005, "loss": 2.1435, "step": 59950 }, { "epoch": 0.22822255886360696, "grad_norm": 0.12013564258813858, "learning_rate": 0.0005, "loss": 2.1263, "step": 59960 }, { "epoch": 0.22826062133172964, "grad_norm": 0.11777539551258087, "learning_rate": 0.0005, "loss": 2.1362, "step": 59970 }, { "epoch": 0.22829868379985233, "grad_norm": 0.1243869811296463, "learning_rate": 0.0005, "loss": 2.1184, "step": 59980 }, { "epoch": 0.228336746267975, "grad_norm": 0.11036140471696854, "learning_rate": 0.0005, "loss": 2.1281, "step": 59990 }, { "epoch": 0.2283748087360977, "grad_norm": 0.11138096451759338, "learning_rate": 0.0005, "loss": 2.1427, "step": 60000 }, { "epoch": 0.22841287120422038, "grad_norm": 0.11755473166704178, "learning_rate": 0.0005, "loss": 2.1407, "step": 60010 }, { "epoch": 0.22845093367234304, "grad_norm": 0.11770655959844589, "learning_rate": 0.0005, "loss": 2.14, "step": 60020 }, { "epoch": 0.22848899614046572, "grad_norm": 0.11558213084936142, "learning_rate": 0.0005, "loss": 2.1222, "step": 60030 }, { "epoch": 0.2285270586085884, "grad_norm": 0.12601837515830994, "learning_rate": 0.0005, "loss": 2.1494, "step": 60040 }, { "epoch": 0.2285651210767111, "grad_norm": 0.12481823563575745, "learning_rate": 0.0005, "loss": 2.1305, "step": 60050 }, { "epoch": 0.22860318354483378, "grad_norm": 0.11852145940065384, "learning_rate": 0.0005, "loss": 2.1333, "step": 60060 }, { "epoch": 0.22864124601295646, "grad_norm": 0.12432295083999634, "learning_rate": 0.0005, "loss": 2.1427, "step": 60070 }, { "epoch": 0.22867930848107915, "grad_norm": 0.13385199010372162, "learning_rate": 0.0005, "loss": 2.1138, "step": 60080 }, { "epoch": 0.22871737094920183, "grad_norm": 0.14017799496650696, "learning_rate": 0.0005, "loss": 2.125, "step": 60090 }, { "epoch": 0.22875543341732452, "grad_norm": 0.11703263968229294, "learning_rate": 0.0005, "loss": 2.1489, "step": 60100 }, { "epoch": 0.2287934958854472, "grad_norm": 0.11561126261949539, "learning_rate": 0.0005, "loss": 2.1306, "step": 60110 }, { "epoch": 0.22883155835356989, "grad_norm": 0.12033192813396454, "learning_rate": 0.0005, "loss": 2.1144, "step": 60120 }, { "epoch": 0.22886962082169257, "grad_norm": 0.14562615752220154, "learning_rate": 0.0005, "loss": 2.131, "step": 60130 }, { "epoch": 0.22890768328981526, "grad_norm": 0.11416006088256836, "learning_rate": 0.0005, "loss": 2.1287, "step": 60140 }, { "epoch": 0.22894574575793794, "grad_norm": 0.13422337174415588, "learning_rate": 0.0005, "loss": 2.1192, "step": 60150 }, { "epoch": 0.2289838082260606, "grad_norm": 0.1290620118379593, "learning_rate": 0.0005, "loss": 2.1251, "step": 60160 }, { "epoch": 0.22902187069418328, "grad_norm": 0.11742956936359406, "learning_rate": 0.0005, "loss": 2.1282, "step": 60170 }, { "epoch": 0.22905993316230597, "grad_norm": 0.11944698542356491, "learning_rate": 0.0005, "loss": 2.1338, "step": 60180 }, { "epoch": 0.22909799563042865, "grad_norm": 0.11954623460769653, "learning_rate": 0.0005, "loss": 2.1491, "step": 60190 }, { "epoch": 0.22913605809855134, "grad_norm": 0.12461133301258087, "learning_rate": 0.0005, "loss": 2.1275, "step": 60200 }, { "epoch": 0.22917412056667402, "grad_norm": 0.12100213766098022, "learning_rate": 0.0005, "loss": 2.1323, "step": 60210 }, { "epoch": 0.2292121830347967, "grad_norm": 0.14461475610733032, "learning_rate": 0.0005, "loss": 2.126, "step": 60220 }, { "epoch": 0.2292502455029194, "grad_norm": 0.11810819059610367, "learning_rate": 0.0005, "loss": 2.1412, "step": 60230 }, { "epoch": 0.22928830797104208, "grad_norm": 0.1200481727719307, "learning_rate": 0.0005, "loss": 2.1336, "step": 60240 }, { "epoch": 0.22932637043916476, "grad_norm": 0.12129189074039459, "learning_rate": 0.0005, "loss": 2.1348, "step": 60250 }, { "epoch": 0.22936443290728745, "grad_norm": 0.1197345107793808, "learning_rate": 0.0005, "loss": 2.1303, "step": 60260 }, { "epoch": 0.22940249537541013, "grad_norm": 0.13456512987613678, "learning_rate": 0.0005, "loss": 2.1322, "step": 60270 }, { "epoch": 0.22944055784353282, "grad_norm": 0.1252896934747696, "learning_rate": 0.0005, "loss": 2.1428, "step": 60280 }, { "epoch": 0.2294786203116555, "grad_norm": 0.11544041335582733, "learning_rate": 0.0005, "loss": 2.1275, "step": 60290 }, { "epoch": 0.22951668277977816, "grad_norm": 0.13014082610607147, "learning_rate": 0.0005, "loss": 2.1359, "step": 60300 }, { "epoch": 0.22955474524790084, "grad_norm": 0.11489399522542953, "learning_rate": 0.0005, "loss": 2.1265, "step": 60310 }, { "epoch": 0.22959280771602353, "grad_norm": 0.11891859769821167, "learning_rate": 0.0005, "loss": 2.1456, "step": 60320 }, { "epoch": 0.2296308701841462, "grad_norm": 0.11885011196136475, "learning_rate": 0.0005, "loss": 2.1348, "step": 60330 }, { "epoch": 0.2296689326522689, "grad_norm": 0.1199352964758873, "learning_rate": 0.0005, "loss": 2.1233, "step": 60340 }, { "epoch": 0.22970699512039158, "grad_norm": 0.14349274337291718, "learning_rate": 0.0005, "loss": 2.1255, "step": 60350 }, { "epoch": 0.22974505758851427, "grad_norm": 0.15010002255439758, "learning_rate": 0.0005, "loss": 2.1206, "step": 60360 }, { "epoch": 0.22978312005663695, "grad_norm": 0.13680097460746765, "learning_rate": 0.0005, "loss": 2.1419, "step": 60370 }, { "epoch": 0.22982118252475964, "grad_norm": 0.12735390663146973, "learning_rate": 0.0005, "loss": 2.1391, "step": 60380 }, { "epoch": 0.22985924499288232, "grad_norm": 0.13412439823150635, "learning_rate": 0.0005, "loss": 2.1381, "step": 60390 }, { "epoch": 0.229897307461005, "grad_norm": 0.11966119706630707, "learning_rate": 0.0005, "loss": 2.1357, "step": 60400 }, { "epoch": 0.2299353699291277, "grad_norm": 0.11467495560646057, "learning_rate": 0.0005, "loss": 2.134, "step": 60410 }, { "epoch": 0.22997343239725038, "grad_norm": 0.12715892493724823, "learning_rate": 0.0005, "loss": 2.1205, "step": 60420 }, { "epoch": 0.23001149486537306, "grad_norm": 0.13274915516376495, "learning_rate": 0.0005, "loss": 2.121, "step": 60430 }, { "epoch": 0.23004955733349575, "grad_norm": 0.12153349071741104, "learning_rate": 0.0005, "loss": 2.1327, "step": 60440 }, { "epoch": 0.2300876198016184, "grad_norm": 0.11797938495874405, "learning_rate": 0.0005, "loss": 2.1117, "step": 60450 }, { "epoch": 0.2301256822697411, "grad_norm": 0.1457214504480362, "learning_rate": 0.0005, "loss": 2.1303, "step": 60460 }, { "epoch": 0.23016374473786377, "grad_norm": 0.12316471338272095, "learning_rate": 0.0005, "loss": 2.1419, "step": 60470 }, { "epoch": 0.23020180720598646, "grad_norm": 0.11603264510631561, "learning_rate": 0.0005, "loss": 2.1197, "step": 60480 }, { "epoch": 0.23023986967410914, "grad_norm": 0.1149735152721405, "learning_rate": 0.0005, "loss": 2.1321, "step": 60490 }, { "epoch": 0.23027793214223183, "grad_norm": 0.1266094446182251, "learning_rate": 0.0005, "loss": 2.142, "step": 60500 }, { "epoch": 0.2303159946103545, "grad_norm": 0.13430431485176086, "learning_rate": 0.0005, "loss": 2.1257, "step": 60510 }, { "epoch": 0.2303540570784772, "grad_norm": 0.12228171527385712, "learning_rate": 0.0005, "loss": 2.1302, "step": 60520 }, { "epoch": 0.23039211954659988, "grad_norm": 0.12757080793380737, "learning_rate": 0.0005, "loss": 2.126, "step": 60530 }, { "epoch": 0.23043018201472257, "grad_norm": 0.12125714123249054, "learning_rate": 0.0005, "loss": 2.125, "step": 60540 }, { "epoch": 0.23046824448284525, "grad_norm": 0.11723601073026657, "learning_rate": 0.0005, "loss": 2.1347, "step": 60550 }, { "epoch": 0.23050630695096794, "grad_norm": 0.11978837847709656, "learning_rate": 0.0005, "loss": 2.1388, "step": 60560 }, { "epoch": 0.23054436941909062, "grad_norm": 0.12238425761461258, "learning_rate": 0.0005, "loss": 2.127, "step": 60570 }, { "epoch": 0.2305824318872133, "grad_norm": 0.13493356108665466, "learning_rate": 0.0005, "loss": 2.1335, "step": 60580 }, { "epoch": 0.23062049435533596, "grad_norm": 0.1277237832546234, "learning_rate": 0.0005, "loss": 2.132, "step": 60590 }, { "epoch": 0.23065855682345865, "grad_norm": 0.12236456573009491, "learning_rate": 0.0005, "loss": 2.1334, "step": 60600 }, { "epoch": 0.23069661929158133, "grad_norm": 0.14096243679523468, "learning_rate": 0.0005, "loss": 2.1248, "step": 60610 }, { "epoch": 0.23073468175970402, "grad_norm": 0.13399477303028107, "learning_rate": 0.0005, "loss": 2.1221, "step": 60620 }, { "epoch": 0.2307727442278267, "grad_norm": 0.12829075753688812, "learning_rate": 0.0005, "loss": 2.1174, "step": 60630 }, { "epoch": 0.2308108066959494, "grad_norm": 0.11446889489889145, "learning_rate": 0.0005, "loss": 2.1325, "step": 60640 }, { "epoch": 0.23084886916407207, "grad_norm": 0.12685668468475342, "learning_rate": 0.0005, "loss": 2.113, "step": 60650 }, { "epoch": 0.23088693163219476, "grad_norm": 0.13323184847831726, "learning_rate": 0.0005, "loss": 2.1397, "step": 60660 }, { "epoch": 0.23092499410031744, "grad_norm": 0.13773372769355774, "learning_rate": 0.0005, "loss": 2.1387, "step": 60670 }, { "epoch": 0.23096305656844013, "grad_norm": 0.1213175356388092, "learning_rate": 0.0005, "loss": 2.1228, "step": 60680 }, { "epoch": 0.2310011190365628, "grad_norm": 0.12439834326505661, "learning_rate": 0.0005, "loss": 2.1312, "step": 60690 }, { "epoch": 0.2310391815046855, "grad_norm": 0.11664614081382751, "learning_rate": 0.0005, "loss": 2.1352, "step": 60700 }, { "epoch": 0.23107724397280818, "grad_norm": 0.12157955765724182, "learning_rate": 0.0005, "loss": 2.132, "step": 60710 }, { "epoch": 0.23111530644093087, "grad_norm": 0.13951872289180756, "learning_rate": 0.0005, "loss": 2.1323, "step": 60720 }, { "epoch": 0.23115336890905352, "grad_norm": 0.12050854414701462, "learning_rate": 0.0005, "loss": 2.1339, "step": 60730 }, { "epoch": 0.2311914313771762, "grad_norm": 0.13324572145938873, "learning_rate": 0.0005, "loss": 2.1208, "step": 60740 }, { "epoch": 0.2312294938452989, "grad_norm": 0.13760831952095032, "learning_rate": 0.0005, "loss": 2.1293, "step": 60750 }, { "epoch": 0.23126755631342158, "grad_norm": 0.12362300604581833, "learning_rate": 0.0005, "loss": 2.1228, "step": 60760 }, { "epoch": 0.23130561878154426, "grad_norm": 0.12636949121952057, "learning_rate": 0.0005, "loss": 2.1469, "step": 60770 }, { "epoch": 0.23134368124966695, "grad_norm": 0.1140715703368187, "learning_rate": 0.0005, "loss": 2.107, "step": 60780 }, { "epoch": 0.23138174371778963, "grad_norm": 0.13143981993198395, "learning_rate": 0.0005, "loss": 2.1328, "step": 60790 }, { "epoch": 0.23141980618591232, "grad_norm": 0.1388804316520691, "learning_rate": 0.0005, "loss": 2.1185, "step": 60800 }, { "epoch": 0.231457868654035, "grad_norm": 0.15259552001953125, "learning_rate": 0.0005, "loss": 2.1391, "step": 60810 }, { "epoch": 0.2314959311221577, "grad_norm": 0.130338653922081, "learning_rate": 0.0005, "loss": 2.1299, "step": 60820 }, { "epoch": 0.23153399359028037, "grad_norm": 0.12489153444766998, "learning_rate": 0.0005, "loss": 2.1368, "step": 60830 }, { "epoch": 0.23157205605840306, "grad_norm": 0.133027121424675, "learning_rate": 0.0005, "loss": 2.1356, "step": 60840 }, { "epoch": 0.23161011852652574, "grad_norm": 0.13067150115966797, "learning_rate": 0.0005, "loss": 2.1219, "step": 60850 }, { "epoch": 0.23164818099464843, "grad_norm": 0.11416888982057571, "learning_rate": 0.0005, "loss": 2.1217, "step": 60860 }, { "epoch": 0.2316862434627711, "grad_norm": 0.1309850960969925, "learning_rate": 0.0005, "loss": 2.1411, "step": 60870 }, { "epoch": 0.23172430593089377, "grad_norm": 0.12706001102924347, "learning_rate": 0.0005, "loss": 2.1215, "step": 60880 }, { "epoch": 0.23176236839901646, "grad_norm": 0.12186629325151443, "learning_rate": 0.0005, "loss": 2.1262, "step": 60890 }, { "epoch": 0.23180043086713914, "grad_norm": 0.11978558450937271, "learning_rate": 0.0005, "loss": 2.1262, "step": 60900 }, { "epoch": 0.23183849333526182, "grad_norm": 0.13007833063602448, "learning_rate": 0.0005, "loss": 2.134, "step": 60910 }, { "epoch": 0.2318765558033845, "grad_norm": 0.12153036892414093, "learning_rate": 0.0005, "loss": 2.1405, "step": 60920 }, { "epoch": 0.2319146182715072, "grad_norm": 0.12330733239650726, "learning_rate": 0.0005, "loss": 2.1212, "step": 60930 }, { "epoch": 0.23195268073962988, "grad_norm": 0.11360269784927368, "learning_rate": 0.0005, "loss": 2.1346, "step": 60940 }, { "epoch": 0.23199074320775256, "grad_norm": 0.11848775297403336, "learning_rate": 0.0005, "loss": 2.1454, "step": 60950 }, { "epoch": 0.23202880567587525, "grad_norm": 0.1268378645181656, "learning_rate": 0.0005, "loss": 2.1304, "step": 60960 }, { "epoch": 0.23206686814399793, "grad_norm": 0.11616990715265274, "learning_rate": 0.0005, "loss": 2.134, "step": 60970 }, { "epoch": 0.23210493061212062, "grad_norm": 0.11881053447723389, "learning_rate": 0.0005, "loss": 2.1315, "step": 60980 }, { "epoch": 0.2321429930802433, "grad_norm": 0.12208161503076553, "learning_rate": 0.0005, "loss": 2.1303, "step": 60990 }, { "epoch": 0.232181055548366, "grad_norm": 0.12031666934490204, "learning_rate": 0.0005, "loss": 2.1262, "step": 61000 }, { "epoch": 0.23221911801648867, "grad_norm": 0.12577299773693085, "learning_rate": 0.0005, "loss": 2.1439, "step": 61010 }, { "epoch": 0.23225718048461133, "grad_norm": 0.12651118636131287, "learning_rate": 0.0005, "loss": 2.1157, "step": 61020 }, { "epoch": 0.23229524295273402, "grad_norm": 0.1322355568408966, "learning_rate": 0.0005, "loss": 2.1313, "step": 61030 }, { "epoch": 0.2323333054208567, "grad_norm": 0.11694405227899551, "learning_rate": 0.0005, "loss": 2.123, "step": 61040 }, { "epoch": 0.23237136788897939, "grad_norm": 0.12027653306722641, "learning_rate": 0.0005, "loss": 2.1236, "step": 61050 }, { "epoch": 0.23240943035710207, "grad_norm": 0.1370190531015396, "learning_rate": 0.0005, "loss": 2.139, "step": 61060 }, { "epoch": 0.23244749282522476, "grad_norm": 0.1162046492099762, "learning_rate": 0.0005, "loss": 2.1228, "step": 61070 }, { "epoch": 0.23248555529334744, "grad_norm": 0.13960926234722137, "learning_rate": 0.0005, "loss": 2.1145, "step": 61080 }, { "epoch": 0.23252361776147012, "grad_norm": 0.1295863837003708, "learning_rate": 0.0005, "loss": 2.1307, "step": 61090 }, { "epoch": 0.2325616802295928, "grad_norm": 0.140008807182312, "learning_rate": 0.0005, "loss": 2.1373, "step": 61100 }, { "epoch": 0.2325997426977155, "grad_norm": 0.13163819909095764, "learning_rate": 0.0005, "loss": 2.1382, "step": 61110 }, { "epoch": 0.23263780516583818, "grad_norm": 0.12477041780948639, "learning_rate": 0.0005, "loss": 2.1287, "step": 61120 }, { "epoch": 0.23267586763396086, "grad_norm": 0.1220005601644516, "learning_rate": 0.0005, "loss": 2.1306, "step": 61130 }, { "epoch": 0.23271393010208355, "grad_norm": 0.11905595660209656, "learning_rate": 0.0005, "loss": 2.1274, "step": 61140 }, { "epoch": 0.23275199257020623, "grad_norm": 0.11650003492832184, "learning_rate": 0.0005, "loss": 2.1392, "step": 61150 }, { "epoch": 0.23279005503832892, "grad_norm": 0.12131864577531815, "learning_rate": 0.0005, "loss": 2.1362, "step": 61160 }, { "epoch": 0.23282811750645158, "grad_norm": 0.12896190583705902, "learning_rate": 0.0005, "loss": 2.1269, "step": 61170 }, { "epoch": 0.23286617997457426, "grad_norm": 0.13321678340435028, "learning_rate": 0.0005, "loss": 2.1223, "step": 61180 }, { "epoch": 0.23290424244269695, "grad_norm": 0.11942190676927567, "learning_rate": 0.0005, "loss": 2.1383, "step": 61190 }, { "epoch": 0.23294230491081963, "grad_norm": 0.11913148313760757, "learning_rate": 0.0005, "loss": 2.1208, "step": 61200 }, { "epoch": 0.23298036737894232, "grad_norm": 0.4585500955581665, "learning_rate": 0.0005, "loss": 2.1338, "step": 61210 }, { "epoch": 0.233018429847065, "grad_norm": 0.12030050158500671, "learning_rate": 0.0005, "loss": 2.1294, "step": 61220 }, { "epoch": 0.23305649231518769, "grad_norm": 0.4939073324203491, "learning_rate": 0.0005, "loss": 2.1313, "step": 61230 }, { "epoch": 0.23309455478331037, "grad_norm": 0.13352811336517334, "learning_rate": 0.0005, "loss": 2.1316, "step": 61240 }, { "epoch": 0.23313261725143306, "grad_norm": 0.1354418694972992, "learning_rate": 0.0005, "loss": 2.1308, "step": 61250 }, { "epoch": 0.23317067971955574, "grad_norm": 0.13711479306221008, "learning_rate": 0.0005, "loss": 2.1379, "step": 61260 }, { "epoch": 0.23320874218767843, "grad_norm": 0.11538518220186234, "learning_rate": 0.0005, "loss": 2.143, "step": 61270 }, { "epoch": 0.2332468046558011, "grad_norm": 0.13111010193824768, "learning_rate": 0.0005, "loss": 2.1244, "step": 61280 }, { "epoch": 0.2332848671239238, "grad_norm": 0.13194629549980164, "learning_rate": 0.0005, "loss": 2.1325, "step": 61290 }, { "epoch": 0.23332292959204648, "grad_norm": 0.11781266331672668, "learning_rate": 0.0005, "loss": 2.1252, "step": 61300 }, { "epoch": 0.23336099206016914, "grad_norm": 0.12541015446186066, "learning_rate": 0.0005, "loss": 2.1157, "step": 61310 }, { "epoch": 0.23339905452829182, "grad_norm": 0.13061216473579407, "learning_rate": 0.0005, "loss": 2.1306, "step": 61320 }, { "epoch": 0.2334371169964145, "grad_norm": 0.12053248286247253, "learning_rate": 0.0005, "loss": 2.1233, "step": 61330 }, { "epoch": 0.2334751794645372, "grad_norm": 0.12311103194952011, "learning_rate": 0.0005, "loss": 2.1224, "step": 61340 }, { "epoch": 0.23351324193265988, "grad_norm": 0.13860678672790527, "learning_rate": 0.0005, "loss": 2.1365, "step": 61350 }, { "epoch": 0.23355130440078256, "grad_norm": 0.11981858313083649, "learning_rate": 0.0005, "loss": 2.12, "step": 61360 }, { "epoch": 0.23358936686890525, "grad_norm": 0.146539106965065, "learning_rate": 0.0005, "loss": 2.12, "step": 61370 }, { "epoch": 0.23362742933702793, "grad_norm": 0.1242060512304306, "learning_rate": 0.0005, "loss": 2.1111, "step": 61380 }, { "epoch": 0.23366549180515062, "grad_norm": 0.12461848556995392, "learning_rate": 0.0005, "loss": 2.1248, "step": 61390 }, { "epoch": 0.2337035542732733, "grad_norm": 0.12711970508098602, "learning_rate": 0.0005, "loss": 2.1347, "step": 61400 }, { "epoch": 0.23374161674139599, "grad_norm": 0.13660112023353577, "learning_rate": 0.0005, "loss": 2.112, "step": 61410 }, { "epoch": 0.23377967920951867, "grad_norm": 0.13711483776569366, "learning_rate": 0.0005, "loss": 2.1269, "step": 61420 }, { "epoch": 0.23381774167764136, "grad_norm": 0.11872289329767227, "learning_rate": 0.0005, "loss": 2.1216, "step": 61430 }, { "epoch": 0.23385580414576404, "grad_norm": 0.12258458882570267, "learning_rate": 0.0005, "loss": 2.1243, "step": 61440 }, { "epoch": 0.2338938666138867, "grad_norm": 0.1237039715051651, "learning_rate": 0.0005, "loss": 2.1262, "step": 61450 }, { "epoch": 0.23393192908200938, "grad_norm": 0.11179503053426743, "learning_rate": 0.0005, "loss": 2.1368, "step": 61460 }, { "epoch": 0.23396999155013207, "grad_norm": 0.12895576655864716, "learning_rate": 0.0005, "loss": 2.1372, "step": 61470 }, { "epoch": 0.23400805401825475, "grad_norm": 0.12361761927604675, "learning_rate": 0.0005, "loss": 2.1334, "step": 61480 }, { "epoch": 0.23404611648637744, "grad_norm": 0.11232542246580124, "learning_rate": 0.0005, "loss": 2.1388, "step": 61490 }, { "epoch": 0.23408417895450012, "grad_norm": 0.12916137278079987, "learning_rate": 0.0005, "loss": 2.1392, "step": 61500 }, { "epoch": 0.2341222414226228, "grad_norm": 0.12421401590108871, "learning_rate": 0.0005, "loss": 2.1256, "step": 61510 }, { "epoch": 0.2341603038907455, "grad_norm": 0.12263153493404388, "learning_rate": 0.0005, "loss": 2.1341, "step": 61520 }, { "epoch": 0.23419836635886818, "grad_norm": 0.11718087643384933, "learning_rate": 0.0005, "loss": 2.1366, "step": 61530 }, { "epoch": 0.23423642882699086, "grad_norm": 0.13158558309078217, "learning_rate": 0.0005, "loss": 2.1206, "step": 61540 }, { "epoch": 0.23427449129511355, "grad_norm": 0.13179059326648712, "learning_rate": 0.0005, "loss": 2.1292, "step": 61550 }, { "epoch": 0.23431255376323623, "grad_norm": 0.12416069954633713, "learning_rate": 0.0005, "loss": 2.1266, "step": 61560 }, { "epoch": 0.23435061623135892, "grad_norm": 0.12887023389339447, "learning_rate": 0.0005, "loss": 2.1221, "step": 61570 }, { "epoch": 0.2343886786994816, "grad_norm": 0.11714029312133789, "learning_rate": 0.0005, "loss": 2.1235, "step": 61580 }, { "epoch": 0.23442674116760429, "grad_norm": 0.8283962607383728, "learning_rate": 0.0005, "loss": 2.1265, "step": 61590 }, { "epoch": 0.23446480363572694, "grad_norm": 0.12651962041854858, "learning_rate": 0.0005, "loss": 2.1354, "step": 61600 }, { "epoch": 0.23450286610384963, "grad_norm": 0.11088352650403976, "learning_rate": 0.0005, "loss": 2.1359, "step": 61610 }, { "epoch": 0.2345409285719723, "grad_norm": 0.14070887863636017, "learning_rate": 0.0005, "loss": 2.1269, "step": 61620 }, { "epoch": 0.234578991040095, "grad_norm": 0.1269426941871643, "learning_rate": 0.0005, "loss": 2.1392, "step": 61630 }, { "epoch": 0.23461705350821768, "grad_norm": 0.11625342071056366, "learning_rate": 0.0005, "loss": 2.1214, "step": 61640 }, { "epoch": 0.23465511597634037, "grad_norm": 0.1273365020751953, "learning_rate": 0.0005, "loss": 2.1353, "step": 61650 }, { "epoch": 0.23469317844446305, "grad_norm": 0.1282196342945099, "learning_rate": 0.0005, "loss": 2.1406, "step": 61660 }, { "epoch": 0.23473124091258574, "grad_norm": 0.1189308762550354, "learning_rate": 0.0005, "loss": 2.1283, "step": 61670 }, { "epoch": 0.23476930338070842, "grad_norm": 0.12329194694757462, "learning_rate": 0.0005, "loss": 2.1115, "step": 61680 }, { "epoch": 0.2348073658488311, "grad_norm": 0.13189809024333954, "learning_rate": 0.0005, "loss": 2.1453, "step": 61690 }, { "epoch": 0.2348454283169538, "grad_norm": 0.12398454546928406, "learning_rate": 0.0005, "loss": 2.1268, "step": 61700 }, { "epoch": 0.23488349078507648, "grad_norm": 0.14037716388702393, "learning_rate": 0.0005, "loss": 2.1411, "step": 61710 }, { "epoch": 0.23492155325319916, "grad_norm": 0.12988056242465973, "learning_rate": 0.0005, "loss": 2.1395, "step": 61720 }, { "epoch": 0.23495961572132185, "grad_norm": 0.11807936429977417, "learning_rate": 0.0005, "loss": 2.1323, "step": 61730 }, { "epoch": 0.2349976781894445, "grad_norm": 0.12387745827436447, "learning_rate": 0.0005, "loss": 2.128, "step": 61740 }, { "epoch": 0.2350357406575672, "grad_norm": 0.1267496794462204, "learning_rate": 0.0005, "loss": 2.1322, "step": 61750 }, { "epoch": 0.23507380312568987, "grad_norm": 0.13048484921455383, "learning_rate": 0.0005, "loss": 2.132, "step": 61760 }, { "epoch": 0.23511186559381256, "grad_norm": 0.12397941201925278, "learning_rate": 0.0005, "loss": 2.1249, "step": 61770 }, { "epoch": 0.23514992806193524, "grad_norm": 0.1166549026966095, "learning_rate": 0.0005, "loss": 2.1196, "step": 61780 }, { "epoch": 0.23518799053005793, "grad_norm": 0.11860918253660202, "learning_rate": 0.0005, "loss": 2.1133, "step": 61790 }, { "epoch": 0.2352260529981806, "grad_norm": 0.1338878870010376, "learning_rate": 0.0005, "loss": 2.1268, "step": 61800 }, { "epoch": 0.2352641154663033, "grad_norm": 0.12876847386360168, "learning_rate": 0.0005, "loss": 2.1288, "step": 61810 }, { "epoch": 0.23530217793442598, "grad_norm": 0.12465507537126541, "learning_rate": 0.0005, "loss": 2.1385, "step": 61820 }, { "epoch": 0.23534024040254867, "grad_norm": 0.11806610971689224, "learning_rate": 0.0005, "loss": 2.1193, "step": 61830 }, { "epoch": 0.23537830287067135, "grad_norm": 0.19733448326587677, "learning_rate": 0.0005, "loss": 2.1299, "step": 61840 }, { "epoch": 0.23541636533879404, "grad_norm": 0.12105879187583923, "learning_rate": 0.0005, "loss": 2.1369, "step": 61850 }, { "epoch": 0.23545442780691672, "grad_norm": 0.11597780138254166, "learning_rate": 0.0005, "loss": 2.1248, "step": 61860 }, { "epoch": 0.2354924902750394, "grad_norm": 0.137285053730011, "learning_rate": 0.0005, "loss": 2.1181, "step": 61870 }, { "epoch": 0.23553055274316206, "grad_norm": 0.12559430301189423, "learning_rate": 0.0005, "loss": 2.1313, "step": 61880 }, { "epoch": 0.23556861521128475, "grad_norm": 0.12780259549617767, "learning_rate": 0.0005, "loss": 2.133, "step": 61890 }, { "epoch": 0.23560667767940743, "grad_norm": 0.13546954095363617, "learning_rate": 0.0005, "loss": 2.1251, "step": 61900 }, { "epoch": 0.23564474014753012, "grad_norm": 0.12587662041187286, "learning_rate": 0.0005, "loss": 2.1221, "step": 61910 }, { "epoch": 0.2356828026156528, "grad_norm": 0.14555040001869202, "learning_rate": 0.0005, "loss": 2.139, "step": 61920 }, { "epoch": 0.2357208650837755, "grad_norm": 0.11890752613544464, "learning_rate": 0.0005, "loss": 2.1229, "step": 61930 }, { "epoch": 0.23575892755189817, "grad_norm": 0.1446986049413681, "learning_rate": 0.0005, "loss": 2.14, "step": 61940 }, { "epoch": 0.23579699002002086, "grad_norm": 0.11749228090047836, "learning_rate": 0.0005, "loss": 2.148, "step": 61950 }, { "epoch": 0.23583505248814354, "grad_norm": 0.12498153746128082, "learning_rate": 0.0005, "loss": 2.1271, "step": 61960 }, { "epoch": 0.23587311495626623, "grad_norm": 0.13425499200820923, "learning_rate": 0.0005, "loss": 2.121, "step": 61970 }, { "epoch": 0.2359111774243889, "grad_norm": 0.13690124452114105, "learning_rate": 0.0005, "loss": 2.1293, "step": 61980 }, { "epoch": 0.2359492398925116, "grad_norm": 0.1303202509880066, "learning_rate": 0.0005, "loss": 2.1313, "step": 61990 }, { "epoch": 0.23598730236063428, "grad_norm": 0.12199945747852325, "learning_rate": 0.0005, "loss": 2.1178, "step": 62000 }, { "epoch": 0.23602536482875697, "grad_norm": 0.12263193726539612, "learning_rate": 0.0005, "loss": 2.1258, "step": 62010 }, { "epoch": 0.23606342729687965, "grad_norm": 0.12076544016599655, "learning_rate": 0.0005, "loss": 2.1226, "step": 62020 }, { "epoch": 0.2361014897650023, "grad_norm": 0.12237869948148727, "learning_rate": 0.0005, "loss": 2.1225, "step": 62030 }, { "epoch": 0.236139552233125, "grad_norm": 0.12361596524715424, "learning_rate": 0.0005, "loss": 2.1376, "step": 62040 }, { "epoch": 0.23617761470124768, "grad_norm": 0.12535211443901062, "learning_rate": 0.0005, "loss": 2.1528, "step": 62050 }, { "epoch": 0.23621567716937036, "grad_norm": 0.1265745609998703, "learning_rate": 0.0005, "loss": 2.1327, "step": 62060 }, { "epoch": 0.23625373963749305, "grad_norm": 0.11701487004756927, "learning_rate": 0.0005, "loss": 2.1288, "step": 62070 }, { "epoch": 0.23629180210561573, "grad_norm": 0.1229015588760376, "learning_rate": 0.0005, "loss": 2.1227, "step": 62080 }, { "epoch": 0.23632986457373842, "grad_norm": 0.1286887526512146, "learning_rate": 0.0005, "loss": 2.1242, "step": 62090 }, { "epoch": 0.2363679270418611, "grad_norm": 0.12128289043903351, "learning_rate": 0.0005, "loss": 2.1316, "step": 62100 }, { "epoch": 0.2364059895099838, "grad_norm": 0.12501858174800873, "learning_rate": 0.0005, "loss": 2.1164, "step": 62110 }, { "epoch": 0.23644405197810647, "grad_norm": 0.14402461051940918, "learning_rate": 0.0005, "loss": 2.1247, "step": 62120 }, { "epoch": 0.23648211444622916, "grad_norm": 0.13072051107883453, "learning_rate": 0.0005, "loss": 2.129, "step": 62130 }, { "epoch": 0.23652017691435184, "grad_norm": 0.11157186329364777, "learning_rate": 0.0005, "loss": 2.1232, "step": 62140 }, { "epoch": 0.23655823938247453, "grad_norm": 0.1368582844734192, "learning_rate": 0.0005, "loss": 2.1318, "step": 62150 }, { "epoch": 0.2365963018505972, "grad_norm": 0.12076527625322342, "learning_rate": 0.0005, "loss": 2.1244, "step": 62160 }, { "epoch": 0.23663436431871987, "grad_norm": 0.23976096510887146, "learning_rate": 0.0005, "loss": 2.1248, "step": 62170 }, { "epoch": 0.23667242678684255, "grad_norm": 0.14966896176338196, "learning_rate": 0.0005, "loss": 2.1285, "step": 62180 }, { "epoch": 0.23671048925496524, "grad_norm": 0.12820686399936676, "learning_rate": 0.0005, "loss": 2.1273, "step": 62190 }, { "epoch": 0.23674855172308792, "grad_norm": 0.12050171941518784, "learning_rate": 0.0005, "loss": 2.1288, "step": 62200 }, { "epoch": 0.2367866141912106, "grad_norm": 0.12259026616811752, "learning_rate": 0.0005, "loss": 2.1469, "step": 62210 }, { "epoch": 0.2368246766593333, "grad_norm": 0.13420963287353516, "learning_rate": 0.0005, "loss": 2.1189, "step": 62220 }, { "epoch": 0.23686273912745598, "grad_norm": 0.13222679495811462, "learning_rate": 0.0005, "loss": 2.1351, "step": 62230 }, { "epoch": 0.23690080159557866, "grad_norm": 0.11894519627094269, "learning_rate": 0.0005, "loss": 2.1254, "step": 62240 }, { "epoch": 0.23693886406370135, "grad_norm": 0.12254227697849274, "learning_rate": 0.0005, "loss": 2.1314, "step": 62250 }, { "epoch": 0.23697692653182403, "grad_norm": 0.12389254570007324, "learning_rate": 0.0005, "loss": 2.1377, "step": 62260 }, { "epoch": 0.23701498899994672, "grad_norm": 0.11126727610826492, "learning_rate": 0.0005, "loss": 2.1221, "step": 62270 }, { "epoch": 0.2370530514680694, "grad_norm": 0.1380900889635086, "learning_rate": 0.0005, "loss": 2.1363, "step": 62280 }, { "epoch": 0.2370911139361921, "grad_norm": 0.12606385350227356, "learning_rate": 0.0005, "loss": 2.1193, "step": 62290 }, { "epoch": 0.23712917640431477, "grad_norm": 0.12130409479141235, "learning_rate": 0.0005, "loss": 2.1407, "step": 62300 }, { "epoch": 0.23716723887243746, "grad_norm": 0.11956527084112167, "learning_rate": 0.0005, "loss": 2.1219, "step": 62310 }, { "epoch": 0.23720530134056012, "grad_norm": 0.11794503778219223, "learning_rate": 0.0005, "loss": 2.1329, "step": 62320 }, { "epoch": 0.2372433638086828, "grad_norm": 0.12638238072395325, "learning_rate": 0.0005, "loss": 2.1322, "step": 62330 }, { "epoch": 0.23728142627680548, "grad_norm": 0.1229153424501419, "learning_rate": 0.0005, "loss": 2.1312, "step": 62340 }, { "epoch": 0.23731948874492817, "grad_norm": 0.12465435266494751, "learning_rate": 0.0005, "loss": 2.1278, "step": 62350 }, { "epoch": 0.23735755121305085, "grad_norm": 0.11661525815725327, "learning_rate": 0.0005, "loss": 2.1386, "step": 62360 }, { "epoch": 0.23739561368117354, "grad_norm": 0.1256762146949768, "learning_rate": 0.0005, "loss": 2.1227, "step": 62370 }, { "epoch": 0.23743367614929622, "grad_norm": 0.13339322805404663, "learning_rate": 0.0005, "loss": 2.1342, "step": 62380 }, { "epoch": 0.2374717386174189, "grad_norm": 0.12632669508457184, "learning_rate": 0.0005, "loss": 2.133, "step": 62390 }, { "epoch": 0.2375098010855416, "grad_norm": 0.12359143048524857, "learning_rate": 0.0005, "loss": 2.1139, "step": 62400 }, { "epoch": 0.23754786355366428, "grad_norm": 0.12854132056236267, "learning_rate": 0.0005, "loss": 2.1317, "step": 62410 }, { "epoch": 0.23758592602178696, "grad_norm": 0.13429182767868042, "learning_rate": 0.0005, "loss": 2.1305, "step": 62420 }, { "epoch": 0.23762398848990965, "grad_norm": 0.12218470126390457, "learning_rate": 0.0005, "loss": 2.1401, "step": 62430 }, { "epoch": 0.23766205095803233, "grad_norm": 0.13051879405975342, "learning_rate": 0.0005, "loss": 2.1235, "step": 62440 }, { "epoch": 0.23770011342615502, "grad_norm": 0.12221650779247284, "learning_rate": 0.0005, "loss": 2.1395, "step": 62450 }, { "epoch": 0.23773817589427768, "grad_norm": 0.12356175482273102, "learning_rate": 0.0005, "loss": 2.1235, "step": 62460 }, { "epoch": 0.23777623836240036, "grad_norm": 0.12108529359102249, "learning_rate": 0.0005, "loss": 2.1241, "step": 62470 }, { "epoch": 0.23781430083052305, "grad_norm": 0.1264612227678299, "learning_rate": 0.0005, "loss": 2.1255, "step": 62480 }, { "epoch": 0.23785236329864573, "grad_norm": 0.12323181331157684, "learning_rate": 0.0005, "loss": 2.1272, "step": 62490 }, { "epoch": 0.23789042576676842, "grad_norm": 0.11964374780654907, "learning_rate": 0.0005, "loss": 2.1208, "step": 62500 }, { "epoch": 0.2379284882348911, "grad_norm": 0.12805040180683136, "learning_rate": 0.0005, "loss": 2.126, "step": 62510 }, { "epoch": 0.23796655070301378, "grad_norm": 0.14057007431983948, "learning_rate": 0.0005, "loss": 2.1141, "step": 62520 }, { "epoch": 0.23800461317113647, "grad_norm": 0.1295660436153412, "learning_rate": 0.0005, "loss": 2.1406, "step": 62530 }, { "epoch": 0.23804267563925915, "grad_norm": 0.14525508880615234, "learning_rate": 0.0005, "loss": 2.1281, "step": 62540 }, { "epoch": 0.23808073810738184, "grad_norm": 0.13063256442546844, "learning_rate": 0.0005, "loss": 2.1272, "step": 62550 }, { "epoch": 0.23811880057550452, "grad_norm": 0.11782248318195343, "learning_rate": 0.0005, "loss": 2.1222, "step": 62560 }, { "epoch": 0.2381568630436272, "grad_norm": 0.13702130317687988, "learning_rate": 0.0005, "loss": 2.1285, "step": 62570 }, { "epoch": 0.2381949255117499, "grad_norm": 0.11541248112916946, "learning_rate": 0.0005, "loss": 2.1179, "step": 62580 }, { "epoch": 0.23823298797987258, "grad_norm": 0.12375971674919128, "learning_rate": 0.0005, "loss": 2.1194, "step": 62590 }, { "epoch": 0.23827105044799524, "grad_norm": 0.1241425946354866, "learning_rate": 0.0005, "loss": 2.1378, "step": 62600 }, { "epoch": 0.23830911291611792, "grad_norm": 0.14192144572734833, "learning_rate": 0.0005, "loss": 2.1246, "step": 62610 }, { "epoch": 0.2383471753842406, "grad_norm": 0.12440304458141327, "learning_rate": 0.0005, "loss": 2.1526, "step": 62620 }, { "epoch": 0.2383852378523633, "grad_norm": 0.1185927540063858, "learning_rate": 0.0005, "loss": 2.1255, "step": 62630 }, { "epoch": 0.23842330032048598, "grad_norm": 0.11676016449928284, "learning_rate": 0.0005, "loss": 2.1184, "step": 62640 }, { "epoch": 0.23846136278860866, "grad_norm": 0.12082359194755554, "learning_rate": 0.0005, "loss": 2.1403, "step": 62650 }, { "epoch": 0.23849942525673135, "grad_norm": 0.12140568345785141, "learning_rate": 0.0005, "loss": 2.1281, "step": 62660 }, { "epoch": 0.23853748772485403, "grad_norm": 0.11932049691677094, "learning_rate": 0.0005, "loss": 2.1328, "step": 62670 }, { "epoch": 0.23857555019297672, "grad_norm": 0.13034158945083618, "learning_rate": 0.0005, "loss": 2.1333, "step": 62680 }, { "epoch": 0.2386136126610994, "grad_norm": 0.12311530858278275, "learning_rate": 0.0005, "loss": 2.1346, "step": 62690 }, { "epoch": 0.23865167512922209, "grad_norm": 0.1283925622701645, "learning_rate": 0.0005, "loss": 2.1248, "step": 62700 }, { "epoch": 0.23868973759734477, "grad_norm": 0.11840459704399109, "learning_rate": 0.0005, "loss": 2.1047, "step": 62710 }, { "epoch": 0.23872780006546745, "grad_norm": 0.12968389689922333, "learning_rate": 0.0005, "loss": 2.1188, "step": 62720 }, { "epoch": 0.23876586253359014, "grad_norm": 0.11802906543016434, "learning_rate": 0.0005, "loss": 2.1433, "step": 62730 }, { "epoch": 0.23880392500171282, "grad_norm": 0.12069299817085266, "learning_rate": 0.0005, "loss": 2.1218, "step": 62740 }, { "epoch": 0.23884198746983548, "grad_norm": 0.12242452800273895, "learning_rate": 0.0005, "loss": 2.1361, "step": 62750 }, { "epoch": 0.23888004993795817, "grad_norm": 0.13938666880130768, "learning_rate": 0.0005, "loss": 2.1259, "step": 62760 }, { "epoch": 0.23891811240608085, "grad_norm": 0.1349049061536789, "learning_rate": 0.0005, "loss": 2.1406, "step": 62770 }, { "epoch": 0.23895617487420354, "grad_norm": 0.12318526953458786, "learning_rate": 0.0005, "loss": 2.1293, "step": 62780 }, { "epoch": 0.23899423734232622, "grad_norm": 0.11634445190429688, "learning_rate": 0.0005, "loss": 2.1421, "step": 62790 }, { "epoch": 0.2390322998104489, "grad_norm": 0.13093838095664978, "learning_rate": 0.0005, "loss": 2.1397, "step": 62800 }, { "epoch": 0.2390703622785716, "grad_norm": 0.11626966297626495, "learning_rate": 0.0005, "loss": 2.1363, "step": 62810 }, { "epoch": 0.23910842474669428, "grad_norm": 0.12809191644191742, "learning_rate": 0.0005, "loss": 2.1314, "step": 62820 }, { "epoch": 0.23914648721481696, "grad_norm": 0.11999858915805817, "learning_rate": 0.0005, "loss": 2.1233, "step": 62830 }, { "epoch": 0.23918454968293965, "grad_norm": 0.13391542434692383, "learning_rate": 0.0005, "loss": 2.128, "step": 62840 }, { "epoch": 0.23922261215106233, "grad_norm": 0.12422390282154083, "learning_rate": 0.0005, "loss": 2.1386, "step": 62850 }, { "epoch": 0.23926067461918502, "grad_norm": 0.12316978722810745, "learning_rate": 0.0005, "loss": 2.1181, "step": 62860 }, { "epoch": 0.2392987370873077, "grad_norm": 0.132602259516716, "learning_rate": 0.0005, "loss": 2.135, "step": 62870 }, { "epoch": 0.23933679955543039, "grad_norm": 0.1147371381521225, "learning_rate": 0.0005, "loss": 2.1408, "step": 62880 }, { "epoch": 0.23937486202355304, "grad_norm": 0.11935495585203171, "learning_rate": 0.0005, "loss": 2.1168, "step": 62890 }, { "epoch": 0.23941292449167573, "grad_norm": 0.11032240092754364, "learning_rate": 0.0005, "loss": 2.1185, "step": 62900 }, { "epoch": 0.2394509869597984, "grad_norm": 0.12904879450798035, "learning_rate": 0.0005, "loss": 2.1407, "step": 62910 }, { "epoch": 0.2394890494279211, "grad_norm": 0.12383519113063812, "learning_rate": 0.0005, "loss": 2.1257, "step": 62920 }, { "epoch": 0.23952711189604378, "grad_norm": 0.1210516095161438, "learning_rate": 0.0005, "loss": 2.1238, "step": 62930 }, { "epoch": 0.23956517436416647, "grad_norm": 0.1306571364402771, "learning_rate": 0.0005, "loss": 2.1193, "step": 62940 }, { "epoch": 0.23960323683228915, "grad_norm": 0.12337831407785416, "learning_rate": 0.0005, "loss": 2.1405, "step": 62950 }, { "epoch": 0.23964129930041184, "grad_norm": 0.12242639809846878, "learning_rate": 0.0005, "loss": 2.1307, "step": 62960 }, { "epoch": 0.23967936176853452, "grad_norm": 0.14051468670368195, "learning_rate": 0.0005, "loss": 2.1377, "step": 62970 }, { "epoch": 0.2397174242366572, "grad_norm": 0.13228604197502136, "learning_rate": 0.0005, "loss": 2.123, "step": 62980 }, { "epoch": 0.2397554867047799, "grad_norm": 0.11779261380434036, "learning_rate": 0.0005, "loss": 2.1177, "step": 62990 }, { "epoch": 0.23979354917290258, "grad_norm": 0.12763015925884247, "learning_rate": 0.0005, "loss": 2.1387, "step": 63000 }, { "epoch": 0.23983161164102526, "grad_norm": 0.13263078033924103, "learning_rate": 0.0005, "loss": 2.1326, "step": 63010 }, { "epoch": 0.23986967410914795, "grad_norm": 0.12094102799892426, "learning_rate": 0.0005, "loss": 2.1269, "step": 63020 }, { "epoch": 0.2399077365772706, "grad_norm": 0.11784138530492783, "learning_rate": 0.0005, "loss": 2.1232, "step": 63030 }, { "epoch": 0.2399457990453933, "grad_norm": 0.1280038058757782, "learning_rate": 0.0005, "loss": 2.1208, "step": 63040 }, { "epoch": 0.23998386151351597, "grad_norm": 0.12300426512956619, "learning_rate": 0.0005, "loss": 2.1319, "step": 63050 }, { "epoch": 0.24002192398163866, "grad_norm": 0.1237001046538353, "learning_rate": 0.0005, "loss": 2.1275, "step": 63060 }, { "epoch": 0.24005998644976134, "grad_norm": 0.15138910710811615, "learning_rate": 0.0005, "loss": 2.1207, "step": 63070 }, { "epoch": 0.24009804891788403, "grad_norm": 0.15691739320755005, "learning_rate": 0.0005, "loss": 2.1287, "step": 63080 }, { "epoch": 0.2401361113860067, "grad_norm": 0.12051752209663391, "learning_rate": 0.0005, "loss": 2.1251, "step": 63090 }, { "epoch": 0.2401741738541294, "grad_norm": 0.12068517506122589, "learning_rate": 0.0005, "loss": 2.1289, "step": 63100 }, { "epoch": 0.24021223632225208, "grad_norm": 0.12128940969705582, "learning_rate": 0.0005, "loss": 2.1352, "step": 63110 }, { "epoch": 0.24025029879037477, "grad_norm": 0.1229841560125351, "learning_rate": 0.0005, "loss": 2.1248, "step": 63120 }, { "epoch": 0.24028836125849745, "grad_norm": 0.13807646930217743, "learning_rate": 0.0005, "loss": 2.1253, "step": 63130 }, { "epoch": 0.24032642372662014, "grad_norm": 0.13278523087501526, "learning_rate": 0.0005, "loss": 2.1393, "step": 63140 }, { "epoch": 0.24036448619474282, "grad_norm": 0.11325494199991226, "learning_rate": 0.0005, "loss": 2.1384, "step": 63150 }, { "epoch": 0.2404025486628655, "grad_norm": 0.11987940222024918, "learning_rate": 0.0005, "loss": 2.1209, "step": 63160 }, { "epoch": 0.2404406111309882, "grad_norm": 0.14351606369018555, "learning_rate": 0.0005, "loss": 2.1196, "step": 63170 }, { "epoch": 0.24047867359911085, "grad_norm": 0.1169760599732399, "learning_rate": 0.0005, "loss": 2.1295, "step": 63180 }, { "epoch": 0.24051673606723353, "grad_norm": 0.14521263539791107, "learning_rate": 0.0005, "loss": 2.1398, "step": 63190 }, { "epoch": 0.24055479853535622, "grad_norm": 0.14447267353534698, "learning_rate": 0.0005, "loss": 2.1305, "step": 63200 }, { "epoch": 0.2405928610034789, "grad_norm": 0.12431297451257706, "learning_rate": 0.0005, "loss": 2.1112, "step": 63210 }, { "epoch": 0.2406309234716016, "grad_norm": 0.1190367341041565, "learning_rate": 0.0005, "loss": 2.1322, "step": 63220 }, { "epoch": 0.24066898593972427, "grad_norm": 0.11925860494375229, "learning_rate": 0.0005, "loss": 2.1407, "step": 63230 }, { "epoch": 0.24070704840784696, "grad_norm": 0.11938245594501495, "learning_rate": 0.0005, "loss": 2.1218, "step": 63240 }, { "epoch": 0.24074511087596964, "grad_norm": 0.12791714072227478, "learning_rate": 0.0005, "loss": 2.1348, "step": 63250 }, { "epoch": 0.24078317334409233, "grad_norm": 0.12287945300340652, "learning_rate": 0.0005, "loss": 2.1329, "step": 63260 }, { "epoch": 0.240821235812215, "grad_norm": 0.13500581681728363, "learning_rate": 0.0005, "loss": 2.128, "step": 63270 }, { "epoch": 0.2408592982803377, "grad_norm": 0.14133571088314056, "learning_rate": 0.0005, "loss": 2.1444, "step": 63280 }, { "epoch": 0.24089736074846038, "grad_norm": 0.11892928928136826, "learning_rate": 0.0005, "loss": 2.1363, "step": 63290 }, { "epoch": 0.24093542321658307, "grad_norm": 0.13436049222946167, "learning_rate": 0.0005, "loss": 2.1238, "step": 63300 }, { "epoch": 0.24097348568470575, "grad_norm": 0.12387977540493011, "learning_rate": 0.0005, "loss": 2.111, "step": 63310 }, { "epoch": 0.2410115481528284, "grad_norm": 0.11793079972267151, "learning_rate": 0.0005, "loss": 2.1244, "step": 63320 }, { "epoch": 0.2410496106209511, "grad_norm": 0.12627609074115753, "learning_rate": 0.0005, "loss": 2.1327, "step": 63330 }, { "epoch": 0.24108767308907378, "grad_norm": 0.12230251729488373, "learning_rate": 0.0005, "loss": 2.131, "step": 63340 }, { "epoch": 0.24112573555719646, "grad_norm": 0.1256856471300125, "learning_rate": 0.0005, "loss": 2.1376, "step": 63350 }, { "epoch": 0.24116379802531915, "grad_norm": 0.14163659512996674, "learning_rate": 0.0005, "loss": 2.1393, "step": 63360 }, { "epoch": 0.24120186049344183, "grad_norm": 0.12445111572742462, "learning_rate": 0.0005, "loss": 2.1298, "step": 63370 }, { "epoch": 0.24123992296156452, "grad_norm": 0.13497750461101532, "learning_rate": 0.0005, "loss": 2.1381, "step": 63380 }, { "epoch": 0.2412779854296872, "grad_norm": 0.15080420672893524, "learning_rate": 0.0005, "loss": 2.1305, "step": 63390 }, { "epoch": 0.2413160478978099, "grad_norm": 0.1224711537361145, "learning_rate": 0.0005, "loss": 2.134, "step": 63400 }, { "epoch": 0.24135411036593257, "grad_norm": 0.1363290399312973, "learning_rate": 0.0005, "loss": 2.1193, "step": 63410 }, { "epoch": 0.24139217283405526, "grad_norm": 0.12231992930173874, "learning_rate": 0.0005, "loss": 2.1378, "step": 63420 }, { "epoch": 0.24143023530217794, "grad_norm": 0.12203171104192734, "learning_rate": 0.0005, "loss": 2.1294, "step": 63430 }, { "epoch": 0.24146829777030063, "grad_norm": 0.12021537870168686, "learning_rate": 0.0005, "loss": 2.1286, "step": 63440 }, { "epoch": 0.2415063602384233, "grad_norm": 0.11745815724134445, "learning_rate": 0.0005, "loss": 2.1235, "step": 63450 }, { "epoch": 0.241544422706546, "grad_norm": 0.14927086234092712, "learning_rate": 0.0005, "loss": 2.137, "step": 63460 }, { "epoch": 0.24158248517466865, "grad_norm": 0.13094308972358704, "learning_rate": 0.0005, "loss": 2.1368, "step": 63470 }, { "epoch": 0.24162054764279134, "grad_norm": 0.12097480893135071, "learning_rate": 0.0005, "loss": 2.1262, "step": 63480 }, { "epoch": 0.24165861011091402, "grad_norm": 0.12082047015428543, "learning_rate": 0.0005, "loss": 2.1372, "step": 63490 }, { "epoch": 0.2416966725790367, "grad_norm": 0.14711803197860718, "learning_rate": 0.0005, "loss": 2.1299, "step": 63500 }, { "epoch": 0.2417347350471594, "grad_norm": 0.13210797309875488, "learning_rate": 0.0005, "loss": 2.1345, "step": 63510 }, { "epoch": 0.24177279751528208, "grad_norm": 0.12244979292154312, "learning_rate": 0.0005, "loss": 2.1347, "step": 63520 }, { "epoch": 0.24181085998340476, "grad_norm": 0.12733928859233856, "learning_rate": 0.0005, "loss": 2.126, "step": 63530 }, { "epoch": 0.24184892245152745, "grad_norm": 0.14031542837619781, "learning_rate": 0.0005, "loss": 2.1265, "step": 63540 }, { "epoch": 0.24188698491965013, "grad_norm": 0.1467483937740326, "learning_rate": 0.0005, "loss": 2.1475, "step": 63550 }, { "epoch": 0.24192504738777282, "grad_norm": 0.11587478220462799, "learning_rate": 0.0005, "loss": 2.1153, "step": 63560 }, { "epoch": 0.2419631098558955, "grad_norm": 0.11463305354118347, "learning_rate": 0.0005, "loss": 2.1292, "step": 63570 }, { "epoch": 0.2420011723240182, "grad_norm": 0.1248435229063034, "learning_rate": 0.0005, "loss": 2.1441, "step": 63580 }, { "epoch": 0.24203923479214087, "grad_norm": 0.13611268997192383, "learning_rate": 0.0005, "loss": 2.1457, "step": 63590 }, { "epoch": 0.24207729726026356, "grad_norm": 0.14438386261463165, "learning_rate": 0.0005, "loss": 2.118, "step": 63600 }, { "epoch": 0.24211535972838621, "grad_norm": 0.13003243505954742, "learning_rate": 0.0005, "loss": 2.1316, "step": 63610 }, { "epoch": 0.2421534221965089, "grad_norm": 0.13023284077644348, "learning_rate": 0.0005, "loss": 2.1266, "step": 63620 }, { "epoch": 0.24219148466463158, "grad_norm": 0.12970775365829468, "learning_rate": 0.0005, "loss": 2.1473, "step": 63630 }, { "epoch": 0.24222954713275427, "grad_norm": 0.12701945006847382, "learning_rate": 0.0005, "loss": 2.1251, "step": 63640 }, { "epoch": 0.24226760960087695, "grad_norm": 0.12962621450424194, "learning_rate": 0.0005, "loss": 2.1254, "step": 63650 }, { "epoch": 0.24230567206899964, "grad_norm": 0.12591864168643951, "learning_rate": 0.0005, "loss": 2.1334, "step": 63660 }, { "epoch": 0.24234373453712232, "grad_norm": 0.11711875349283218, "learning_rate": 0.0005, "loss": 2.119, "step": 63670 }, { "epoch": 0.242381797005245, "grad_norm": 0.13258887827396393, "learning_rate": 0.0005, "loss": 2.1418, "step": 63680 }, { "epoch": 0.2424198594733677, "grad_norm": 0.12952230870723724, "learning_rate": 0.0005, "loss": 2.1319, "step": 63690 }, { "epoch": 0.24245792194149038, "grad_norm": 0.12011770904064178, "learning_rate": 0.0005, "loss": 2.1407, "step": 63700 }, { "epoch": 0.24249598440961306, "grad_norm": 0.11629187315702438, "learning_rate": 0.0005, "loss": 2.1309, "step": 63710 }, { "epoch": 0.24253404687773575, "grad_norm": 0.11819379776716232, "learning_rate": 0.0005, "loss": 2.1131, "step": 63720 }, { "epoch": 0.24257210934585843, "grad_norm": 0.14825862646102905, "learning_rate": 0.0005, "loss": 2.14, "step": 63730 }, { "epoch": 0.24261017181398112, "grad_norm": 0.1542445570230484, "learning_rate": 0.0005, "loss": 2.1044, "step": 63740 }, { "epoch": 0.24264823428210378, "grad_norm": 0.11805212497711182, "learning_rate": 0.0005, "loss": 2.1215, "step": 63750 }, { "epoch": 0.24268629675022646, "grad_norm": 0.14055785536766052, "learning_rate": 0.0005, "loss": 2.1294, "step": 63760 }, { "epoch": 0.24272435921834914, "grad_norm": 0.12182782590389252, "learning_rate": 0.0005, "loss": 2.1179, "step": 63770 }, { "epoch": 0.24276242168647183, "grad_norm": 0.11553092300891876, "learning_rate": 0.0005, "loss": 2.1274, "step": 63780 }, { "epoch": 0.24280048415459451, "grad_norm": 0.11161351203918457, "learning_rate": 0.0005, "loss": 2.141, "step": 63790 }, { "epoch": 0.2428385466227172, "grad_norm": 0.12397214770317078, "learning_rate": 0.0005, "loss": 2.1352, "step": 63800 }, { "epoch": 0.24287660909083988, "grad_norm": 0.12839215993881226, "learning_rate": 0.0005, "loss": 2.1314, "step": 63810 }, { "epoch": 0.24291467155896257, "grad_norm": 0.12061683088541031, "learning_rate": 0.0005, "loss": 2.1242, "step": 63820 }, { "epoch": 0.24295273402708525, "grad_norm": 0.11531350016593933, "learning_rate": 0.0005, "loss": 2.1048, "step": 63830 }, { "epoch": 0.24299079649520794, "grad_norm": 0.12071649730205536, "learning_rate": 0.0005, "loss": 2.1247, "step": 63840 }, { "epoch": 0.24302885896333062, "grad_norm": 0.11956681311130524, "learning_rate": 0.0005, "loss": 2.1283, "step": 63850 }, { "epoch": 0.2430669214314533, "grad_norm": 0.1328822672367096, "learning_rate": 0.0005, "loss": 2.13, "step": 63860 }, { "epoch": 0.243104983899576, "grad_norm": 0.13923752307891846, "learning_rate": 0.0005, "loss": 2.1415, "step": 63870 }, { "epoch": 0.24314304636769868, "grad_norm": 0.1264955997467041, "learning_rate": 0.0005, "loss": 2.129, "step": 63880 }, { "epoch": 0.24318110883582136, "grad_norm": 0.12596023082733154, "learning_rate": 0.0005, "loss": 2.1341, "step": 63890 }, { "epoch": 0.24321917130394402, "grad_norm": 0.11505821347236633, "learning_rate": 0.0005, "loss": 2.1227, "step": 63900 }, { "epoch": 0.2432572337720667, "grad_norm": 0.13088856637477875, "learning_rate": 0.0005, "loss": 2.1451, "step": 63910 }, { "epoch": 0.2432952962401894, "grad_norm": 0.12740778923034668, "learning_rate": 0.0005, "loss": 2.125, "step": 63920 }, { "epoch": 0.24333335870831208, "grad_norm": 0.12028442323207855, "learning_rate": 0.0005, "loss": 2.1239, "step": 63930 }, { "epoch": 0.24337142117643476, "grad_norm": 0.11218731105327606, "learning_rate": 0.0005, "loss": 2.1266, "step": 63940 }, { "epoch": 0.24340948364455745, "grad_norm": 0.1180717945098877, "learning_rate": 0.0005, "loss": 2.1343, "step": 63950 }, { "epoch": 0.24344754611268013, "grad_norm": 0.12286002188920975, "learning_rate": 0.0005, "loss": 2.1287, "step": 63960 }, { "epoch": 0.24348560858080281, "grad_norm": 0.11139651387929916, "learning_rate": 0.0005, "loss": 2.1154, "step": 63970 }, { "epoch": 0.2435236710489255, "grad_norm": 0.12381156533956528, "learning_rate": 0.0005, "loss": 2.1286, "step": 63980 }, { "epoch": 0.24356173351704818, "grad_norm": 0.14636299014091492, "learning_rate": 0.0005, "loss": 2.1327, "step": 63990 }, { "epoch": 0.24359979598517087, "grad_norm": 0.13266819715499878, "learning_rate": 0.0005, "loss": 2.1152, "step": 64000 }, { "epoch": 0.24363785845329355, "grad_norm": 0.12030017375946045, "learning_rate": 0.0005, "loss": 2.1294, "step": 64010 }, { "epoch": 0.24367592092141624, "grad_norm": 0.1427072137594223, "learning_rate": 0.0005, "loss": 2.1259, "step": 64020 }, { "epoch": 0.24371398338953892, "grad_norm": 0.12498658150434494, "learning_rate": 0.0005, "loss": 2.1227, "step": 64030 }, { "epoch": 0.24375204585766158, "grad_norm": 0.12415233999490738, "learning_rate": 0.0005, "loss": 2.1254, "step": 64040 }, { "epoch": 0.24379010832578427, "grad_norm": 0.12559351325035095, "learning_rate": 0.0005, "loss": 2.1198, "step": 64050 }, { "epoch": 0.24382817079390695, "grad_norm": 0.12085636705160141, "learning_rate": 0.0005, "loss": 2.1375, "step": 64060 }, { "epoch": 0.24386623326202964, "grad_norm": 0.12610945105552673, "learning_rate": 0.0005, "loss": 2.1334, "step": 64070 }, { "epoch": 0.24390429573015232, "grad_norm": 0.11584189534187317, "learning_rate": 0.0005, "loss": 2.1367, "step": 64080 }, { "epoch": 0.243942358198275, "grad_norm": 0.12166000157594681, "learning_rate": 0.0005, "loss": 2.1156, "step": 64090 }, { "epoch": 0.2439804206663977, "grad_norm": 0.1491120308637619, "learning_rate": 0.0005, "loss": 2.1269, "step": 64100 }, { "epoch": 0.24401848313452038, "grad_norm": 0.11442311853170395, "learning_rate": 0.0005, "loss": 2.1382, "step": 64110 }, { "epoch": 0.24405654560264306, "grad_norm": 0.118242047727108, "learning_rate": 0.0005, "loss": 2.1381, "step": 64120 }, { "epoch": 0.24409460807076575, "grad_norm": 0.12931834161281586, "learning_rate": 0.0005, "loss": 2.1303, "step": 64130 }, { "epoch": 0.24413267053888843, "grad_norm": 0.12151516228914261, "learning_rate": 0.0005, "loss": 2.13, "step": 64140 }, { "epoch": 0.24417073300701111, "grad_norm": 0.11908268928527832, "learning_rate": 0.0005, "loss": 2.138, "step": 64150 }, { "epoch": 0.2442087954751338, "grad_norm": 0.13682927191257477, "learning_rate": 0.0005, "loss": 2.1386, "step": 64160 }, { "epoch": 0.24424685794325648, "grad_norm": 0.13674937188625336, "learning_rate": 0.0005, "loss": 2.1165, "step": 64170 }, { "epoch": 0.24428492041137914, "grad_norm": 0.12140002101659775, "learning_rate": 0.0005, "loss": 2.1249, "step": 64180 }, { "epoch": 0.24432298287950183, "grad_norm": 0.13333989679813385, "learning_rate": 0.0005, "loss": 2.1365, "step": 64190 }, { "epoch": 0.2443610453476245, "grad_norm": 0.1255248337984085, "learning_rate": 0.0005, "loss": 2.1147, "step": 64200 }, { "epoch": 0.2443991078157472, "grad_norm": 0.11844970285892487, "learning_rate": 0.0005, "loss": 2.1355, "step": 64210 }, { "epoch": 0.24443717028386988, "grad_norm": 0.23616556823253632, "learning_rate": 0.0005, "loss": 2.1221, "step": 64220 }, { "epoch": 0.24447523275199257, "grad_norm": 0.11513097584247589, "learning_rate": 0.0005, "loss": 2.1234, "step": 64230 }, { "epoch": 0.24451329522011525, "grad_norm": 0.12044999748468399, "learning_rate": 0.0005, "loss": 2.1373, "step": 64240 }, { "epoch": 0.24455135768823794, "grad_norm": 0.133218914270401, "learning_rate": 0.0005, "loss": 2.1289, "step": 64250 }, { "epoch": 0.24458942015636062, "grad_norm": 0.15163981914520264, "learning_rate": 0.0005, "loss": 2.1107, "step": 64260 }, { "epoch": 0.2446274826244833, "grad_norm": 0.1269594132900238, "learning_rate": 0.0005, "loss": 2.1322, "step": 64270 }, { "epoch": 0.244665545092606, "grad_norm": 0.12632466852664948, "learning_rate": 0.0005, "loss": 2.1351, "step": 64280 }, { "epoch": 0.24470360756072868, "grad_norm": 0.12292557954788208, "learning_rate": 0.0005, "loss": 2.1348, "step": 64290 }, { "epoch": 0.24474167002885136, "grad_norm": 0.1207164004445076, "learning_rate": 0.0005, "loss": 2.1319, "step": 64300 }, { "epoch": 0.24477973249697405, "grad_norm": 0.1272284984588623, "learning_rate": 0.0005, "loss": 2.1378, "step": 64310 }, { "epoch": 0.24481779496509673, "grad_norm": 0.12717846035957336, "learning_rate": 0.0005, "loss": 2.1442, "step": 64320 }, { "epoch": 0.2448558574332194, "grad_norm": 0.12871286273002625, "learning_rate": 0.0005, "loss": 2.1274, "step": 64330 }, { "epoch": 0.24489391990134207, "grad_norm": 0.1255853921175003, "learning_rate": 0.0005, "loss": 2.1102, "step": 64340 }, { "epoch": 0.24493198236946476, "grad_norm": 0.12132584303617477, "learning_rate": 0.0005, "loss": 2.118, "step": 64350 }, { "epoch": 0.24497004483758744, "grad_norm": 0.12538164854049683, "learning_rate": 0.0005, "loss": 2.1244, "step": 64360 }, { "epoch": 0.24500810730571013, "grad_norm": 0.13736794888973236, "learning_rate": 0.0005, "loss": 2.1347, "step": 64370 }, { "epoch": 0.2450461697738328, "grad_norm": 0.12930458784103394, "learning_rate": 0.0005, "loss": 2.1333, "step": 64380 }, { "epoch": 0.2450842322419555, "grad_norm": 0.10916830599308014, "learning_rate": 0.0005, "loss": 2.1379, "step": 64390 }, { "epoch": 0.24512229471007818, "grad_norm": 0.128557026386261, "learning_rate": 0.0005, "loss": 2.1254, "step": 64400 }, { "epoch": 0.24516035717820087, "grad_norm": 0.12013334035873413, "learning_rate": 0.0005, "loss": 2.121, "step": 64410 }, { "epoch": 0.24519841964632355, "grad_norm": 0.13635292649269104, "learning_rate": 0.0005, "loss": 2.1201, "step": 64420 }, { "epoch": 0.24523648211444624, "grad_norm": 0.12600380182266235, "learning_rate": 0.0005, "loss": 2.1155, "step": 64430 }, { "epoch": 0.24527454458256892, "grad_norm": 0.1258964091539383, "learning_rate": 0.0005, "loss": 2.1263, "step": 64440 }, { "epoch": 0.2453126070506916, "grad_norm": 0.12330331653356552, "learning_rate": 0.0005, "loss": 2.1367, "step": 64450 }, { "epoch": 0.2453506695188143, "grad_norm": 0.12577103078365326, "learning_rate": 0.0005, "loss": 2.1382, "step": 64460 }, { "epoch": 0.24538873198693695, "grad_norm": 0.11576177924871445, "learning_rate": 0.0005, "loss": 2.1323, "step": 64470 }, { "epoch": 0.24542679445505963, "grad_norm": 0.169657900929451, "learning_rate": 0.0005, "loss": 2.1361, "step": 64480 }, { "epoch": 0.24546485692318232, "grad_norm": 0.12155858427286148, "learning_rate": 0.0005, "loss": 2.1258, "step": 64490 }, { "epoch": 0.245502919391305, "grad_norm": 0.11039765924215317, "learning_rate": 0.0005, "loss": 2.1193, "step": 64500 }, { "epoch": 0.2455409818594277, "grad_norm": 0.11560600250959396, "learning_rate": 0.0005, "loss": 2.1228, "step": 64510 }, { "epoch": 0.24557904432755037, "grad_norm": 0.12468001991510391, "learning_rate": 0.0005, "loss": 2.1249, "step": 64520 }, { "epoch": 0.24561710679567306, "grad_norm": 0.11813274025917053, "learning_rate": 0.0005, "loss": 2.1353, "step": 64530 }, { "epoch": 0.24565516926379574, "grad_norm": 0.11909537762403488, "learning_rate": 0.0005, "loss": 2.1272, "step": 64540 }, { "epoch": 0.24569323173191843, "grad_norm": 0.1262873411178589, "learning_rate": 0.0005, "loss": 2.1299, "step": 64550 }, { "epoch": 0.2457312942000411, "grad_norm": 0.13570715487003326, "learning_rate": 0.0005, "loss": 2.1276, "step": 64560 }, { "epoch": 0.2457693566681638, "grad_norm": 0.12662003934383392, "learning_rate": 0.0005, "loss": 2.1331, "step": 64570 }, { "epoch": 0.24580741913628648, "grad_norm": 0.12291039526462555, "learning_rate": 0.0005, "loss": 2.1422, "step": 64580 }, { "epoch": 0.24584548160440917, "grad_norm": 0.12123207002878189, "learning_rate": 0.0005, "loss": 2.13, "step": 64590 }, { "epoch": 0.24588354407253185, "grad_norm": 0.12253370881080627, "learning_rate": 0.0005, "loss": 2.1205, "step": 64600 }, { "epoch": 0.24592160654065454, "grad_norm": 0.1316492259502411, "learning_rate": 0.0005, "loss": 2.1232, "step": 64610 }, { "epoch": 0.2459596690087772, "grad_norm": 0.1356193870306015, "learning_rate": 0.0005, "loss": 2.1486, "step": 64620 }, { "epoch": 0.24599773147689988, "grad_norm": 0.132023423910141, "learning_rate": 0.0005, "loss": 2.134, "step": 64630 }, { "epoch": 0.24603579394502256, "grad_norm": 0.12114515900611877, "learning_rate": 0.0005, "loss": 2.1336, "step": 64640 }, { "epoch": 0.24607385641314525, "grad_norm": 0.12968000769615173, "learning_rate": 0.0005, "loss": 2.1192, "step": 64650 }, { "epoch": 0.24611191888126793, "grad_norm": 0.13613907992839813, "learning_rate": 0.0005, "loss": 2.126, "step": 64660 }, { "epoch": 0.24614998134939062, "grad_norm": 0.12374147772789001, "learning_rate": 0.0005, "loss": 2.1276, "step": 64670 }, { "epoch": 0.2461880438175133, "grad_norm": 0.12278860062360764, "learning_rate": 0.0005, "loss": 2.1236, "step": 64680 }, { "epoch": 0.246226106285636, "grad_norm": 0.12701402604579926, "learning_rate": 0.0005, "loss": 2.1229, "step": 64690 }, { "epoch": 0.24626416875375867, "grad_norm": 0.11772578209638596, "learning_rate": 0.0005, "loss": 2.1303, "step": 64700 }, { "epoch": 0.24630223122188136, "grad_norm": 0.11964793503284454, "learning_rate": 0.0005, "loss": 2.1274, "step": 64710 }, { "epoch": 0.24634029369000404, "grad_norm": 0.12465240061283112, "learning_rate": 0.0005, "loss": 2.1437, "step": 64720 }, { "epoch": 0.24637835615812673, "grad_norm": 0.14138975739479065, "learning_rate": 0.0005, "loss": 2.144, "step": 64730 }, { "epoch": 0.2464164186262494, "grad_norm": 0.13955868780612946, "learning_rate": 0.0005, "loss": 2.1203, "step": 64740 }, { "epoch": 0.2464544810943721, "grad_norm": 0.1234760656952858, "learning_rate": 0.0005, "loss": 2.1311, "step": 64750 }, { "epoch": 0.24649254356249475, "grad_norm": 0.11192571371793747, "learning_rate": 0.0005, "loss": 2.1252, "step": 64760 }, { "epoch": 0.24653060603061744, "grad_norm": 0.11702840775251389, "learning_rate": 0.0005, "loss": 2.1341, "step": 64770 }, { "epoch": 0.24656866849874012, "grad_norm": 0.1265072077512741, "learning_rate": 0.0005, "loss": 2.1443, "step": 64780 }, { "epoch": 0.2466067309668628, "grad_norm": 0.12466669827699661, "learning_rate": 0.0005, "loss": 2.1398, "step": 64790 }, { "epoch": 0.2466447934349855, "grad_norm": 0.12184906005859375, "learning_rate": 0.0005, "loss": 2.1303, "step": 64800 }, { "epoch": 0.24668285590310818, "grad_norm": 0.12810151278972626, "learning_rate": 0.0005, "loss": 2.1265, "step": 64810 }, { "epoch": 0.24672091837123086, "grad_norm": 0.12840422987937927, "learning_rate": 0.0005, "loss": 2.1392, "step": 64820 }, { "epoch": 0.24675898083935355, "grad_norm": 0.11984486877918243, "learning_rate": 0.0005, "loss": 2.1027, "step": 64830 }, { "epoch": 0.24679704330747623, "grad_norm": 0.12411165237426758, "learning_rate": 0.0005, "loss": 2.1431, "step": 64840 }, { "epoch": 0.24683510577559892, "grad_norm": 0.14512589573860168, "learning_rate": 0.0005, "loss": 2.1201, "step": 64850 }, { "epoch": 0.2468731682437216, "grad_norm": 0.4416506886482239, "learning_rate": 0.0005, "loss": 2.1372, "step": 64860 }, { "epoch": 0.2469112307118443, "grad_norm": 0.12932956218719482, "learning_rate": 0.0005, "loss": 2.1248, "step": 64870 }, { "epoch": 0.24694929317996697, "grad_norm": 0.1224246621131897, "learning_rate": 0.0005, "loss": 2.1229, "step": 64880 }, { "epoch": 0.24698735564808966, "grad_norm": 0.11386513710021973, "learning_rate": 0.0005, "loss": 2.1442, "step": 64890 }, { "epoch": 0.24702541811621231, "grad_norm": 0.11869112402200699, "learning_rate": 0.0005, "loss": 2.1193, "step": 64900 }, { "epoch": 0.247063480584335, "grad_norm": 0.11242227256298065, "learning_rate": 0.0005, "loss": 2.1264, "step": 64910 }, { "epoch": 0.24710154305245768, "grad_norm": 0.11348606646060944, "learning_rate": 0.0005, "loss": 2.1218, "step": 64920 }, { "epoch": 0.24713960552058037, "grad_norm": 0.13171638548374176, "learning_rate": 0.0005, "loss": 2.1182, "step": 64930 }, { "epoch": 0.24717766798870305, "grad_norm": 0.12162895500659943, "learning_rate": 0.0005, "loss": 2.1406, "step": 64940 }, { "epoch": 0.24721573045682574, "grad_norm": 0.1277979016304016, "learning_rate": 0.0005, "loss": 2.115, "step": 64950 }, { "epoch": 0.24725379292494842, "grad_norm": 0.12796761095523834, "learning_rate": 0.0005, "loss": 2.1182, "step": 64960 }, { "epoch": 0.2472918553930711, "grad_norm": 0.11699617654085159, "learning_rate": 0.0005, "loss": 2.151, "step": 64970 }, { "epoch": 0.2473299178611938, "grad_norm": 0.113719142973423, "learning_rate": 0.0005, "loss": 2.1381, "step": 64980 }, { "epoch": 0.24736798032931648, "grad_norm": 0.12022378295660019, "learning_rate": 0.0005, "loss": 2.1288, "step": 64990 }, { "epoch": 0.24740604279743916, "grad_norm": 0.11734028160572052, "learning_rate": 0.0005, "loss": 2.1397, "step": 65000 }, { "epoch": 0.24744410526556185, "grad_norm": 0.13308216631412506, "learning_rate": 0.0005, "loss": 2.127, "step": 65010 }, { "epoch": 0.24748216773368453, "grad_norm": 0.1400652527809143, "learning_rate": 0.0005, "loss": 2.1356, "step": 65020 }, { "epoch": 0.24752023020180722, "grad_norm": 0.12651506066322327, "learning_rate": 0.0005, "loss": 2.1316, "step": 65030 }, { "epoch": 0.2475582926699299, "grad_norm": 0.1413552612066269, "learning_rate": 0.0005, "loss": 2.1212, "step": 65040 }, { "epoch": 0.24759635513805256, "grad_norm": 0.1348857581615448, "learning_rate": 0.0005, "loss": 2.1238, "step": 65050 }, { "epoch": 0.24763441760617524, "grad_norm": 0.12505334615707397, "learning_rate": 0.0005, "loss": 2.12, "step": 65060 }, { "epoch": 0.24767248007429793, "grad_norm": 0.12925513088703156, "learning_rate": 0.0005, "loss": 2.1233, "step": 65070 }, { "epoch": 0.24771054254242061, "grad_norm": 0.12137854844331741, "learning_rate": 0.0005, "loss": 2.1259, "step": 65080 }, { "epoch": 0.2477486050105433, "grad_norm": 0.12658438086509705, "learning_rate": 0.0005, "loss": 2.1302, "step": 65090 }, { "epoch": 0.24778666747866598, "grad_norm": 0.11648210138082504, "learning_rate": 0.0005, "loss": 2.1366, "step": 65100 }, { "epoch": 0.24782472994678867, "grad_norm": 0.12034095823764801, "learning_rate": 0.0005, "loss": 2.1418, "step": 65110 }, { "epoch": 0.24786279241491135, "grad_norm": 0.1281271129846573, "learning_rate": 0.0005, "loss": 2.1281, "step": 65120 }, { "epoch": 0.24790085488303404, "grad_norm": 0.12572212517261505, "learning_rate": 0.0005, "loss": 2.1262, "step": 65130 }, { "epoch": 0.24793891735115672, "grad_norm": 0.14182905852794647, "learning_rate": 0.0005, "loss": 2.1176, "step": 65140 }, { "epoch": 0.2479769798192794, "grad_norm": 0.12417514622211456, "learning_rate": 0.0005, "loss": 2.1256, "step": 65150 }, { "epoch": 0.2480150422874021, "grad_norm": 0.1255902200937271, "learning_rate": 0.0005, "loss": 2.1253, "step": 65160 }, { "epoch": 0.24805310475552478, "grad_norm": 0.12469867616891861, "learning_rate": 0.0005, "loss": 2.1183, "step": 65170 }, { "epoch": 0.24809116722364746, "grad_norm": 0.12777365744113922, "learning_rate": 0.0005, "loss": 2.1336, "step": 65180 }, { "epoch": 0.24812922969177012, "grad_norm": 0.12414289265871048, "learning_rate": 0.0005, "loss": 2.1163, "step": 65190 }, { "epoch": 0.2481672921598928, "grad_norm": 0.13034619390964508, "learning_rate": 0.0005, "loss": 2.1332, "step": 65200 }, { "epoch": 0.2482053546280155, "grad_norm": 0.1196286529302597, "learning_rate": 0.0005, "loss": 2.1115, "step": 65210 }, { "epoch": 0.24824341709613817, "grad_norm": 0.11457592993974686, "learning_rate": 0.0005, "loss": 2.1337, "step": 65220 }, { "epoch": 0.24828147956426086, "grad_norm": 0.13046908378601074, "learning_rate": 0.0005, "loss": 2.1251, "step": 65230 }, { "epoch": 0.24831954203238354, "grad_norm": 0.12603069841861725, "learning_rate": 0.0005, "loss": 2.1177, "step": 65240 }, { "epoch": 0.24835760450050623, "grad_norm": 0.12878099083900452, "learning_rate": 0.0005, "loss": 2.1155, "step": 65250 }, { "epoch": 0.24839566696862891, "grad_norm": 0.14651422202587128, "learning_rate": 0.0005, "loss": 2.1376, "step": 65260 }, { "epoch": 0.2484337294367516, "grad_norm": 0.12332681566476822, "learning_rate": 0.0005, "loss": 2.1361, "step": 65270 }, { "epoch": 0.24847179190487428, "grad_norm": 0.11837499588727951, "learning_rate": 0.0005, "loss": 2.1228, "step": 65280 }, { "epoch": 0.24850985437299697, "grad_norm": 0.11317744106054306, "learning_rate": 0.0005, "loss": 2.1156, "step": 65290 }, { "epoch": 0.24854791684111965, "grad_norm": 0.13026000559329987, "learning_rate": 0.0005, "loss": 2.1436, "step": 65300 }, { "epoch": 0.24858597930924234, "grad_norm": 0.12601986527442932, "learning_rate": 0.0005, "loss": 2.1292, "step": 65310 }, { "epoch": 0.24862404177736502, "grad_norm": 0.12494723498821259, "learning_rate": 0.0005, "loss": 2.1256, "step": 65320 }, { "epoch": 0.24866210424548768, "grad_norm": 0.1308157593011856, "learning_rate": 0.0005, "loss": 2.1342, "step": 65330 }, { "epoch": 0.24870016671361037, "grad_norm": 0.1284460723400116, "learning_rate": 0.0005, "loss": 2.1316, "step": 65340 }, { "epoch": 0.24873822918173305, "grad_norm": 0.13919104635715485, "learning_rate": 0.0005, "loss": 2.1358, "step": 65350 }, { "epoch": 0.24877629164985574, "grad_norm": 0.1327711045742035, "learning_rate": 0.0005, "loss": 2.126, "step": 65360 }, { "epoch": 0.24881435411797842, "grad_norm": 0.1231195405125618, "learning_rate": 0.0005, "loss": 2.131, "step": 65370 }, { "epoch": 0.2488524165861011, "grad_norm": 0.11069672554731369, "learning_rate": 0.0005, "loss": 2.1242, "step": 65380 }, { "epoch": 0.2488904790542238, "grad_norm": 0.14658358693122864, "learning_rate": 0.0005, "loss": 2.126, "step": 65390 }, { "epoch": 0.24892854152234647, "grad_norm": 0.14038780331611633, "learning_rate": 0.0005, "loss": 2.1164, "step": 65400 }, { "epoch": 0.24896660399046916, "grad_norm": 0.11526990681886673, "learning_rate": 0.0005, "loss": 2.1259, "step": 65410 }, { "epoch": 0.24900466645859184, "grad_norm": 0.12075836211442947, "learning_rate": 0.0005, "loss": 2.1318, "step": 65420 }, { "epoch": 0.24904272892671453, "grad_norm": 0.12481142580509186, "learning_rate": 0.0005, "loss": 2.1323, "step": 65430 }, { "epoch": 0.24908079139483721, "grad_norm": 0.12299896031618118, "learning_rate": 0.0005, "loss": 2.1315, "step": 65440 }, { "epoch": 0.2491188538629599, "grad_norm": 0.13120685517787933, "learning_rate": 0.0005, "loss": 2.1362, "step": 65450 }, { "epoch": 0.24915691633108258, "grad_norm": 0.12218866497278214, "learning_rate": 0.0005, "loss": 2.1316, "step": 65460 }, { "epoch": 0.24919497879920527, "grad_norm": 0.1156734749674797, "learning_rate": 0.0005, "loss": 2.1345, "step": 65470 }, { "epoch": 0.24923304126732793, "grad_norm": 0.12150178849697113, "learning_rate": 0.0005, "loss": 2.1301, "step": 65480 }, { "epoch": 0.2492711037354506, "grad_norm": 0.12679104506969452, "learning_rate": 0.0005, "loss": 2.1215, "step": 65490 }, { "epoch": 0.2493091662035733, "grad_norm": 0.12691758573055267, "learning_rate": 0.0005, "loss": 2.1285, "step": 65500 }, { "epoch": 0.24934722867169598, "grad_norm": 0.11793230473995209, "learning_rate": 0.0005, "loss": 2.1259, "step": 65510 }, { "epoch": 0.24938529113981867, "grad_norm": 0.11975978314876556, "learning_rate": 0.0005, "loss": 2.1309, "step": 65520 }, { "epoch": 0.24942335360794135, "grad_norm": 0.1233128160238266, "learning_rate": 0.0005, "loss": 2.1342, "step": 65530 }, { "epoch": 0.24946141607606404, "grad_norm": 0.13914133608341217, "learning_rate": 0.0005, "loss": 2.1277, "step": 65540 }, { "epoch": 0.24949947854418672, "grad_norm": 0.1286463737487793, "learning_rate": 0.0005, "loss": 2.1374, "step": 65550 }, { "epoch": 0.2495375410123094, "grad_norm": 0.13319668173789978, "learning_rate": 0.0005, "loss": 2.1209, "step": 65560 }, { "epoch": 0.2495756034804321, "grad_norm": 0.1314106285572052, "learning_rate": 0.0005, "loss": 2.1297, "step": 65570 }, { "epoch": 0.24961366594855477, "grad_norm": 0.1226097047328949, "learning_rate": 0.0005, "loss": 2.1401, "step": 65580 }, { "epoch": 0.24965172841667746, "grad_norm": 0.1222294494509697, "learning_rate": 0.0005, "loss": 2.1248, "step": 65590 }, { "epoch": 0.24968979088480014, "grad_norm": 0.1437918245792389, "learning_rate": 0.0005, "loss": 2.1169, "step": 65600 }, { "epoch": 0.24972785335292283, "grad_norm": 0.11622069776058197, "learning_rate": 0.0005, "loss": 2.1363, "step": 65610 }, { "epoch": 0.2497659158210455, "grad_norm": 0.11489139497280121, "learning_rate": 0.0005, "loss": 2.1297, "step": 65620 }, { "epoch": 0.24980397828916817, "grad_norm": 0.12246556580066681, "learning_rate": 0.0005, "loss": 2.1239, "step": 65630 }, { "epoch": 0.24984204075729086, "grad_norm": 0.11942298710346222, "learning_rate": 0.0005, "loss": 2.1215, "step": 65640 }, { "epoch": 0.24988010322541354, "grad_norm": 0.12547263503074646, "learning_rate": 0.0005, "loss": 2.1287, "step": 65650 }, { "epoch": 0.24991816569353623, "grad_norm": 0.12355095148086548, "learning_rate": 0.0005, "loss": 2.1326, "step": 65660 }, { "epoch": 0.2499562281616589, "grad_norm": 0.14005330204963684, "learning_rate": 0.0005, "loss": 2.1189, "step": 65670 }, { "epoch": 0.2499942906297816, "grad_norm": 0.12415273487567902, "learning_rate": 0.0005, "loss": 2.1189, "step": 65680 }, { "epoch": 0.2500323530979043, "grad_norm": 0.1350027173757553, "learning_rate": 0.0005, "loss": 2.133, "step": 65690 }, { "epoch": 0.25007041556602694, "grad_norm": 0.12149950861930847, "learning_rate": 0.0005, "loss": 2.1275, "step": 65700 }, { "epoch": 0.25010847803414965, "grad_norm": 0.1218823567032814, "learning_rate": 0.0005, "loss": 2.139, "step": 65710 }, { "epoch": 0.2501465405022723, "grad_norm": 0.1255420595407486, "learning_rate": 0.0005, "loss": 2.1216, "step": 65720 }, { "epoch": 0.250184602970395, "grad_norm": 0.12200768291950226, "learning_rate": 0.0005, "loss": 2.1193, "step": 65730 }, { "epoch": 0.2502226654385177, "grad_norm": 0.11212620884180069, "learning_rate": 0.0005, "loss": 2.1281, "step": 65740 }, { "epoch": 0.2502607279066404, "grad_norm": 0.11211491376161575, "learning_rate": 0.0005, "loss": 2.1138, "step": 65750 }, { "epoch": 0.25029879037476305, "grad_norm": 0.11531613022089005, "learning_rate": 0.0005, "loss": 2.1028, "step": 65760 }, { "epoch": 0.25033685284288576, "grad_norm": 0.1244373545050621, "learning_rate": 0.0005, "loss": 2.1238, "step": 65770 }, { "epoch": 0.2503749153110084, "grad_norm": 0.12171068042516708, "learning_rate": 0.0005, "loss": 2.1331, "step": 65780 }, { "epoch": 0.25041297777913113, "grad_norm": 0.1351395547389984, "learning_rate": 0.0005, "loss": 2.1294, "step": 65790 }, { "epoch": 0.2504510402472538, "grad_norm": 0.12298998981714249, "learning_rate": 0.0005, "loss": 2.1267, "step": 65800 }, { "epoch": 0.2504891027153765, "grad_norm": 0.1542890965938568, "learning_rate": 0.0005, "loss": 2.1316, "step": 65810 }, { "epoch": 0.25052716518349916, "grad_norm": 0.13249491155147552, "learning_rate": 0.0005, "loss": 2.1296, "step": 65820 }, { "epoch": 0.25056522765162187, "grad_norm": 0.12993395328521729, "learning_rate": 0.0005, "loss": 2.1325, "step": 65830 }, { "epoch": 0.2506032901197445, "grad_norm": 0.12128250300884247, "learning_rate": 0.0005, "loss": 2.1046, "step": 65840 }, { "epoch": 0.2506413525878672, "grad_norm": 0.12215390056371689, "learning_rate": 0.0005, "loss": 2.1214, "step": 65850 }, { "epoch": 0.2506794150559899, "grad_norm": 0.12768031656742096, "learning_rate": 0.0005, "loss": 2.1372, "step": 65860 }, { "epoch": 0.25071747752411255, "grad_norm": 0.1210104376077652, "learning_rate": 0.0005, "loss": 2.1233, "step": 65870 }, { "epoch": 0.25075553999223527, "grad_norm": 0.14039009809494019, "learning_rate": 0.0005, "loss": 2.1268, "step": 65880 }, { "epoch": 0.2507936024603579, "grad_norm": 0.1367824524641037, "learning_rate": 0.0005, "loss": 2.1253, "step": 65890 }, { "epoch": 0.25083166492848064, "grad_norm": 0.11305456608533859, "learning_rate": 0.0005, "loss": 2.1416, "step": 65900 }, { "epoch": 0.2508697273966033, "grad_norm": 0.1420050412416458, "learning_rate": 0.0005, "loss": 2.1298, "step": 65910 }, { "epoch": 0.250907789864726, "grad_norm": 0.13083982467651367, "learning_rate": 0.0005, "loss": 2.1307, "step": 65920 }, { "epoch": 0.25094585233284866, "grad_norm": 0.1243060901761055, "learning_rate": 0.0005, "loss": 2.13, "step": 65930 }, { "epoch": 0.2509839148009714, "grad_norm": 0.13240239024162292, "learning_rate": 0.0005, "loss": 2.1282, "step": 65940 }, { "epoch": 0.25102197726909403, "grad_norm": 0.1207834780216217, "learning_rate": 0.0005, "loss": 2.1289, "step": 65950 }, { "epoch": 0.25106003973721674, "grad_norm": 0.12781934440135956, "learning_rate": 0.0005, "loss": 2.1377, "step": 65960 }, { "epoch": 0.2510981022053394, "grad_norm": 0.11338264495134354, "learning_rate": 0.0005, "loss": 2.1307, "step": 65970 }, { "epoch": 0.25113616467346206, "grad_norm": 0.12367227673530579, "learning_rate": 0.0005, "loss": 2.1277, "step": 65980 }, { "epoch": 0.25117422714158477, "grad_norm": 0.12069065868854523, "learning_rate": 0.0005, "loss": 2.1275, "step": 65990 }, { "epoch": 0.25121228960970743, "grad_norm": 0.1273607313632965, "learning_rate": 0.0005, "loss": 2.1362, "step": 66000 }, { "epoch": 0.25125035207783014, "grad_norm": 0.13285642862319946, "learning_rate": 0.0005, "loss": 2.11, "step": 66010 }, { "epoch": 0.2512884145459528, "grad_norm": 0.12723097205162048, "learning_rate": 0.0005, "loss": 2.1378, "step": 66020 }, { "epoch": 0.2513264770140755, "grad_norm": 0.1333588808774948, "learning_rate": 0.0005, "loss": 2.1348, "step": 66030 }, { "epoch": 0.25136453948219817, "grad_norm": 0.13743548095226288, "learning_rate": 0.0005, "loss": 2.1287, "step": 66040 }, { "epoch": 0.2514026019503209, "grad_norm": 0.11628317832946777, "learning_rate": 0.0005, "loss": 2.1335, "step": 66050 }, { "epoch": 0.25144066441844354, "grad_norm": 0.14928993582725525, "learning_rate": 0.0005, "loss": 2.1236, "step": 66060 }, { "epoch": 0.25147872688656625, "grad_norm": 0.12406531721353531, "learning_rate": 0.0005, "loss": 2.1369, "step": 66070 }, { "epoch": 0.2515167893546889, "grad_norm": 0.12148601561784744, "learning_rate": 0.0005, "loss": 2.1272, "step": 66080 }, { "epoch": 0.2515548518228116, "grad_norm": 0.11786960810422897, "learning_rate": 0.0005, "loss": 2.1273, "step": 66090 }, { "epoch": 0.2515929142909343, "grad_norm": 0.12175590544939041, "learning_rate": 0.0005, "loss": 2.1425, "step": 66100 }, { "epoch": 0.251630976759057, "grad_norm": 0.1423157900571823, "learning_rate": 0.0005, "loss": 2.1371, "step": 66110 }, { "epoch": 0.25166903922717965, "grad_norm": 0.24454635381698608, "learning_rate": 0.0005, "loss": 2.1206, "step": 66120 }, { "epoch": 0.2517071016953023, "grad_norm": 0.1412813365459442, "learning_rate": 0.0005, "loss": 2.131, "step": 66130 }, { "epoch": 0.251745164163425, "grad_norm": 0.11888179928064346, "learning_rate": 0.0005, "loss": 2.1143, "step": 66140 }, { "epoch": 0.2517832266315477, "grad_norm": 0.13959814608097076, "learning_rate": 0.0005, "loss": 2.1397, "step": 66150 }, { "epoch": 0.2518212890996704, "grad_norm": 0.12186913937330246, "learning_rate": 0.0005, "loss": 2.1213, "step": 66160 }, { "epoch": 0.25185935156779304, "grad_norm": 0.11685211956501007, "learning_rate": 0.0005, "loss": 2.1475, "step": 66170 }, { "epoch": 0.25189741403591576, "grad_norm": 0.11924509704113007, "learning_rate": 0.0005, "loss": 2.1335, "step": 66180 }, { "epoch": 0.2519354765040384, "grad_norm": 0.1242474839091301, "learning_rate": 0.0005, "loss": 2.1486, "step": 66190 }, { "epoch": 0.2519735389721611, "grad_norm": 0.1427517682313919, "learning_rate": 0.0005, "loss": 2.1524, "step": 66200 }, { "epoch": 0.2520116014402838, "grad_norm": 0.11653508245944977, "learning_rate": 0.0005, "loss": 2.1171, "step": 66210 }, { "epoch": 0.2520496639084065, "grad_norm": 0.13120757043361664, "learning_rate": 0.0005, "loss": 2.1091, "step": 66220 }, { "epoch": 0.25208772637652915, "grad_norm": 0.12150296568870544, "learning_rate": 0.0005, "loss": 2.1268, "step": 66230 }, { "epoch": 0.25212578884465187, "grad_norm": 0.134052112698555, "learning_rate": 0.0005, "loss": 2.1278, "step": 66240 }, { "epoch": 0.2521638513127745, "grad_norm": 0.12599046528339386, "learning_rate": 0.0005, "loss": 2.1197, "step": 66250 }, { "epoch": 0.25220191378089724, "grad_norm": 0.1288243979215622, "learning_rate": 0.0005, "loss": 2.1349, "step": 66260 }, { "epoch": 0.2522399762490199, "grad_norm": 0.12849846482276917, "learning_rate": 0.0005, "loss": 2.1199, "step": 66270 }, { "epoch": 0.25227803871714255, "grad_norm": 0.1309838891029358, "learning_rate": 0.0005, "loss": 2.1245, "step": 66280 }, { "epoch": 0.25231610118526526, "grad_norm": 0.11939046531915665, "learning_rate": 0.0005, "loss": 2.12, "step": 66290 }, { "epoch": 0.2523541636533879, "grad_norm": 0.11373452842235565, "learning_rate": 0.0005, "loss": 2.145, "step": 66300 }, { "epoch": 0.25239222612151063, "grad_norm": 0.12958891689777374, "learning_rate": 0.0005, "loss": 2.1435, "step": 66310 }, { "epoch": 0.2524302885896333, "grad_norm": 0.12233640998601913, "learning_rate": 0.0005, "loss": 2.1258, "step": 66320 }, { "epoch": 0.252468351057756, "grad_norm": 0.1465737372636795, "learning_rate": 0.0005, "loss": 2.1253, "step": 66330 }, { "epoch": 0.25250641352587866, "grad_norm": 0.1597142219543457, "learning_rate": 0.0005, "loss": 2.1163, "step": 66340 }, { "epoch": 0.25254447599400137, "grad_norm": 0.14479586482048035, "learning_rate": 0.0005, "loss": 2.134, "step": 66350 }, { "epoch": 0.25258253846212403, "grad_norm": 0.12330485880374908, "learning_rate": 0.0005, "loss": 2.1411, "step": 66360 }, { "epoch": 0.25262060093024674, "grad_norm": 0.12251732498407364, "learning_rate": 0.0005, "loss": 2.1384, "step": 66370 }, { "epoch": 0.2526586633983694, "grad_norm": 0.1360514760017395, "learning_rate": 0.0005, "loss": 2.1331, "step": 66380 }, { "epoch": 0.2526967258664921, "grad_norm": 0.12024963647127151, "learning_rate": 0.0005, "loss": 2.1274, "step": 66390 }, { "epoch": 0.25273478833461477, "grad_norm": 0.11666081100702286, "learning_rate": 0.0005, "loss": 2.1336, "step": 66400 }, { "epoch": 0.2527728508027374, "grad_norm": 0.11214052885770798, "learning_rate": 0.0005, "loss": 2.1203, "step": 66410 }, { "epoch": 0.25281091327086014, "grad_norm": 0.13760724663734436, "learning_rate": 0.0005, "loss": 2.1484, "step": 66420 }, { "epoch": 0.2528489757389828, "grad_norm": 0.11763881146907806, "learning_rate": 0.0005, "loss": 2.1335, "step": 66430 }, { "epoch": 0.2528870382071055, "grad_norm": 0.11611486971378326, "learning_rate": 0.0005, "loss": 2.1424, "step": 66440 }, { "epoch": 0.25292510067522816, "grad_norm": 0.13202884793281555, "learning_rate": 0.0005, "loss": 2.1258, "step": 66450 }, { "epoch": 0.2529631631433509, "grad_norm": 0.13429687917232513, "learning_rate": 0.0005, "loss": 2.134, "step": 66460 }, { "epoch": 0.25300122561147353, "grad_norm": 0.1115928664803505, "learning_rate": 0.0005, "loss": 2.1429, "step": 66470 }, { "epoch": 0.25303928807959625, "grad_norm": 0.11740121245384216, "learning_rate": 0.0005, "loss": 2.1329, "step": 66480 }, { "epoch": 0.2530773505477189, "grad_norm": 0.12334487587213516, "learning_rate": 0.0005, "loss": 2.1137, "step": 66490 }, { "epoch": 0.2531154130158416, "grad_norm": 0.12216371297836304, "learning_rate": 0.0005, "loss": 2.1156, "step": 66500 }, { "epoch": 0.2531534754839643, "grad_norm": 0.13560952246189117, "learning_rate": 0.0005, "loss": 2.127, "step": 66510 }, { "epoch": 0.253191537952087, "grad_norm": 0.11898113042116165, "learning_rate": 0.0005, "loss": 2.1208, "step": 66520 }, { "epoch": 0.25322960042020964, "grad_norm": 0.1410965621471405, "learning_rate": 0.0005, "loss": 2.1431, "step": 66530 }, { "epoch": 0.25326766288833236, "grad_norm": 0.12651461362838745, "learning_rate": 0.0005, "loss": 2.1456, "step": 66540 }, { "epoch": 0.253305725356455, "grad_norm": 0.12641902267932892, "learning_rate": 0.0005, "loss": 2.1186, "step": 66550 }, { "epoch": 0.25334378782457767, "grad_norm": 0.87174391746521, "learning_rate": 0.0005, "loss": 2.1288, "step": 66560 }, { "epoch": 0.2533818502927004, "grad_norm": 0.12635809183120728, "learning_rate": 0.0005, "loss": 2.1234, "step": 66570 }, { "epoch": 0.25341991276082304, "grad_norm": 0.12377549707889557, "learning_rate": 0.0005, "loss": 2.1304, "step": 66580 }, { "epoch": 0.25345797522894575, "grad_norm": 0.1181359812617302, "learning_rate": 0.0005, "loss": 2.1289, "step": 66590 }, { "epoch": 0.2534960376970684, "grad_norm": 0.1183367669582367, "learning_rate": 0.0005, "loss": 2.1287, "step": 66600 }, { "epoch": 0.2535341001651911, "grad_norm": 0.12796492874622345, "learning_rate": 0.0005, "loss": 2.1308, "step": 66610 }, { "epoch": 0.2535721626333138, "grad_norm": 0.12470392882823944, "learning_rate": 0.0005, "loss": 2.1315, "step": 66620 }, { "epoch": 0.2536102251014365, "grad_norm": 0.12096191942691803, "learning_rate": 0.0005, "loss": 2.1398, "step": 66630 }, { "epoch": 0.25364828756955915, "grad_norm": 0.13148650527000427, "learning_rate": 0.0005, "loss": 2.1258, "step": 66640 }, { "epoch": 0.25368635003768186, "grad_norm": 0.13177184760570526, "learning_rate": 0.0005, "loss": 2.1408, "step": 66650 }, { "epoch": 0.2537244125058045, "grad_norm": 0.1254524439573288, "learning_rate": 0.0005, "loss": 2.1182, "step": 66660 }, { "epoch": 0.25376247497392723, "grad_norm": 0.12147463113069534, "learning_rate": 0.0005, "loss": 2.1383, "step": 66670 }, { "epoch": 0.2538005374420499, "grad_norm": 0.11849434673786163, "learning_rate": 0.0005, "loss": 2.1159, "step": 66680 }, { "epoch": 0.2538385999101726, "grad_norm": 0.12578263878822327, "learning_rate": 0.0005, "loss": 2.1379, "step": 66690 }, { "epoch": 0.25387666237829526, "grad_norm": 0.13222847878932953, "learning_rate": 0.0005, "loss": 2.1272, "step": 66700 }, { "epoch": 0.2539147248464179, "grad_norm": 0.1336478441953659, "learning_rate": 0.0005, "loss": 2.1329, "step": 66710 }, { "epoch": 0.25395278731454063, "grad_norm": 0.12106378376483917, "learning_rate": 0.0005, "loss": 2.122, "step": 66720 }, { "epoch": 0.2539908497826633, "grad_norm": 0.12488880008459091, "learning_rate": 0.0005, "loss": 2.1247, "step": 66730 }, { "epoch": 0.254028912250786, "grad_norm": 0.1426115781068802, "learning_rate": 0.0005, "loss": 2.1305, "step": 66740 }, { "epoch": 0.25406697471890866, "grad_norm": 0.1336047649383545, "learning_rate": 0.0005, "loss": 2.1103, "step": 66750 }, { "epoch": 0.25410503718703137, "grad_norm": 0.12687437236309052, "learning_rate": 0.0005, "loss": 2.1354, "step": 66760 }, { "epoch": 0.254143099655154, "grad_norm": 0.12486867606639862, "learning_rate": 0.0005, "loss": 2.1214, "step": 66770 }, { "epoch": 0.25418116212327674, "grad_norm": 0.12586849927902222, "learning_rate": 0.0005, "loss": 2.1196, "step": 66780 }, { "epoch": 0.2542192245913994, "grad_norm": 0.12848053872585297, "learning_rate": 0.0005, "loss": 2.1224, "step": 66790 }, { "epoch": 0.2542572870595221, "grad_norm": 0.12215209752321243, "learning_rate": 0.0005, "loss": 2.1378, "step": 66800 }, { "epoch": 0.25429534952764477, "grad_norm": 0.12931972742080688, "learning_rate": 0.0005, "loss": 2.1224, "step": 66810 }, { "epoch": 0.2543334119957675, "grad_norm": 0.12214656919240952, "learning_rate": 0.0005, "loss": 2.1331, "step": 66820 }, { "epoch": 0.25437147446389013, "grad_norm": 0.12178391218185425, "learning_rate": 0.0005, "loss": 2.1276, "step": 66830 }, { "epoch": 0.2544095369320128, "grad_norm": 0.134334996342659, "learning_rate": 0.0005, "loss": 2.1227, "step": 66840 }, { "epoch": 0.2544475994001355, "grad_norm": 0.12711933255195618, "learning_rate": 0.0005, "loss": 2.1183, "step": 66850 }, { "epoch": 0.25448566186825816, "grad_norm": 0.12724703550338745, "learning_rate": 0.0005, "loss": 2.1442, "step": 66860 }, { "epoch": 0.2545237243363809, "grad_norm": 0.15072989463806152, "learning_rate": 0.0005, "loss": 2.1338, "step": 66870 }, { "epoch": 0.25456178680450353, "grad_norm": 0.1301760971546173, "learning_rate": 0.0005, "loss": 2.1338, "step": 66880 }, { "epoch": 0.25459984927262624, "grad_norm": 0.13474169373512268, "learning_rate": 0.0005, "loss": 2.1353, "step": 66890 }, { "epoch": 0.2546379117407489, "grad_norm": 0.1241116151213646, "learning_rate": 0.0005, "loss": 2.1278, "step": 66900 }, { "epoch": 0.2546759742088716, "grad_norm": 0.1309492588043213, "learning_rate": 0.0005, "loss": 2.1328, "step": 66910 }, { "epoch": 0.25471403667699427, "grad_norm": 0.11885924637317657, "learning_rate": 0.0005, "loss": 2.1504, "step": 66920 }, { "epoch": 0.254752099145117, "grad_norm": 0.127330020070076, "learning_rate": 0.0005, "loss": 2.1279, "step": 66930 }, { "epoch": 0.25479016161323964, "grad_norm": 0.13746142387390137, "learning_rate": 0.0005, "loss": 2.1437, "step": 66940 }, { "epoch": 0.25482822408136235, "grad_norm": 0.12805262207984924, "learning_rate": 0.0005, "loss": 2.1296, "step": 66950 }, { "epoch": 0.254866286549485, "grad_norm": 0.13134385645389557, "learning_rate": 0.0005, "loss": 2.1306, "step": 66960 }, { "epoch": 0.2549043490176077, "grad_norm": 0.12819813191890717, "learning_rate": 0.0005, "loss": 2.1352, "step": 66970 }, { "epoch": 0.2549424114857304, "grad_norm": 0.12185374647378922, "learning_rate": 0.0005, "loss": 2.119, "step": 66980 }, { "epoch": 0.25498047395385304, "grad_norm": 0.12614475190639496, "learning_rate": 0.0005, "loss": 2.131, "step": 66990 }, { "epoch": 0.25501853642197575, "grad_norm": 0.114668108522892, "learning_rate": 0.0005, "loss": 2.1241, "step": 67000 }, { "epoch": 0.2550565988900984, "grad_norm": 0.13993407785892487, "learning_rate": 0.0005, "loss": 2.1218, "step": 67010 }, { "epoch": 0.2550946613582211, "grad_norm": 0.129254549741745, "learning_rate": 0.0005, "loss": 2.14, "step": 67020 }, { "epoch": 0.2551327238263438, "grad_norm": 0.11793850362300873, "learning_rate": 0.0005, "loss": 2.1247, "step": 67030 }, { "epoch": 0.2551707862944665, "grad_norm": 0.4932312071323395, "learning_rate": 0.0005, "loss": 2.1221, "step": 67040 }, { "epoch": 0.25520884876258915, "grad_norm": 0.12364540249109268, "learning_rate": 0.0005, "loss": 2.126, "step": 67050 }, { "epoch": 0.25524691123071186, "grad_norm": 0.12063073366880417, "learning_rate": 0.0005, "loss": 2.1174, "step": 67060 }, { "epoch": 0.2552849736988345, "grad_norm": 0.11659274995326996, "learning_rate": 0.0005, "loss": 2.1246, "step": 67070 }, { "epoch": 0.25532303616695723, "grad_norm": 0.1296776533126831, "learning_rate": 0.0005, "loss": 2.1227, "step": 67080 }, { "epoch": 0.2553610986350799, "grad_norm": 0.1239800825715065, "learning_rate": 0.0005, "loss": 2.1243, "step": 67090 }, { "epoch": 0.2553991611032026, "grad_norm": 0.12165100127458572, "learning_rate": 0.0005, "loss": 2.1323, "step": 67100 }, { "epoch": 0.25543722357132526, "grad_norm": 0.11917266994714737, "learning_rate": 0.0005, "loss": 2.1189, "step": 67110 }, { "epoch": 0.25547528603944797, "grad_norm": 0.13199719786643982, "learning_rate": 0.0005, "loss": 2.1281, "step": 67120 }, { "epoch": 0.2555133485075706, "grad_norm": 0.1282149851322174, "learning_rate": 0.0005, "loss": 2.1105, "step": 67130 }, { "epoch": 0.2555514109756933, "grad_norm": 0.12987367808818817, "learning_rate": 0.0005, "loss": 2.1239, "step": 67140 }, { "epoch": 0.255589473443816, "grad_norm": 0.13135084509849548, "learning_rate": 0.0005, "loss": 2.1208, "step": 67150 }, { "epoch": 0.25562753591193865, "grad_norm": 0.1275607943534851, "learning_rate": 0.0005, "loss": 2.1141, "step": 67160 }, { "epoch": 0.25566559838006137, "grad_norm": 0.11255636066198349, "learning_rate": 0.0005, "loss": 2.1127, "step": 67170 }, { "epoch": 0.255703660848184, "grad_norm": 0.12683634459972382, "learning_rate": 0.0005, "loss": 2.1279, "step": 67180 }, { "epoch": 0.25574172331630673, "grad_norm": 0.11311507225036621, "learning_rate": 0.0005, "loss": 2.1169, "step": 67190 }, { "epoch": 0.2557797857844294, "grad_norm": 0.12024758756160736, "learning_rate": 0.0005, "loss": 2.1228, "step": 67200 }, { "epoch": 0.2558178482525521, "grad_norm": 0.12955109775066376, "learning_rate": 0.0005, "loss": 2.1291, "step": 67210 }, { "epoch": 0.25585591072067476, "grad_norm": 0.1267477422952652, "learning_rate": 0.0005, "loss": 2.1262, "step": 67220 }, { "epoch": 0.2558939731887975, "grad_norm": 0.1404080092906952, "learning_rate": 0.0005, "loss": 2.1192, "step": 67230 }, { "epoch": 0.25593203565692013, "grad_norm": 0.1284545212984085, "learning_rate": 0.0005, "loss": 2.1283, "step": 67240 }, { "epoch": 0.25597009812504284, "grad_norm": 0.11248300969600677, "learning_rate": 0.0005, "loss": 2.1394, "step": 67250 }, { "epoch": 0.2560081605931655, "grad_norm": 0.11709735542535782, "learning_rate": 0.0005, "loss": 2.1203, "step": 67260 }, { "epoch": 0.25604622306128816, "grad_norm": 0.1246451884508133, "learning_rate": 0.0005, "loss": 2.1152, "step": 67270 }, { "epoch": 0.25608428552941087, "grad_norm": 0.12055303901433945, "learning_rate": 0.0005, "loss": 2.129, "step": 67280 }, { "epoch": 0.25612234799753353, "grad_norm": 0.1246945932507515, "learning_rate": 0.0005, "loss": 2.1429, "step": 67290 }, { "epoch": 0.25616041046565624, "grad_norm": 0.1278133988380432, "learning_rate": 0.0005, "loss": 2.1155, "step": 67300 }, { "epoch": 0.2561984729337789, "grad_norm": 0.14413084089756012, "learning_rate": 0.0005, "loss": 2.1224, "step": 67310 }, { "epoch": 0.2562365354019016, "grad_norm": 0.13160665333271027, "learning_rate": 0.0005, "loss": 2.123, "step": 67320 }, { "epoch": 0.25627459787002427, "grad_norm": 0.12283961474895477, "learning_rate": 0.0005, "loss": 2.1369, "step": 67330 }, { "epoch": 0.256312660338147, "grad_norm": 0.1365843117237091, "learning_rate": 0.0005, "loss": 2.1347, "step": 67340 }, { "epoch": 0.25635072280626964, "grad_norm": 0.1217200830578804, "learning_rate": 0.0005, "loss": 2.1471, "step": 67350 }, { "epoch": 0.25638878527439235, "grad_norm": 0.12052513659000397, "learning_rate": 0.0005, "loss": 2.1287, "step": 67360 }, { "epoch": 0.256426847742515, "grad_norm": 0.125356063246727, "learning_rate": 0.0005, "loss": 2.1308, "step": 67370 }, { "epoch": 0.2564649102106377, "grad_norm": 0.11431968957185745, "learning_rate": 0.0005, "loss": 2.1405, "step": 67380 }, { "epoch": 0.2565029726787604, "grad_norm": 0.11986550688743591, "learning_rate": 0.0005, "loss": 2.1178, "step": 67390 }, { "epoch": 0.2565410351468831, "grad_norm": 0.12903617322444916, "learning_rate": 0.0005, "loss": 2.1152, "step": 67400 }, { "epoch": 0.25657909761500575, "grad_norm": 0.14601320028305054, "learning_rate": 0.0005, "loss": 2.1305, "step": 67410 }, { "epoch": 0.2566171600831284, "grad_norm": 0.12710809707641602, "learning_rate": 0.0005, "loss": 2.121, "step": 67420 }, { "epoch": 0.2566552225512511, "grad_norm": 0.11787423491477966, "learning_rate": 0.0005, "loss": 2.1267, "step": 67430 }, { "epoch": 0.2566932850193738, "grad_norm": 0.1185850203037262, "learning_rate": 0.0005, "loss": 2.129, "step": 67440 }, { "epoch": 0.2567313474874965, "grad_norm": 0.1283193826675415, "learning_rate": 0.0005, "loss": 2.1344, "step": 67450 }, { "epoch": 0.25676940995561914, "grad_norm": 0.12702149152755737, "learning_rate": 0.0005, "loss": 2.124, "step": 67460 }, { "epoch": 0.25680747242374186, "grad_norm": 0.12243399769067764, "learning_rate": 0.0005, "loss": 2.1461, "step": 67470 }, { "epoch": 0.2568455348918645, "grad_norm": 0.12209542095661163, "learning_rate": 0.0005, "loss": 2.1213, "step": 67480 }, { "epoch": 0.2568835973599872, "grad_norm": 0.11669564247131348, "learning_rate": 0.0005, "loss": 2.141, "step": 67490 }, { "epoch": 0.2569216598281099, "grad_norm": 0.12182539701461792, "learning_rate": 0.0005, "loss": 2.1343, "step": 67500 }, { "epoch": 0.2569597222962326, "grad_norm": 0.1310204267501831, "learning_rate": 0.0005, "loss": 2.1257, "step": 67510 }, { "epoch": 0.25699778476435525, "grad_norm": 0.12333172559738159, "learning_rate": 0.0005, "loss": 2.1111, "step": 67520 }, { "epoch": 0.25703584723247797, "grad_norm": 0.13070839643478394, "learning_rate": 0.0005, "loss": 2.1127, "step": 67530 }, { "epoch": 0.2570739097006006, "grad_norm": 0.13536477088928223, "learning_rate": 0.0005, "loss": 2.1294, "step": 67540 }, { "epoch": 0.25711197216872334, "grad_norm": 0.11995254456996918, "learning_rate": 0.0005, "loss": 2.1276, "step": 67550 }, { "epoch": 0.257150034636846, "grad_norm": 0.12875477969646454, "learning_rate": 0.0005, "loss": 2.1381, "step": 67560 }, { "epoch": 0.25718809710496865, "grad_norm": 0.1281040757894516, "learning_rate": 0.0005, "loss": 2.1295, "step": 67570 }, { "epoch": 0.25722615957309136, "grad_norm": 0.12523114681243896, "learning_rate": 0.0005, "loss": 2.1403, "step": 67580 }, { "epoch": 0.257264222041214, "grad_norm": 0.12587867677211761, "learning_rate": 0.0005, "loss": 2.1307, "step": 67590 }, { "epoch": 0.25730228450933673, "grad_norm": 0.12696507573127747, "learning_rate": 0.0005, "loss": 2.134, "step": 67600 }, { "epoch": 0.2573403469774594, "grad_norm": 0.12220026552677155, "learning_rate": 0.0005, "loss": 2.1352, "step": 67610 }, { "epoch": 0.2573784094455821, "grad_norm": 0.1330202966928482, "learning_rate": 0.0005, "loss": 2.1267, "step": 67620 }, { "epoch": 0.25741647191370476, "grad_norm": 0.12621164321899414, "learning_rate": 0.0005, "loss": 2.1316, "step": 67630 }, { "epoch": 0.25745453438182747, "grad_norm": 0.12764981389045715, "learning_rate": 0.0005, "loss": 2.1274, "step": 67640 }, { "epoch": 0.25749259684995013, "grad_norm": 0.11342509835958481, "learning_rate": 0.0005, "loss": 2.1211, "step": 67650 }, { "epoch": 0.25753065931807284, "grad_norm": 0.11275508254766464, "learning_rate": 0.0005, "loss": 2.1224, "step": 67660 }, { "epoch": 0.2575687217861955, "grad_norm": 0.12244296073913574, "learning_rate": 0.0005, "loss": 2.1352, "step": 67670 }, { "epoch": 0.2576067842543182, "grad_norm": 0.12804259359836578, "learning_rate": 0.0005, "loss": 2.1382, "step": 67680 }, { "epoch": 0.25764484672244087, "grad_norm": 0.1230596974492073, "learning_rate": 0.0005, "loss": 2.1182, "step": 67690 }, { "epoch": 0.2576829091905636, "grad_norm": 0.12710057199001312, "learning_rate": 0.0005, "loss": 2.1399, "step": 67700 }, { "epoch": 0.25772097165868624, "grad_norm": 0.12617017328739166, "learning_rate": 0.0005, "loss": 2.1158, "step": 67710 }, { "epoch": 0.2577590341268089, "grad_norm": 0.17379063367843628, "learning_rate": 0.0005, "loss": 2.1129, "step": 67720 }, { "epoch": 0.2577970965949316, "grad_norm": 0.1185583844780922, "learning_rate": 0.0005, "loss": 2.1323, "step": 67730 }, { "epoch": 0.25783515906305426, "grad_norm": 0.14808182418346405, "learning_rate": 0.0005, "loss": 2.121, "step": 67740 }, { "epoch": 0.257873221531177, "grad_norm": 0.12773369252681732, "learning_rate": 0.0005, "loss": 2.1242, "step": 67750 }, { "epoch": 0.25791128399929963, "grad_norm": 0.12895694375038147, "learning_rate": 0.0005, "loss": 2.1182, "step": 67760 }, { "epoch": 0.25794934646742235, "grad_norm": 0.12679018080234528, "learning_rate": 0.0005, "loss": 2.1255, "step": 67770 }, { "epoch": 0.257987408935545, "grad_norm": 0.13147412240505219, "learning_rate": 0.0005, "loss": 2.1316, "step": 67780 }, { "epoch": 0.2580254714036677, "grad_norm": 0.12539303302764893, "learning_rate": 0.0005, "loss": 2.1149, "step": 67790 }, { "epoch": 0.2580635338717904, "grad_norm": 0.11887650191783905, "learning_rate": 0.0005, "loss": 2.1118, "step": 67800 }, { "epoch": 0.2581015963399131, "grad_norm": 0.11561664193868637, "learning_rate": 0.0005, "loss": 2.1115, "step": 67810 }, { "epoch": 0.25813965880803574, "grad_norm": 0.13444383442401886, "learning_rate": 0.0005, "loss": 2.1381, "step": 67820 }, { "epoch": 0.25817772127615846, "grad_norm": 0.13082697987556458, "learning_rate": 0.0005, "loss": 2.1289, "step": 67830 }, { "epoch": 0.2582157837442811, "grad_norm": 0.13210126757621765, "learning_rate": 0.0005, "loss": 2.1234, "step": 67840 }, { "epoch": 0.25825384621240377, "grad_norm": 0.13368277251720428, "learning_rate": 0.0005, "loss": 2.1078, "step": 67850 }, { "epoch": 0.2582919086805265, "grad_norm": 0.13177311420440674, "learning_rate": 0.0005, "loss": 2.119, "step": 67860 }, { "epoch": 0.25832997114864914, "grad_norm": 0.12315039336681366, "learning_rate": 0.0005, "loss": 2.1304, "step": 67870 }, { "epoch": 0.25836803361677185, "grad_norm": 0.11096197366714478, "learning_rate": 0.0005, "loss": 2.1353, "step": 67880 }, { "epoch": 0.2584060960848945, "grad_norm": 0.11734460294246674, "learning_rate": 0.0005, "loss": 2.1256, "step": 67890 }, { "epoch": 0.2584441585530172, "grad_norm": 0.12845294177532196, "learning_rate": 0.0005, "loss": 2.1119, "step": 67900 }, { "epoch": 0.2584822210211399, "grad_norm": 0.12121350318193436, "learning_rate": 0.0005, "loss": 2.122, "step": 67910 }, { "epoch": 0.2585202834892626, "grad_norm": 0.12107622623443604, "learning_rate": 0.0005, "loss": 2.1185, "step": 67920 }, { "epoch": 0.25855834595738525, "grad_norm": 0.12794913351535797, "learning_rate": 0.0005, "loss": 2.1374, "step": 67930 }, { "epoch": 0.25859640842550796, "grad_norm": 0.11874082684516907, "learning_rate": 0.0005, "loss": 2.12, "step": 67940 }, { "epoch": 0.2586344708936306, "grad_norm": 0.158302441239357, "learning_rate": 0.0005, "loss": 2.1347, "step": 67950 }, { "epoch": 0.25867253336175333, "grad_norm": 0.13007745146751404, "learning_rate": 0.0005, "loss": 2.1204, "step": 67960 }, { "epoch": 0.258710595829876, "grad_norm": 0.13091380894184113, "learning_rate": 0.0005, "loss": 2.1081, "step": 67970 }, { "epoch": 0.2587486582979987, "grad_norm": 0.1220938190817833, "learning_rate": 0.0005, "loss": 2.1378, "step": 67980 }, { "epoch": 0.25878672076612136, "grad_norm": 0.12740594148635864, "learning_rate": 0.0005, "loss": 2.1451, "step": 67990 }, { "epoch": 0.258824783234244, "grad_norm": 0.11739125847816467, "learning_rate": 0.0005, "loss": 2.1223, "step": 68000 }, { "epoch": 0.25886284570236673, "grad_norm": 0.12394890189170837, "learning_rate": 0.0005, "loss": 2.1383, "step": 68010 }, { "epoch": 0.2589009081704894, "grad_norm": 0.12760695815086365, "learning_rate": 0.0005, "loss": 2.1184, "step": 68020 }, { "epoch": 0.2589389706386121, "grad_norm": 0.1216312125325203, "learning_rate": 0.0005, "loss": 2.1265, "step": 68030 }, { "epoch": 0.25897703310673476, "grad_norm": 0.1176496222615242, "learning_rate": 0.0005, "loss": 2.1184, "step": 68040 }, { "epoch": 0.25901509557485747, "grad_norm": 0.1290826052427292, "learning_rate": 0.0005, "loss": 2.1219, "step": 68050 }, { "epoch": 0.2590531580429801, "grad_norm": 0.13374052941799164, "learning_rate": 0.0005, "loss": 2.1171, "step": 68060 }, { "epoch": 0.25909122051110284, "grad_norm": 0.11010095477104187, "learning_rate": 0.0005, "loss": 2.1207, "step": 68070 }, { "epoch": 0.2591292829792255, "grad_norm": 0.13147957623004913, "learning_rate": 0.0005, "loss": 2.1419, "step": 68080 }, { "epoch": 0.2591673454473482, "grad_norm": 0.12701791524887085, "learning_rate": 0.0005, "loss": 2.1306, "step": 68090 }, { "epoch": 0.25920540791547086, "grad_norm": 0.1303391009569168, "learning_rate": 0.0005, "loss": 2.1115, "step": 68100 }, { "epoch": 0.2592434703835936, "grad_norm": 0.29294759035110474, "learning_rate": 0.0005, "loss": 2.1133, "step": 68110 }, { "epoch": 0.25928153285171623, "grad_norm": 0.11597706377506256, "learning_rate": 0.0005, "loss": 2.1199, "step": 68120 }, { "epoch": 0.25931959531983895, "grad_norm": 0.1320647895336151, "learning_rate": 0.0005, "loss": 2.1307, "step": 68130 }, { "epoch": 0.2593576577879616, "grad_norm": 0.11850161850452423, "learning_rate": 0.0005, "loss": 2.1221, "step": 68140 }, { "epoch": 0.25939572025608426, "grad_norm": 0.12103667855262756, "learning_rate": 0.0005, "loss": 2.1249, "step": 68150 }, { "epoch": 0.259433782724207, "grad_norm": 0.12830379605293274, "learning_rate": 0.0005, "loss": 2.1478, "step": 68160 }, { "epoch": 0.25947184519232963, "grad_norm": 0.1356051117181778, "learning_rate": 0.0005, "loss": 2.1172, "step": 68170 }, { "epoch": 0.25950990766045234, "grad_norm": 0.11961928755044937, "learning_rate": 0.0005, "loss": 2.1211, "step": 68180 }, { "epoch": 0.259547970128575, "grad_norm": 0.12661005556583405, "learning_rate": 0.0005, "loss": 2.1149, "step": 68190 }, { "epoch": 0.2595860325966977, "grad_norm": 0.11801932007074356, "learning_rate": 0.0005, "loss": 2.1171, "step": 68200 }, { "epoch": 0.25962409506482037, "grad_norm": 0.11936034262180328, "learning_rate": 0.0005, "loss": 2.1265, "step": 68210 }, { "epoch": 0.2596621575329431, "grad_norm": 0.15334779024124146, "learning_rate": 0.0005, "loss": 2.1363, "step": 68220 }, { "epoch": 0.25970022000106574, "grad_norm": 0.12236060202121735, "learning_rate": 0.0005, "loss": 2.127, "step": 68230 }, { "epoch": 0.25973828246918845, "grad_norm": 0.11557623744010925, "learning_rate": 0.0005, "loss": 2.1359, "step": 68240 }, { "epoch": 0.2597763449373111, "grad_norm": 0.12068971246480942, "learning_rate": 0.0005, "loss": 2.1167, "step": 68250 }, { "epoch": 0.2598144074054338, "grad_norm": 0.1296335607767105, "learning_rate": 0.0005, "loss": 2.1293, "step": 68260 }, { "epoch": 0.2598524698735565, "grad_norm": 0.13720114529132843, "learning_rate": 0.0005, "loss": 2.1261, "step": 68270 }, { "epoch": 0.25989053234167914, "grad_norm": 0.13136669993400574, "learning_rate": 0.0005, "loss": 2.1073, "step": 68280 }, { "epoch": 0.25992859480980185, "grad_norm": 0.1193876564502716, "learning_rate": 0.0005, "loss": 2.1143, "step": 68290 }, { "epoch": 0.2599666572779245, "grad_norm": 0.11309941112995148, "learning_rate": 0.0005, "loss": 2.128, "step": 68300 }, { "epoch": 0.2600047197460472, "grad_norm": 0.14278921484947205, "learning_rate": 0.0005, "loss": 2.1387, "step": 68310 }, { "epoch": 0.2600427822141699, "grad_norm": 0.12418635934591293, "learning_rate": 0.0005, "loss": 2.1207, "step": 68320 }, { "epoch": 0.2600808446822926, "grad_norm": 0.12227904051542282, "learning_rate": 0.0005, "loss": 2.119, "step": 68330 }, { "epoch": 0.26011890715041525, "grad_norm": 0.12280824035406113, "learning_rate": 0.0005, "loss": 2.1307, "step": 68340 }, { "epoch": 0.26015696961853796, "grad_norm": 0.1240249052643776, "learning_rate": 0.0005, "loss": 2.1224, "step": 68350 }, { "epoch": 0.2601950320866606, "grad_norm": 0.13094522058963776, "learning_rate": 0.0005, "loss": 2.1235, "step": 68360 }, { "epoch": 0.26023309455478333, "grad_norm": 0.1325329691171646, "learning_rate": 0.0005, "loss": 2.1216, "step": 68370 }, { "epoch": 0.260271157022906, "grad_norm": 0.12583765387535095, "learning_rate": 0.0005, "loss": 2.132, "step": 68380 }, { "epoch": 0.2603092194910287, "grad_norm": 0.13313855230808258, "learning_rate": 0.0005, "loss": 2.1397, "step": 68390 }, { "epoch": 0.26034728195915136, "grad_norm": 0.11284197121858597, "learning_rate": 0.0005, "loss": 2.1392, "step": 68400 }, { "epoch": 0.26038534442727407, "grad_norm": 0.11301718652248383, "learning_rate": 0.0005, "loss": 2.1331, "step": 68410 }, { "epoch": 0.2604234068953967, "grad_norm": 0.12848864495754242, "learning_rate": 0.0005, "loss": 2.1267, "step": 68420 }, { "epoch": 0.2604614693635194, "grad_norm": 0.1173487976193428, "learning_rate": 0.0005, "loss": 2.1255, "step": 68430 }, { "epoch": 0.2604995318316421, "grad_norm": 0.12169645726680756, "learning_rate": 0.0005, "loss": 2.1214, "step": 68440 }, { "epoch": 0.26053759429976475, "grad_norm": 0.12304247915744781, "learning_rate": 0.0005, "loss": 2.1275, "step": 68450 }, { "epoch": 0.26057565676788746, "grad_norm": 0.12181452661752701, "learning_rate": 0.0005, "loss": 2.114, "step": 68460 }, { "epoch": 0.2606137192360101, "grad_norm": 0.1285940557718277, "learning_rate": 0.0005, "loss": 2.1354, "step": 68470 }, { "epoch": 0.26065178170413283, "grad_norm": 0.1419471800327301, "learning_rate": 0.0005, "loss": 2.1207, "step": 68480 }, { "epoch": 0.2606898441722555, "grad_norm": 0.1256123185157776, "learning_rate": 0.0005, "loss": 2.1213, "step": 68490 }, { "epoch": 0.2607279066403782, "grad_norm": 0.11510717868804932, "learning_rate": 0.0005, "loss": 2.1198, "step": 68500 }, { "epoch": 0.26076596910850086, "grad_norm": 0.12129577994346619, "learning_rate": 0.0005, "loss": 2.1293, "step": 68510 }, { "epoch": 0.2608040315766236, "grad_norm": 0.13086970150470734, "learning_rate": 0.0005, "loss": 2.1299, "step": 68520 }, { "epoch": 0.26084209404474623, "grad_norm": 0.13211217522621155, "learning_rate": 0.0005, "loss": 2.1409, "step": 68530 }, { "epoch": 0.26088015651286894, "grad_norm": 0.13078242540359497, "learning_rate": 0.0005, "loss": 2.1186, "step": 68540 }, { "epoch": 0.2609182189809916, "grad_norm": 0.11888111382722855, "learning_rate": 0.0005, "loss": 2.1617, "step": 68550 }, { "epoch": 0.2609562814491143, "grad_norm": 0.14845970273017883, "learning_rate": 0.0005, "loss": 2.1259, "step": 68560 }, { "epoch": 0.26099434391723697, "grad_norm": 0.12593623995780945, "learning_rate": 0.0005, "loss": 2.1302, "step": 68570 }, { "epoch": 0.2610324063853596, "grad_norm": 0.1280219405889511, "learning_rate": 0.0005, "loss": 2.1329, "step": 68580 }, { "epoch": 0.26107046885348234, "grad_norm": 0.12807269394397736, "learning_rate": 0.0005, "loss": 2.1245, "step": 68590 }, { "epoch": 0.261108531321605, "grad_norm": 0.12324277311563492, "learning_rate": 0.0005, "loss": 2.1455, "step": 68600 }, { "epoch": 0.2611465937897277, "grad_norm": 0.13317665457725525, "learning_rate": 0.0005, "loss": 2.1265, "step": 68610 }, { "epoch": 0.26118465625785037, "grad_norm": 0.12222850322723389, "learning_rate": 0.0005, "loss": 2.1238, "step": 68620 }, { "epoch": 0.2612227187259731, "grad_norm": 0.12694650888442993, "learning_rate": 0.0005, "loss": 2.1216, "step": 68630 }, { "epoch": 0.26126078119409574, "grad_norm": 0.12431015074253082, "learning_rate": 0.0005, "loss": 2.1136, "step": 68640 }, { "epoch": 0.26129884366221845, "grad_norm": 0.12812888622283936, "learning_rate": 0.0005, "loss": 2.1224, "step": 68650 }, { "epoch": 0.2613369061303411, "grad_norm": 0.13000817596912384, "learning_rate": 0.0005, "loss": 2.1321, "step": 68660 }, { "epoch": 0.2613749685984638, "grad_norm": 0.1278466284275055, "learning_rate": 0.0005, "loss": 2.1276, "step": 68670 }, { "epoch": 0.2614130310665865, "grad_norm": 0.1243181899189949, "learning_rate": 0.0005, "loss": 2.1285, "step": 68680 }, { "epoch": 0.2614510935347092, "grad_norm": 0.11554887890815735, "learning_rate": 0.0005, "loss": 2.1271, "step": 68690 }, { "epoch": 0.26148915600283185, "grad_norm": 0.12728217244148254, "learning_rate": 0.0005, "loss": 2.1109, "step": 68700 }, { "epoch": 0.2615272184709545, "grad_norm": 0.11840543150901794, "learning_rate": 0.0005, "loss": 2.1371, "step": 68710 }, { "epoch": 0.2615652809390772, "grad_norm": 0.12170374393463135, "learning_rate": 0.0005, "loss": 2.1369, "step": 68720 }, { "epoch": 0.2616033434071999, "grad_norm": 0.1255207061767578, "learning_rate": 0.0005, "loss": 2.1254, "step": 68730 }, { "epoch": 0.2616414058753226, "grad_norm": 0.11496064066886902, "learning_rate": 0.0005, "loss": 2.1279, "step": 68740 }, { "epoch": 0.26167946834344524, "grad_norm": 0.11680759489536285, "learning_rate": 0.0005, "loss": 2.1215, "step": 68750 }, { "epoch": 0.26171753081156796, "grad_norm": 0.14723053574562073, "learning_rate": 0.0005, "loss": 2.1476, "step": 68760 }, { "epoch": 0.2617555932796906, "grad_norm": 0.11675665527582169, "learning_rate": 0.0005, "loss": 2.118, "step": 68770 }, { "epoch": 0.2617936557478133, "grad_norm": 0.11647092550992966, "learning_rate": 0.0005, "loss": 2.1407, "step": 68780 }, { "epoch": 0.261831718215936, "grad_norm": 0.12418477982282639, "learning_rate": 0.0005, "loss": 2.1325, "step": 68790 }, { "epoch": 0.2618697806840587, "grad_norm": 0.12684816122055054, "learning_rate": 0.0005, "loss": 2.1295, "step": 68800 }, { "epoch": 0.26190784315218135, "grad_norm": 0.12460498511791229, "learning_rate": 0.0005, "loss": 2.1189, "step": 68810 }, { "epoch": 0.26194590562030406, "grad_norm": 0.11982579529285431, "learning_rate": 0.0005, "loss": 2.1312, "step": 68820 }, { "epoch": 0.2619839680884267, "grad_norm": 0.12077322602272034, "learning_rate": 0.0005, "loss": 2.1215, "step": 68830 }, { "epoch": 0.26202203055654943, "grad_norm": 0.1211499571800232, "learning_rate": 0.0005, "loss": 2.115, "step": 68840 }, { "epoch": 0.2620600930246721, "grad_norm": 0.12279724329710007, "learning_rate": 0.0005, "loss": 2.1258, "step": 68850 }, { "epoch": 0.26209815549279475, "grad_norm": 0.14473022520542145, "learning_rate": 0.0005, "loss": 2.1289, "step": 68860 }, { "epoch": 0.26213621796091746, "grad_norm": 0.12201548367738724, "learning_rate": 0.0005, "loss": 2.1242, "step": 68870 }, { "epoch": 0.2621742804290401, "grad_norm": 0.15855537354946136, "learning_rate": 0.0005, "loss": 2.1391, "step": 68880 }, { "epoch": 0.26221234289716283, "grad_norm": 0.1251879632472992, "learning_rate": 0.0005, "loss": 2.1176, "step": 68890 }, { "epoch": 0.2622504053652855, "grad_norm": 0.15588484704494476, "learning_rate": 0.0005, "loss": 2.1311, "step": 68900 }, { "epoch": 0.2622884678334082, "grad_norm": 0.11355319619178772, "learning_rate": 0.0005, "loss": 2.1166, "step": 68910 }, { "epoch": 0.26232653030153086, "grad_norm": 0.12741701304912567, "learning_rate": 0.0005, "loss": 2.1237, "step": 68920 }, { "epoch": 0.26236459276965357, "grad_norm": 0.12800206243991852, "learning_rate": 0.0005, "loss": 2.1344, "step": 68930 }, { "epoch": 0.26240265523777623, "grad_norm": 0.12004125118255615, "learning_rate": 0.0005, "loss": 2.141, "step": 68940 }, { "epoch": 0.26244071770589894, "grad_norm": 0.14113670587539673, "learning_rate": 0.0005, "loss": 2.1277, "step": 68950 }, { "epoch": 0.2624787801740216, "grad_norm": 0.1426374763250351, "learning_rate": 0.0005, "loss": 2.1305, "step": 68960 }, { "epoch": 0.2625168426421443, "grad_norm": 0.12280486524105072, "learning_rate": 0.0005, "loss": 2.1318, "step": 68970 }, { "epoch": 0.26255490511026697, "grad_norm": 0.13465885818004608, "learning_rate": 0.0005, "loss": 2.14, "step": 68980 }, { "epoch": 0.2625929675783897, "grad_norm": 0.13163095712661743, "learning_rate": 0.0005, "loss": 2.1416, "step": 68990 }, { "epoch": 0.26263103004651234, "grad_norm": 0.18279962241649628, "learning_rate": 0.0005, "loss": 2.1295, "step": 69000 }, { "epoch": 0.262669092514635, "grad_norm": 0.11554229259490967, "learning_rate": 0.0005, "loss": 2.1213, "step": 69010 }, { "epoch": 0.2627071549827577, "grad_norm": 0.12765353918075562, "learning_rate": 0.0005, "loss": 2.1373, "step": 69020 }, { "epoch": 0.26274521745088036, "grad_norm": 0.11855091154575348, "learning_rate": 0.0005, "loss": 2.1184, "step": 69030 }, { "epoch": 0.2627832799190031, "grad_norm": 0.13786230981349945, "learning_rate": 0.0005, "loss": 2.1333, "step": 69040 }, { "epoch": 0.26282134238712573, "grad_norm": 0.11385150998830795, "learning_rate": 0.0005, "loss": 2.1228, "step": 69050 }, { "epoch": 0.26285940485524845, "grad_norm": 0.11940497159957886, "learning_rate": 0.0005, "loss": 2.1478, "step": 69060 }, { "epoch": 0.2628974673233711, "grad_norm": 0.12230822443962097, "learning_rate": 0.0005, "loss": 2.118, "step": 69070 }, { "epoch": 0.2629355297914938, "grad_norm": 0.12506653368473053, "learning_rate": 0.0005, "loss": 2.1287, "step": 69080 }, { "epoch": 0.2629735922596165, "grad_norm": 0.12588395178318024, "learning_rate": 0.0005, "loss": 2.1466, "step": 69090 }, { "epoch": 0.2630116547277392, "grad_norm": 0.12764878571033478, "learning_rate": 0.0005, "loss": 2.1312, "step": 69100 }, { "epoch": 0.26304971719586184, "grad_norm": 0.15151946246623993, "learning_rate": 0.0005, "loss": 2.1187, "step": 69110 }, { "epoch": 0.26308777966398456, "grad_norm": 0.1337091028690338, "learning_rate": 0.0005, "loss": 2.1171, "step": 69120 }, { "epoch": 0.2631258421321072, "grad_norm": 0.11346838623285294, "learning_rate": 0.0005, "loss": 2.1262, "step": 69130 }, { "epoch": 0.26316390460022987, "grad_norm": 0.11807180941104889, "learning_rate": 0.0005, "loss": 2.1241, "step": 69140 }, { "epoch": 0.2632019670683526, "grad_norm": 0.12256387621164322, "learning_rate": 0.0005, "loss": 2.1268, "step": 69150 }, { "epoch": 0.26324002953647524, "grad_norm": 0.11840704083442688, "learning_rate": 0.0005, "loss": 2.1239, "step": 69160 }, { "epoch": 0.26327809200459795, "grad_norm": 0.12220650166273117, "learning_rate": 0.0005, "loss": 2.128, "step": 69170 }, { "epoch": 0.2633161544727206, "grad_norm": 0.13918541371822357, "learning_rate": 0.0005, "loss": 2.1433, "step": 69180 }, { "epoch": 0.2633542169408433, "grad_norm": 0.11918140202760696, "learning_rate": 0.0005, "loss": 2.1147, "step": 69190 }, { "epoch": 0.263392279408966, "grad_norm": 0.12479212880134583, "learning_rate": 0.0005, "loss": 2.1263, "step": 69200 }, { "epoch": 0.2634303418770887, "grad_norm": 0.13087302446365356, "learning_rate": 0.0005, "loss": 2.1299, "step": 69210 }, { "epoch": 0.26346840434521135, "grad_norm": 0.1416410207748413, "learning_rate": 0.0005, "loss": 2.1242, "step": 69220 }, { "epoch": 0.26350646681333406, "grad_norm": 0.12037502229213715, "learning_rate": 0.0005, "loss": 2.1178, "step": 69230 }, { "epoch": 0.2635445292814567, "grad_norm": 0.13915680348873138, "learning_rate": 0.0005, "loss": 2.1232, "step": 69240 }, { "epoch": 0.26358259174957943, "grad_norm": 0.14051872491836548, "learning_rate": 0.0005, "loss": 2.1349, "step": 69250 }, { "epoch": 0.2636206542177021, "grad_norm": 0.12612906098365784, "learning_rate": 0.0005, "loss": 2.1431, "step": 69260 }, { "epoch": 0.2636587166858248, "grad_norm": 0.13273148238658905, "learning_rate": 0.0005, "loss": 2.1225, "step": 69270 }, { "epoch": 0.26369677915394746, "grad_norm": 0.13362795114517212, "learning_rate": 0.0005, "loss": 2.1114, "step": 69280 }, { "epoch": 0.2637348416220701, "grad_norm": 0.12257708609104156, "learning_rate": 0.0005, "loss": 2.124, "step": 69290 }, { "epoch": 0.26377290409019283, "grad_norm": 0.12788087129592896, "learning_rate": 0.0005, "loss": 2.1279, "step": 69300 }, { "epoch": 0.2638109665583155, "grad_norm": 0.11019985377788544, "learning_rate": 0.0005, "loss": 2.1274, "step": 69310 }, { "epoch": 0.2638490290264382, "grad_norm": 0.12560158967971802, "learning_rate": 0.0005, "loss": 2.1192, "step": 69320 }, { "epoch": 0.26388709149456085, "grad_norm": 0.12579011917114258, "learning_rate": 0.0005, "loss": 2.1131, "step": 69330 }, { "epoch": 0.26392515396268357, "grad_norm": 0.1318081021308899, "learning_rate": 0.0005, "loss": 2.1424, "step": 69340 }, { "epoch": 0.2639632164308062, "grad_norm": 0.12909837067127228, "learning_rate": 0.0005, "loss": 2.1297, "step": 69350 }, { "epoch": 0.26400127889892894, "grad_norm": 0.12274816632270813, "learning_rate": 0.0005, "loss": 2.1252, "step": 69360 }, { "epoch": 0.2640393413670516, "grad_norm": 0.11799930781126022, "learning_rate": 0.0005, "loss": 2.1255, "step": 69370 }, { "epoch": 0.2640774038351743, "grad_norm": 0.13099320232868195, "learning_rate": 0.0005, "loss": 2.1266, "step": 69380 }, { "epoch": 0.26411546630329696, "grad_norm": 0.118326835334301, "learning_rate": 0.0005, "loss": 2.1381, "step": 69390 }, { "epoch": 0.2641535287714197, "grad_norm": 0.12781855463981628, "learning_rate": 0.0005, "loss": 2.1315, "step": 69400 }, { "epoch": 0.26419159123954233, "grad_norm": 0.1174471378326416, "learning_rate": 0.0005, "loss": 2.1304, "step": 69410 }, { "epoch": 0.26422965370766505, "grad_norm": 0.12137634307146072, "learning_rate": 0.0005, "loss": 2.1309, "step": 69420 }, { "epoch": 0.2642677161757877, "grad_norm": 0.12340341508388519, "learning_rate": 0.0005, "loss": 2.1362, "step": 69430 }, { "epoch": 0.26430577864391036, "grad_norm": 0.14514338970184326, "learning_rate": 0.0005, "loss": 2.1259, "step": 69440 }, { "epoch": 0.2643438411120331, "grad_norm": 0.13454660773277283, "learning_rate": 0.0005, "loss": 2.1275, "step": 69450 }, { "epoch": 0.26438190358015573, "grad_norm": 0.13224738836288452, "learning_rate": 0.0005, "loss": 2.1187, "step": 69460 }, { "epoch": 0.26441996604827844, "grad_norm": 0.11716333031654358, "learning_rate": 0.0005, "loss": 2.1306, "step": 69470 }, { "epoch": 0.2644580285164011, "grad_norm": 0.12512919306755066, "learning_rate": 0.0005, "loss": 2.1312, "step": 69480 }, { "epoch": 0.2644960909845238, "grad_norm": 0.12434687465429306, "learning_rate": 0.0005, "loss": 2.121, "step": 69490 }, { "epoch": 0.26453415345264647, "grad_norm": 0.12177757173776627, "learning_rate": 0.0005, "loss": 2.121, "step": 69500 }, { "epoch": 0.2645722159207692, "grad_norm": 0.1328718662261963, "learning_rate": 0.0005, "loss": 2.1158, "step": 69510 }, { "epoch": 0.26461027838889184, "grad_norm": 0.12692506611347198, "learning_rate": 0.0005, "loss": 2.1291, "step": 69520 }, { "epoch": 0.26464834085701455, "grad_norm": 0.1363808661699295, "learning_rate": 0.0005, "loss": 2.1187, "step": 69530 }, { "epoch": 0.2646864033251372, "grad_norm": 0.1261531114578247, "learning_rate": 0.0005, "loss": 2.126, "step": 69540 }, { "epoch": 0.2647244657932599, "grad_norm": 0.11819518357515335, "learning_rate": 0.0005, "loss": 2.1388, "step": 69550 }, { "epoch": 0.2647625282613826, "grad_norm": 0.12026696652173996, "learning_rate": 0.0005, "loss": 2.1226, "step": 69560 }, { "epoch": 0.26480059072950524, "grad_norm": 0.12217105180025101, "learning_rate": 0.0005, "loss": 2.1265, "step": 69570 }, { "epoch": 0.26483865319762795, "grad_norm": 0.11567086726427078, "learning_rate": 0.0005, "loss": 2.1221, "step": 69580 }, { "epoch": 0.2648767156657506, "grad_norm": 0.1255672425031662, "learning_rate": 0.0005, "loss": 2.1288, "step": 69590 }, { "epoch": 0.2649147781338733, "grad_norm": 0.14222608506679535, "learning_rate": 0.0005, "loss": 2.1267, "step": 69600 }, { "epoch": 0.264952840601996, "grad_norm": 0.11926767975091934, "learning_rate": 0.0005, "loss": 2.1177, "step": 69610 }, { "epoch": 0.2649909030701187, "grad_norm": 0.11993763595819473, "learning_rate": 0.0005, "loss": 2.1263, "step": 69620 }, { "epoch": 0.26502896553824135, "grad_norm": 0.13194534182548523, "learning_rate": 0.0005, "loss": 2.1381, "step": 69630 }, { "epoch": 0.26506702800636406, "grad_norm": 0.11919394135475159, "learning_rate": 0.0005, "loss": 2.1297, "step": 69640 }, { "epoch": 0.2651050904744867, "grad_norm": 0.1282067596912384, "learning_rate": 0.0005, "loss": 2.1305, "step": 69650 }, { "epoch": 0.26514315294260943, "grad_norm": 0.12731339037418365, "learning_rate": 0.0005, "loss": 2.1398, "step": 69660 }, { "epoch": 0.2651812154107321, "grad_norm": 0.1202862411737442, "learning_rate": 0.0005, "loss": 2.1179, "step": 69670 }, { "epoch": 0.2652192778788548, "grad_norm": 0.13804534077644348, "learning_rate": 0.0005, "loss": 2.1272, "step": 69680 }, { "epoch": 0.26525734034697745, "grad_norm": 0.12254875898361206, "learning_rate": 0.0005, "loss": 2.1174, "step": 69690 }, { "epoch": 0.26529540281510017, "grad_norm": 0.13759438693523407, "learning_rate": 0.0005, "loss": 2.117, "step": 69700 }, { "epoch": 0.2653334652832228, "grad_norm": 0.1366581916809082, "learning_rate": 0.0005, "loss": 2.1241, "step": 69710 }, { "epoch": 0.2653715277513455, "grad_norm": 0.1233244389295578, "learning_rate": 0.0005, "loss": 2.114, "step": 69720 }, { "epoch": 0.2654095902194682, "grad_norm": 0.13405732810497284, "learning_rate": 0.0005, "loss": 2.1265, "step": 69730 }, { "epoch": 0.26544765268759085, "grad_norm": 0.11756180226802826, "learning_rate": 0.0005, "loss": 2.1178, "step": 69740 }, { "epoch": 0.26548571515571356, "grad_norm": 0.1275683045387268, "learning_rate": 0.0005, "loss": 2.1264, "step": 69750 }, { "epoch": 0.2655237776238362, "grad_norm": 0.1181812584400177, "learning_rate": 0.0005, "loss": 2.1385, "step": 69760 }, { "epoch": 0.26556184009195893, "grad_norm": 0.12255300581455231, "learning_rate": 0.0005, "loss": 2.1326, "step": 69770 }, { "epoch": 0.2655999025600816, "grad_norm": 0.12513133883476257, "learning_rate": 0.0005, "loss": 2.1404, "step": 69780 }, { "epoch": 0.2656379650282043, "grad_norm": 0.13193869590759277, "learning_rate": 0.0005, "loss": 2.1212, "step": 69790 }, { "epoch": 0.26567602749632696, "grad_norm": 0.12380549311637878, "learning_rate": 0.0005, "loss": 2.108, "step": 69800 }, { "epoch": 0.2657140899644497, "grad_norm": 0.15247705578804016, "learning_rate": 0.0005, "loss": 2.1261, "step": 69810 }, { "epoch": 0.26575215243257233, "grad_norm": 0.14371612668037415, "learning_rate": 0.0005, "loss": 2.1325, "step": 69820 }, { "epoch": 0.26579021490069504, "grad_norm": 0.11494036763906479, "learning_rate": 0.0005, "loss": 2.1316, "step": 69830 }, { "epoch": 0.2658282773688177, "grad_norm": 0.12889176607131958, "learning_rate": 0.0005, "loss": 2.1192, "step": 69840 }, { "epoch": 0.2658663398369404, "grad_norm": 0.1366003006696701, "learning_rate": 0.0005, "loss": 2.1064, "step": 69850 }, { "epoch": 0.26590440230506307, "grad_norm": 0.1334492415189743, "learning_rate": 0.0005, "loss": 2.1171, "step": 69860 }, { "epoch": 0.2659424647731857, "grad_norm": 0.12129637598991394, "learning_rate": 0.0005, "loss": 2.1445, "step": 69870 }, { "epoch": 0.26598052724130844, "grad_norm": 0.12232821434736252, "learning_rate": 0.0005, "loss": 2.1332, "step": 69880 }, { "epoch": 0.2660185897094311, "grad_norm": 0.14795225858688354, "learning_rate": 0.0005, "loss": 2.1257, "step": 69890 }, { "epoch": 0.2660566521775538, "grad_norm": 0.13824805617332458, "learning_rate": 0.0005, "loss": 2.1183, "step": 69900 }, { "epoch": 0.26609471464567647, "grad_norm": 0.1196480542421341, "learning_rate": 0.0005, "loss": 2.1383, "step": 69910 }, { "epoch": 0.2661327771137992, "grad_norm": 0.12577514350414276, "learning_rate": 0.0005, "loss": 2.1267, "step": 69920 }, { "epoch": 0.26617083958192184, "grad_norm": 0.1174229308962822, "learning_rate": 0.0005, "loss": 2.1209, "step": 69930 }, { "epoch": 0.26620890205004455, "grad_norm": 0.12444499135017395, "learning_rate": 0.0005, "loss": 2.1317, "step": 69940 }, { "epoch": 0.2662469645181672, "grad_norm": 0.1242179125547409, "learning_rate": 0.0005, "loss": 2.1309, "step": 69950 }, { "epoch": 0.2662850269862899, "grad_norm": 0.14606353640556335, "learning_rate": 0.0005, "loss": 2.1179, "step": 69960 }, { "epoch": 0.2663230894544126, "grad_norm": 0.11545176059007645, "learning_rate": 0.0005, "loss": 2.1407, "step": 69970 }, { "epoch": 0.2663611519225353, "grad_norm": 0.12576058506965637, "learning_rate": 0.0005, "loss": 2.1163, "step": 69980 }, { "epoch": 0.26639921439065795, "grad_norm": 0.12525852024555206, "learning_rate": 0.0005, "loss": 2.1286, "step": 69990 }, { "epoch": 0.26643727685878066, "grad_norm": 0.12224716693162918, "learning_rate": 0.0005, "loss": 2.1262, "step": 70000 }, { "epoch": 0.2664753393269033, "grad_norm": 0.13207238912582397, "learning_rate": 0.0005, "loss": 2.122, "step": 70010 }, { "epoch": 0.266513401795026, "grad_norm": 0.1505841761827469, "learning_rate": 0.0005, "loss": 2.1304, "step": 70020 }, { "epoch": 0.2665514642631487, "grad_norm": 0.14168912172317505, "learning_rate": 0.0005, "loss": 2.1296, "step": 70030 }, { "epoch": 0.26658952673127134, "grad_norm": 0.12297537177801132, "learning_rate": 0.0005, "loss": 2.1205, "step": 70040 }, { "epoch": 0.26662758919939405, "grad_norm": 0.14183282852172852, "learning_rate": 0.0005, "loss": 2.1345, "step": 70050 }, { "epoch": 0.2666656516675167, "grad_norm": 0.1262883096933365, "learning_rate": 0.0005, "loss": 2.125, "step": 70060 }, { "epoch": 0.2667037141356394, "grad_norm": 0.13181641697883606, "learning_rate": 0.0005, "loss": 2.1246, "step": 70070 }, { "epoch": 0.2667417766037621, "grad_norm": 0.12226633727550507, "learning_rate": 0.0005, "loss": 2.1273, "step": 70080 }, { "epoch": 0.2667798390718848, "grad_norm": 0.12573277950286865, "learning_rate": 0.0005, "loss": 2.1365, "step": 70090 }, { "epoch": 0.26681790154000745, "grad_norm": 0.13281308114528656, "learning_rate": 0.0005, "loss": 2.1259, "step": 70100 }, { "epoch": 0.26685596400813016, "grad_norm": 0.12960529327392578, "learning_rate": 0.0005, "loss": 2.1175, "step": 70110 }, { "epoch": 0.2668940264762528, "grad_norm": 0.11388522386550903, "learning_rate": 0.0005, "loss": 2.1196, "step": 70120 }, { "epoch": 0.26693208894437553, "grad_norm": 0.12484312802553177, "learning_rate": 0.0005, "loss": 2.1321, "step": 70130 }, { "epoch": 0.2669701514124982, "grad_norm": 0.11258337646722794, "learning_rate": 0.0005, "loss": 2.1298, "step": 70140 }, { "epoch": 0.26700821388062085, "grad_norm": 0.11932699382305145, "learning_rate": 0.0005, "loss": 2.1174, "step": 70150 }, { "epoch": 0.26704627634874356, "grad_norm": 0.12339174002408981, "learning_rate": 0.0005, "loss": 2.1374, "step": 70160 }, { "epoch": 0.2670843388168662, "grad_norm": 0.12048283219337463, "learning_rate": 0.0005, "loss": 2.1322, "step": 70170 }, { "epoch": 0.26712240128498893, "grad_norm": 0.11754105240106583, "learning_rate": 0.0005, "loss": 2.1297, "step": 70180 }, { "epoch": 0.2671604637531116, "grad_norm": 0.12378121167421341, "learning_rate": 0.0005, "loss": 2.1312, "step": 70190 }, { "epoch": 0.2671985262212343, "grad_norm": 0.12185100466012955, "learning_rate": 0.0005, "loss": 2.13, "step": 70200 }, { "epoch": 0.26723658868935696, "grad_norm": 0.13541337847709656, "learning_rate": 0.0005, "loss": 2.1235, "step": 70210 }, { "epoch": 0.26727465115747967, "grad_norm": 0.12544508278369904, "learning_rate": 0.0005, "loss": 2.1436, "step": 70220 }, { "epoch": 0.2673127136256023, "grad_norm": 0.12439633905887604, "learning_rate": 0.0005, "loss": 2.1444, "step": 70230 }, { "epoch": 0.26735077609372504, "grad_norm": 0.13282085955142975, "learning_rate": 0.0005, "loss": 2.1376, "step": 70240 }, { "epoch": 0.2673888385618477, "grad_norm": 0.11832325905561447, "learning_rate": 0.0005, "loss": 2.1196, "step": 70250 }, { "epoch": 0.2674269010299704, "grad_norm": 0.12853141129016876, "learning_rate": 0.0005, "loss": 2.1163, "step": 70260 }, { "epoch": 0.26746496349809307, "grad_norm": 0.11417865008115768, "learning_rate": 0.0005, "loss": 2.1148, "step": 70270 }, { "epoch": 0.2675030259662158, "grad_norm": 0.12991313636302948, "learning_rate": 0.0005, "loss": 2.1189, "step": 70280 }, { "epoch": 0.26754108843433844, "grad_norm": 0.12928979098796844, "learning_rate": 0.0005, "loss": 2.1314, "step": 70290 }, { "epoch": 0.2675791509024611, "grad_norm": 0.13762488961219788, "learning_rate": 0.0005, "loss": 2.112, "step": 70300 }, { "epoch": 0.2676172133705838, "grad_norm": 0.14076592028141022, "learning_rate": 0.0005, "loss": 2.1165, "step": 70310 }, { "epoch": 0.26765527583870646, "grad_norm": 0.136482372879982, "learning_rate": 0.0005, "loss": 2.1203, "step": 70320 }, { "epoch": 0.2676933383068292, "grad_norm": 0.13098162412643433, "learning_rate": 0.0005, "loss": 2.1248, "step": 70330 }, { "epoch": 0.26773140077495183, "grad_norm": 0.10979632288217545, "learning_rate": 0.0005, "loss": 2.1342, "step": 70340 }, { "epoch": 0.26776946324307455, "grad_norm": 0.1365809589624405, "learning_rate": 0.0005, "loss": 2.1436, "step": 70350 }, { "epoch": 0.2678075257111972, "grad_norm": 0.12630967795848846, "learning_rate": 0.0005, "loss": 2.1282, "step": 70360 }, { "epoch": 0.2678455881793199, "grad_norm": 0.12642823159694672, "learning_rate": 0.0005, "loss": 2.1377, "step": 70370 }, { "epoch": 0.2678836506474426, "grad_norm": 0.12485835701227188, "learning_rate": 0.0005, "loss": 2.1428, "step": 70380 }, { "epoch": 0.2679217131155653, "grad_norm": 0.12378615885972977, "learning_rate": 0.0005, "loss": 2.125, "step": 70390 }, { "epoch": 0.26795977558368794, "grad_norm": 0.11807256191968918, "learning_rate": 0.0005, "loss": 2.1292, "step": 70400 }, { "epoch": 0.26799783805181066, "grad_norm": 0.1206602081656456, "learning_rate": 0.0005, "loss": 2.1218, "step": 70410 }, { "epoch": 0.2680359005199333, "grad_norm": 0.13767731189727783, "learning_rate": 0.0005, "loss": 2.1266, "step": 70420 }, { "epoch": 0.268073962988056, "grad_norm": 0.11438094079494476, "learning_rate": 0.0005, "loss": 2.1332, "step": 70430 }, { "epoch": 0.2681120254561787, "grad_norm": 0.15731793642044067, "learning_rate": 0.0005, "loss": 2.105, "step": 70440 }, { "epoch": 0.26815008792430134, "grad_norm": 0.13722564280033112, "learning_rate": 0.0005, "loss": 2.1201, "step": 70450 }, { "epoch": 0.26818815039242405, "grad_norm": 0.12116382271051407, "learning_rate": 0.0005, "loss": 2.1163, "step": 70460 }, { "epoch": 0.2682262128605467, "grad_norm": 0.125070720911026, "learning_rate": 0.0005, "loss": 2.1402, "step": 70470 }, { "epoch": 0.2682642753286694, "grad_norm": 0.13829511404037476, "learning_rate": 0.0005, "loss": 2.1321, "step": 70480 }, { "epoch": 0.2683023377967921, "grad_norm": 0.1141280084848404, "learning_rate": 0.0005, "loss": 2.112, "step": 70490 }, { "epoch": 0.2683404002649148, "grad_norm": 0.17980065941810608, "learning_rate": 0.0005, "loss": 2.122, "step": 70500 }, { "epoch": 0.26837846273303745, "grad_norm": 0.14300209283828735, "learning_rate": 0.0005, "loss": 2.1207, "step": 70510 }, { "epoch": 0.26841652520116016, "grad_norm": 0.15837660431861877, "learning_rate": 0.0005, "loss": 2.1264, "step": 70520 }, { "epoch": 0.2684545876692828, "grad_norm": 0.12317508459091187, "learning_rate": 0.0005, "loss": 2.1442, "step": 70530 }, { "epoch": 0.26849265013740553, "grad_norm": 0.11688899248838425, "learning_rate": 0.0005, "loss": 2.1369, "step": 70540 }, { "epoch": 0.2685307126055282, "grad_norm": 0.12119658291339874, "learning_rate": 0.0005, "loss": 2.1266, "step": 70550 }, { "epoch": 0.2685687750736509, "grad_norm": 0.1074419841170311, "learning_rate": 0.0005, "loss": 2.1247, "step": 70560 }, { "epoch": 0.26860683754177356, "grad_norm": 0.12809164822101593, "learning_rate": 0.0005, "loss": 2.1258, "step": 70570 }, { "epoch": 0.2686449000098962, "grad_norm": 0.13653728365898132, "learning_rate": 0.0005, "loss": 2.1307, "step": 70580 }, { "epoch": 0.2686829624780189, "grad_norm": 0.13394500315189362, "learning_rate": 0.0005, "loss": 2.1314, "step": 70590 }, { "epoch": 0.2687210249461416, "grad_norm": 0.1318061202764511, "learning_rate": 0.0005, "loss": 2.1448, "step": 70600 }, { "epoch": 0.2687590874142643, "grad_norm": 0.24545541405677795, "learning_rate": 0.0005, "loss": 2.1109, "step": 70610 }, { "epoch": 0.26879714988238695, "grad_norm": 0.12553299963474274, "learning_rate": 0.0005, "loss": 2.1075, "step": 70620 }, { "epoch": 0.26883521235050967, "grad_norm": 0.12328213453292847, "learning_rate": 0.0005, "loss": 2.1148, "step": 70630 }, { "epoch": 0.2688732748186323, "grad_norm": 0.125651016831398, "learning_rate": 0.0005, "loss": 2.1315, "step": 70640 }, { "epoch": 0.26891133728675504, "grad_norm": 0.13269735872745514, "learning_rate": 0.0005, "loss": 2.1183, "step": 70650 }, { "epoch": 0.2689493997548777, "grad_norm": 0.12147653847932816, "learning_rate": 0.0005, "loss": 2.1249, "step": 70660 }, { "epoch": 0.2689874622230004, "grad_norm": 0.12299970537424088, "learning_rate": 0.0005, "loss": 2.1144, "step": 70670 }, { "epoch": 0.26902552469112306, "grad_norm": 0.12026827782392502, "learning_rate": 0.0005, "loss": 2.1154, "step": 70680 }, { "epoch": 0.2690635871592458, "grad_norm": 0.13033393025398254, "learning_rate": 0.0005, "loss": 2.1243, "step": 70690 }, { "epoch": 0.26910164962736843, "grad_norm": 0.1141572818160057, "learning_rate": 0.0005, "loss": 2.1316, "step": 70700 }, { "epoch": 0.26913971209549115, "grad_norm": 0.11448405683040619, "learning_rate": 0.0005, "loss": 2.128, "step": 70710 }, { "epoch": 0.2691777745636138, "grad_norm": 0.1274835169315338, "learning_rate": 0.0005, "loss": 2.1445, "step": 70720 }, { "epoch": 0.26921583703173646, "grad_norm": 0.12299812585115433, "learning_rate": 0.0005, "loss": 2.1173, "step": 70730 }, { "epoch": 0.2692538994998592, "grad_norm": 0.11992338299751282, "learning_rate": 0.0005, "loss": 2.1309, "step": 70740 }, { "epoch": 0.26929196196798183, "grad_norm": 0.11715205758810043, "learning_rate": 0.0005, "loss": 2.1299, "step": 70750 }, { "epoch": 0.26933002443610454, "grad_norm": 0.1122170016169548, "learning_rate": 0.0005, "loss": 2.1209, "step": 70760 }, { "epoch": 0.2693680869042272, "grad_norm": 0.1312478929758072, "learning_rate": 0.0005, "loss": 2.1177, "step": 70770 }, { "epoch": 0.2694061493723499, "grad_norm": 0.12616901099681854, "learning_rate": 0.0005, "loss": 2.1056, "step": 70780 }, { "epoch": 0.26944421184047257, "grad_norm": 0.12851281464099884, "learning_rate": 0.0005, "loss": 2.1065, "step": 70790 }, { "epoch": 0.2694822743085953, "grad_norm": 0.11669650673866272, "learning_rate": 0.0005, "loss": 2.1243, "step": 70800 }, { "epoch": 0.26952033677671794, "grad_norm": 0.12413670867681503, "learning_rate": 0.0005, "loss": 2.1383, "step": 70810 }, { "epoch": 0.26955839924484065, "grad_norm": 0.12353887408971786, "learning_rate": 0.0005, "loss": 2.1351, "step": 70820 }, { "epoch": 0.2695964617129633, "grad_norm": 0.12555578351020813, "learning_rate": 0.0005, "loss": 2.1167, "step": 70830 }, { "epoch": 0.269634524181086, "grad_norm": 0.12032879889011383, "learning_rate": 0.0005, "loss": 2.1162, "step": 70840 }, { "epoch": 0.2696725866492087, "grad_norm": 0.12755469977855682, "learning_rate": 0.0005, "loss": 2.1034, "step": 70850 }, { "epoch": 0.2697106491173314, "grad_norm": 0.12309958040714264, "learning_rate": 0.0005, "loss": 2.1114, "step": 70860 }, { "epoch": 0.26974871158545405, "grad_norm": 0.1292559802532196, "learning_rate": 0.0005, "loss": 2.1235, "step": 70870 }, { "epoch": 0.2697867740535767, "grad_norm": 0.11725734174251556, "learning_rate": 0.0005, "loss": 2.1297, "step": 70880 }, { "epoch": 0.2698248365216994, "grad_norm": 0.12600664794445038, "learning_rate": 0.0005, "loss": 2.1154, "step": 70890 }, { "epoch": 0.2698628989898221, "grad_norm": 0.1374066025018692, "learning_rate": 0.0005, "loss": 2.1304, "step": 70900 }, { "epoch": 0.2699009614579448, "grad_norm": 0.13278096914291382, "learning_rate": 0.0005, "loss": 2.1146, "step": 70910 }, { "epoch": 0.26993902392606745, "grad_norm": 0.125087708234787, "learning_rate": 0.0005, "loss": 2.1214, "step": 70920 }, { "epoch": 0.26997708639419016, "grad_norm": 0.13760273158550262, "learning_rate": 0.0005, "loss": 2.127, "step": 70930 }, { "epoch": 0.2700151488623128, "grad_norm": 0.1145314872264862, "learning_rate": 0.0005, "loss": 2.1289, "step": 70940 }, { "epoch": 0.2700532113304355, "grad_norm": 0.12293320894241333, "learning_rate": 0.0005, "loss": 2.1156, "step": 70950 }, { "epoch": 0.2700912737985582, "grad_norm": 0.12370413541793823, "learning_rate": 0.0005, "loss": 2.1184, "step": 70960 }, { "epoch": 0.2701293362666809, "grad_norm": 0.12029270082712173, "learning_rate": 0.0005, "loss": 2.1225, "step": 70970 }, { "epoch": 0.27016739873480355, "grad_norm": 0.13946790993213654, "learning_rate": 0.0005, "loss": 2.1282, "step": 70980 }, { "epoch": 0.27020546120292627, "grad_norm": 0.13315635919570923, "learning_rate": 0.0005, "loss": 2.1294, "step": 70990 }, { "epoch": 0.2702435236710489, "grad_norm": 0.1230052262544632, "learning_rate": 0.0005, "loss": 2.1287, "step": 71000 }, { "epoch": 0.2702815861391716, "grad_norm": 0.11799309402704239, "learning_rate": 0.0005, "loss": 2.1224, "step": 71010 }, { "epoch": 0.2703196486072943, "grad_norm": 0.12883540987968445, "learning_rate": 0.0005, "loss": 2.1274, "step": 71020 }, { "epoch": 0.27035771107541695, "grad_norm": 0.12491268664598465, "learning_rate": 0.0005, "loss": 2.1344, "step": 71030 }, { "epoch": 0.27039577354353966, "grad_norm": 0.120393306016922, "learning_rate": 0.0005, "loss": 2.1238, "step": 71040 }, { "epoch": 0.2704338360116623, "grad_norm": 0.11560060828924179, "learning_rate": 0.0005, "loss": 2.1328, "step": 71050 }, { "epoch": 0.27047189847978503, "grad_norm": 0.11167508363723755, "learning_rate": 0.0005, "loss": 2.1262, "step": 71060 }, { "epoch": 0.2705099609479077, "grad_norm": 0.1285915970802307, "learning_rate": 0.0005, "loss": 2.1306, "step": 71070 }, { "epoch": 0.2705480234160304, "grad_norm": 0.13339774310588837, "learning_rate": 0.0005, "loss": 2.1139, "step": 71080 }, { "epoch": 0.27058608588415306, "grad_norm": 0.13140417635440826, "learning_rate": 0.0005, "loss": 2.1101, "step": 71090 }, { "epoch": 0.2706241483522758, "grad_norm": 0.11885833740234375, "learning_rate": 0.0005, "loss": 2.1286, "step": 71100 }, { "epoch": 0.27066221082039843, "grad_norm": 0.12500646710395813, "learning_rate": 0.0005, "loss": 2.0974, "step": 71110 }, { "epoch": 0.27070027328852114, "grad_norm": 0.12917517125606537, "learning_rate": 0.0005, "loss": 2.1246, "step": 71120 }, { "epoch": 0.2707383357566438, "grad_norm": 0.11975311487913132, "learning_rate": 0.0005, "loss": 2.1229, "step": 71130 }, { "epoch": 0.2707763982247665, "grad_norm": 0.12097851186990738, "learning_rate": 0.0005, "loss": 2.129, "step": 71140 }, { "epoch": 0.27081446069288917, "grad_norm": 0.12181615084409714, "learning_rate": 0.0005, "loss": 2.1363, "step": 71150 }, { "epoch": 0.2708525231610118, "grad_norm": 0.13859118521213531, "learning_rate": 0.0005, "loss": 2.1214, "step": 71160 }, { "epoch": 0.27089058562913454, "grad_norm": 0.133628249168396, "learning_rate": 0.0005, "loss": 2.1143, "step": 71170 }, { "epoch": 0.2709286480972572, "grad_norm": 0.11724966019392014, "learning_rate": 0.0005, "loss": 2.1054, "step": 71180 }, { "epoch": 0.2709667105653799, "grad_norm": 0.12641079723834991, "learning_rate": 0.0005, "loss": 2.116, "step": 71190 }, { "epoch": 0.27100477303350257, "grad_norm": 0.1277732402086258, "learning_rate": 0.0005, "loss": 2.1448, "step": 71200 }, { "epoch": 0.2710428355016253, "grad_norm": 0.13520966470241547, "learning_rate": 0.0005, "loss": 2.1262, "step": 71210 }, { "epoch": 0.27108089796974794, "grad_norm": 0.13680309057235718, "learning_rate": 0.0005, "loss": 2.1186, "step": 71220 }, { "epoch": 0.27111896043787065, "grad_norm": 0.13837707042694092, "learning_rate": 0.0005, "loss": 2.1162, "step": 71230 }, { "epoch": 0.2711570229059933, "grad_norm": 0.12964460253715515, "learning_rate": 0.0005, "loss": 2.1244, "step": 71240 }, { "epoch": 0.271195085374116, "grad_norm": 0.13072986900806427, "learning_rate": 0.0005, "loss": 2.1288, "step": 71250 }, { "epoch": 0.2712331478422387, "grad_norm": 0.12068648636341095, "learning_rate": 0.0005, "loss": 2.14, "step": 71260 }, { "epoch": 0.2712712103103614, "grad_norm": 0.12971381843090057, "learning_rate": 0.0005, "loss": 2.1372, "step": 71270 }, { "epoch": 0.27130927277848405, "grad_norm": 0.12840324640274048, "learning_rate": 0.0005, "loss": 2.1071, "step": 71280 }, { "epoch": 0.27134733524660676, "grad_norm": 0.12420535832643509, "learning_rate": 0.0005, "loss": 2.1326, "step": 71290 }, { "epoch": 0.2713853977147294, "grad_norm": 0.12429869174957275, "learning_rate": 0.0005, "loss": 2.1167, "step": 71300 }, { "epoch": 0.27142346018285207, "grad_norm": 0.12602099776268005, "learning_rate": 0.0005, "loss": 2.1301, "step": 71310 }, { "epoch": 0.2714615226509748, "grad_norm": 0.1185278370976448, "learning_rate": 0.0005, "loss": 2.1087, "step": 71320 }, { "epoch": 0.27149958511909744, "grad_norm": 0.13878685235977173, "learning_rate": 0.0005, "loss": 2.1155, "step": 71330 }, { "epoch": 0.27153764758722015, "grad_norm": 0.12120170891284943, "learning_rate": 0.0005, "loss": 2.1342, "step": 71340 }, { "epoch": 0.2715757100553428, "grad_norm": 0.12688519060611725, "learning_rate": 0.0005, "loss": 2.1329, "step": 71350 }, { "epoch": 0.2716137725234655, "grad_norm": 0.13000376522541046, "learning_rate": 0.0005, "loss": 2.1195, "step": 71360 }, { "epoch": 0.2716518349915882, "grad_norm": 0.12218218296766281, "learning_rate": 0.0005, "loss": 2.1201, "step": 71370 }, { "epoch": 0.2716898974597109, "grad_norm": 0.14565366506576538, "learning_rate": 0.0005, "loss": 2.1243, "step": 71380 }, { "epoch": 0.27172795992783355, "grad_norm": 0.13229680061340332, "learning_rate": 0.0005, "loss": 2.1322, "step": 71390 }, { "epoch": 0.27176602239595626, "grad_norm": 0.138389453291893, "learning_rate": 0.0005, "loss": 2.128, "step": 71400 }, { "epoch": 0.2718040848640789, "grad_norm": 0.136601060628891, "learning_rate": 0.0005, "loss": 2.1139, "step": 71410 }, { "epoch": 0.27184214733220163, "grad_norm": 0.13356034457683563, "learning_rate": 0.0005, "loss": 2.1442, "step": 71420 }, { "epoch": 0.2718802098003243, "grad_norm": 0.12265916913747787, "learning_rate": 0.0005, "loss": 2.1287, "step": 71430 }, { "epoch": 0.27191827226844695, "grad_norm": 0.12345866113901138, "learning_rate": 0.0005, "loss": 2.1446, "step": 71440 }, { "epoch": 0.27195633473656966, "grad_norm": 0.12076914310455322, "learning_rate": 0.0005, "loss": 2.1218, "step": 71450 }, { "epoch": 0.2719943972046923, "grad_norm": 0.12351701408624649, "learning_rate": 0.0005, "loss": 2.1197, "step": 71460 }, { "epoch": 0.27203245967281503, "grad_norm": 0.1288938671350479, "learning_rate": 0.0005, "loss": 2.143, "step": 71470 }, { "epoch": 0.2720705221409377, "grad_norm": 0.13546644151210785, "learning_rate": 0.0005, "loss": 2.1321, "step": 71480 }, { "epoch": 0.2721085846090604, "grad_norm": 0.11406464874744415, "learning_rate": 0.0005, "loss": 2.1135, "step": 71490 }, { "epoch": 0.27214664707718306, "grad_norm": 0.12579847872257233, "learning_rate": 0.0005, "loss": 2.1165, "step": 71500 }, { "epoch": 0.27218470954530577, "grad_norm": 0.12819981575012207, "learning_rate": 0.0005, "loss": 2.1201, "step": 71510 }, { "epoch": 0.2722227720134284, "grad_norm": 0.12256570160388947, "learning_rate": 0.0005, "loss": 2.1411, "step": 71520 }, { "epoch": 0.27226083448155114, "grad_norm": 0.11458420753479004, "learning_rate": 0.0005, "loss": 2.136, "step": 71530 }, { "epoch": 0.2722988969496738, "grad_norm": 0.12950736284255981, "learning_rate": 0.0005, "loss": 2.1244, "step": 71540 }, { "epoch": 0.2723369594177965, "grad_norm": 0.11344198882579803, "learning_rate": 0.0005, "loss": 2.1231, "step": 71550 }, { "epoch": 0.27237502188591917, "grad_norm": 0.11599477380514145, "learning_rate": 0.0005, "loss": 2.1396, "step": 71560 }, { "epoch": 0.2724130843540419, "grad_norm": 0.11852739006280899, "learning_rate": 0.0005, "loss": 2.1268, "step": 71570 }, { "epoch": 0.27245114682216454, "grad_norm": 0.11747030168771744, "learning_rate": 0.0005, "loss": 2.1321, "step": 71580 }, { "epoch": 0.2724892092902872, "grad_norm": 0.13440634310245514, "learning_rate": 0.0005, "loss": 2.1306, "step": 71590 }, { "epoch": 0.2725272717584099, "grad_norm": 0.12431393563747406, "learning_rate": 0.0005, "loss": 2.1356, "step": 71600 }, { "epoch": 0.27256533422653256, "grad_norm": 0.1287822425365448, "learning_rate": 0.0005, "loss": 2.1294, "step": 71610 }, { "epoch": 0.2726033966946553, "grad_norm": 0.12533940374851227, "learning_rate": 0.0005, "loss": 2.1288, "step": 71620 }, { "epoch": 0.27264145916277793, "grad_norm": 0.1331218034029007, "learning_rate": 0.0005, "loss": 2.1249, "step": 71630 }, { "epoch": 0.27267952163090065, "grad_norm": 0.11935290694236755, "learning_rate": 0.0005, "loss": 2.1261, "step": 71640 }, { "epoch": 0.2727175840990233, "grad_norm": 0.12688718736171722, "learning_rate": 0.0005, "loss": 2.1225, "step": 71650 }, { "epoch": 0.272755646567146, "grad_norm": 0.12275033444166183, "learning_rate": 0.0005, "loss": 2.1143, "step": 71660 }, { "epoch": 0.27279370903526867, "grad_norm": 0.14148494601249695, "learning_rate": 0.0005, "loss": 2.1247, "step": 71670 }, { "epoch": 0.2728317715033914, "grad_norm": 0.13131959736347198, "learning_rate": 0.0005, "loss": 2.1307, "step": 71680 }, { "epoch": 0.27286983397151404, "grad_norm": 0.12532196938991547, "learning_rate": 0.0005, "loss": 2.1125, "step": 71690 }, { "epoch": 0.27290789643963675, "grad_norm": 0.1329348236322403, "learning_rate": 0.0005, "loss": 2.1225, "step": 71700 }, { "epoch": 0.2729459589077594, "grad_norm": 0.1316693127155304, "learning_rate": 0.0005, "loss": 2.1337, "step": 71710 }, { "epoch": 0.2729840213758821, "grad_norm": 0.13339757919311523, "learning_rate": 0.0005, "loss": 2.1197, "step": 71720 }, { "epoch": 0.2730220838440048, "grad_norm": 0.1378227323293686, "learning_rate": 0.0005, "loss": 2.1325, "step": 71730 }, { "epoch": 0.27306014631212744, "grad_norm": 0.12965930998325348, "learning_rate": 0.0005, "loss": 2.1374, "step": 71740 }, { "epoch": 0.27309820878025015, "grad_norm": 0.11927662044763565, "learning_rate": 0.0005, "loss": 2.1301, "step": 71750 }, { "epoch": 0.2731362712483728, "grad_norm": 0.12863266468048096, "learning_rate": 0.0005, "loss": 2.1196, "step": 71760 }, { "epoch": 0.2731743337164955, "grad_norm": 0.13583002984523773, "learning_rate": 0.0005, "loss": 2.1098, "step": 71770 }, { "epoch": 0.2732123961846182, "grad_norm": 0.132333904504776, "learning_rate": 0.0005, "loss": 2.1228, "step": 71780 }, { "epoch": 0.2732504586527409, "grad_norm": 0.12701696157455444, "learning_rate": 0.0005, "loss": 2.115, "step": 71790 }, { "epoch": 0.27328852112086355, "grad_norm": 0.13651852309703827, "learning_rate": 0.0005, "loss": 2.1286, "step": 71800 }, { "epoch": 0.27332658358898626, "grad_norm": 0.13848647475242615, "learning_rate": 0.0005, "loss": 2.1282, "step": 71810 }, { "epoch": 0.2733646460571089, "grad_norm": 0.11856270581483841, "learning_rate": 0.0005, "loss": 2.1217, "step": 71820 }, { "epoch": 0.27340270852523163, "grad_norm": 0.12606863677501678, "learning_rate": 0.0005, "loss": 2.1291, "step": 71830 }, { "epoch": 0.2734407709933543, "grad_norm": 0.14228320121765137, "learning_rate": 0.0005, "loss": 2.1308, "step": 71840 }, { "epoch": 0.273478833461477, "grad_norm": 0.1278725564479828, "learning_rate": 0.0005, "loss": 2.1125, "step": 71850 }, { "epoch": 0.27351689592959966, "grad_norm": 0.13307037949562073, "learning_rate": 0.0005, "loss": 2.1259, "step": 71860 }, { "epoch": 0.2735549583977223, "grad_norm": 0.6107332110404968, "learning_rate": 0.0005, "loss": 2.1309, "step": 71870 }, { "epoch": 0.273593020865845, "grad_norm": 0.12317752838134766, "learning_rate": 0.0005, "loss": 2.1467, "step": 71880 }, { "epoch": 0.2736310833339677, "grad_norm": 0.12442834675312042, "learning_rate": 0.0005, "loss": 2.1395, "step": 71890 }, { "epoch": 0.2736691458020904, "grad_norm": 0.11339490860700607, "learning_rate": 0.0005, "loss": 2.1461, "step": 71900 }, { "epoch": 0.27370720827021305, "grad_norm": 0.1318211406469345, "learning_rate": 0.0005, "loss": 2.1326, "step": 71910 }, { "epoch": 0.27374527073833577, "grad_norm": 0.1129222959280014, "learning_rate": 0.0005, "loss": 2.1346, "step": 71920 }, { "epoch": 0.2737833332064584, "grad_norm": 0.14258186519145966, "learning_rate": 0.0005, "loss": 2.1301, "step": 71930 }, { "epoch": 0.27382139567458114, "grad_norm": 0.1269596368074417, "learning_rate": 0.0005, "loss": 2.1045, "step": 71940 }, { "epoch": 0.2738594581427038, "grad_norm": 0.13796262443065643, "learning_rate": 0.0005, "loss": 2.1281, "step": 71950 }, { "epoch": 0.2738975206108265, "grad_norm": 0.1308310478925705, "learning_rate": 0.0005, "loss": 2.1308, "step": 71960 }, { "epoch": 0.27393558307894916, "grad_norm": 0.1301732063293457, "learning_rate": 0.0005, "loss": 2.1224, "step": 71970 }, { "epoch": 0.2739736455470719, "grad_norm": 0.11353213340044022, "learning_rate": 0.0005, "loss": 2.1163, "step": 71980 }, { "epoch": 0.27401170801519453, "grad_norm": 0.1273941546678543, "learning_rate": 0.0005, "loss": 2.1232, "step": 71990 }, { "epoch": 0.27404977048331725, "grad_norm": 0.12934479117393494, "learning_rate": 0.0005, "loss": 2.1338, "step": 72000 }, { "epoch": 0.2740878329514399, "grad_norm": 0.11563912779092789, "learning_rate": 0.0005, "loss": 2.1274, "step": 72010 }, { "epoch": 0.27412589541956256, "grad_norm": 0.1264961212873459, "learning_rate": 0.0005, "loss": 2.1222, "step": 72020 }, { "epoch": 0.2741639578876853, "grad_norm": 0.14248254895210266, "learning_rate": 0.0005, "loss": 2.1296, "step": 72030 }, { "epoch": 0.27420202035580793, "grad_norm": 0.11887305229902267, "learning_rate": 0.0005, "loss": 2.1289, "step": 72040 }, { "epoch": 0.27424008282393064, "grad_norm": 0.12073611468076706, "learning_rate": 0.0005, "loss": 2.136, "step": 72050 }, { "epoch": 0.2742781452920533, "grad_norm": 0.1239795982837677, "learning_rate": 0.0005, "loss": 2.1261, "step": 72060 }, { "epoch": 0.274316207760176, "grad_norm": 0.12811580300331116, "learning_rate": 0.0005, "loss": 2.138, "step": 72070 }, { "epoch": 0.27435427022829867, "grad_norm": 0.12124840915203094, "learning_rate": 0.0005, "loss": 2.1158, "step": 72080 }, { "epoch": 0.2743923326964214, "grad_norm": 0.12464161962270737, "learning_rate": 0.0005, "loss": 2.1245, "step": 72090 }, { "epoch": 0.27443039516454404, "grad_norm": 0.13844406604766846, "learning_rate": 0.0005, "loss": 2.1227, "step": 72100 }, { "epoch": 0.27446845763266675, "grad_norm": 0.15234865248203278, "learning_rate": 0.0005, "loss": 2.1389, "step": 72110 }, { "epoch": 0.2745065201007894, "grad_norm": 0.10983190685510635, "learning_rate": 0.0005, "loss": 2.1201, "step": 72120 }, { "epoch": 0.2745445825689121, "grad_norm": 0.13060855865478516, "learning_rate": 0.0005, "loss": 2.1365, "step": 72130 }, { "epoch": 0.2745826450370348, "grad_norm": 0.12701942026615143, "learning_rate": 0.0005, "loss": 2.1231, "step": 72140 }, { "epoch": 0.2746207075051575, "grad_norm": 0.11977972090244293, "learning_rate": 0.0005, "loss": 2.1334, "step": 72150 }, { "epoch": 0.27465876997328015, "grad_norm": 0.12274200469255447, "learning_rate": 0.0005, "loss": 2.1241, "step": 72160 }, { "epoch": 0.2746968324414028, "grad_norm": 0.12599435448646545, "learning_rate": 0.0005, "loss": 2.1222, "step": 72170 }, { "epoch": 0.2747348949095255, "grad_norm": 0.13417574763298035, "learning_rate": 0.0005, "loss": 2.1223, "step": 72180 }, { "epoch": 0.2747729573776482, "grad_norm": 0.11986130475997925, "learning_rate": 0.0005, "loss": 2.1203, "step": 72190 }, { "epoch": 0.2748110198457709, "grad_norm": 0.1296272724866867, "learning_rate": 0.0005, "loss": 2.1327, "step": 72200 }, { "epoch": 0.27484908231389354, "grad_norm": 0.11655092984437943, "learning_rate": 0.0005, "loss": 2.1319, "step": 72210 }, { "epoch": 0.27488714478201626, "grad_norm": 0.12658260762691498, "learning_rate": 0.0005, "loss": 2.1182, "step": 72220 }, { "epoch": 0.2749252072501389, "grad_norm": 0.11190526187419891, "learning_rate": 0.0005, "loss": 2.1291, "step": 72230 }, { "epoch": 0.2749632697182616, "grad_norm": 0.12382861971855164, "learning_rate": 0.0005, "loss": 2.1209, "step": 72240 }, { "epoch": 0.2750013321863843, "grad_norm": 0.14002086222171783, "learning_rate": 0.0005, "loss": 2.1176, "step": 72250 }, { "epoch": 0.275039394654507, "grad_norm": 0.12437102198600769, "learning_rate": 0.0005, "loss": 2.1116, "step": 72260 }, { "epoch": 0.27507745712262965, "grad_norm": 0.12130562216043472, "learning_rate": 0.0005, "loss": 2.1461, "step": 72270 }, { "epoch": 0.27511551959075237, "grad_norm": 0.12964333593845367, "learning_rate": 0.0005, "loss": 2.1319, "step": 72280 }, { "epoch": 0.275153582058875, "grad_norm": 0.1255006045103073, "learning_rate": 0.0005, "loss": 2.1358, "step": 72290 }, { "epoch": 0.27519164452699774, "grad_norm": 0.13847501575946808, "learning_rate": 0.0005, "loss": 2.125, "step": 72300 }, { "epoch": 0.2752297069951204, "grad_norm": 0.1399521380662918, "learning_rate": 0.0005, "loss": 2.1279, "step": 72310 }, { "epoch": 0.27526776946324305, "grad_norm": 0.1263510286808014, "learning_rate": 0.0005, "loss": 2.1023, "step": 72320 }, { "epoch": 0.27530583193136576, "grad_norm": 0.12814727425575256, "learning_rate": 0.0005, "loss": 2.1083, "step": 72330 }, { "epoch": 0.2753438943994884, "grad_norm": 0.12721291184425354, "learning_rate": 0.0005, "loss": 2.1178, "step": 72340 }, { "epoch": 0.27538195686761113, "grad_norm": 0.12828604876995087, "learning_rate": 0.0005, "loss": 2.1182, "step": 72350 }, { "epoch": 0.2754200193357338, "grad_norm": 0.13843713700771332, "learning_rate": 0.0005, "loss": 2.1015, "step": 72360 }, { "epoch": 0.2754580818038565, "grad_norm": 0.12065796554088593, "learning_rate": 0.0005, "loss": 2.1287, "step": 72370 }, { "epoch": 0.27549614427197916, "grad_norm": 0.1309119164943695, "learning_rate": 0.0005, "loss": 2.1258, "step": 72380 }, { "epoch": 0.2755342067401019, "grad_norm": 0.12360799312591553, "learning_rate": 0.0005, "loss": 2.1087, "step": 72390 }, { "epoch": 0.27557226920822453, "grad_norm": 0.11458812654018402, "learning_rate": 0.0005, "loss": 2.1264, "step": 72400 }, { "epoch": 0.27561033167634724, "grad_norm": 0.12010073661804199, "learning_rate": 0.0005, "loss": 2.1225, "step": 72410 }, { "epoch": 0.2756483941444699, "grad_norm": 0.1172177717089653, "learning_rate": 0.0005, "loss": 2.1312, "step": 72420 }, { "epoch": 0.2756864566125926, "grad_norm": 0.1232071965932846, "learning_rate": 0.0005, "loss": 2.1326, "step": 72430 }, { "epoch": 0.27572451908071527, "grad_norm": 0.12564972043037415, "learning_rate": 0.0005, "loss": 2.1269, "step": 72440 }, { "epoch": 0.2757625815488379, "grad_norm": 0.11357922106981277, "learning_rate": 0.0005, "loss": 2.1428, "step": 72450 }, { "epoch": 0.27580064401696064, "grad_norm": 0.13264420628547668, "learning_rate": 0.0005, "loss": 2.1185, "step": 72460 }, { "epoch": 0.2758387064850833, "grad_norm": 0.1259356290102005, "learning_rate": 0.0005, "loss": 2.1272, "step": 72470 }, { "epoch": 0.275876768953206, "grad_norm": 0.1306888610124588, "learning_rate": 0.0005, "loss": 2.1294, "step": 72480 }, { "epoch": 0.27591483142132867, "grad_norm": 0.12248992919921875, "learning_rate": 0.0005, "loss": 2.1284, "step": 72490 }, { "epoch": 0.2759528938894514, "grad_norm": 0.11357472091913223, "learning_rate": 0.0005, "loss": 2.1281, "step": 72500 }, { "epoch": 0.27599095635757404, "grad_norm": 0.11590695381164551, "learning_rate": 0.0005, "loss": 2.1407, "step": 72510 }, { "epoch": 0.27602901882569675, "grad_norm": 0.12227820605039597, "learning_rate": 0.0005, "loss": 2.1248, "step": 72520 }, { "epoch": 0.2760670812938194, "grad_norm": 0.11770544946193695, "learning_rate": 0.0005, "loss": 2.1201, "step": 72530 }, { "epoch": 0.2761051437619421, "grad_norm": 0.12528224289417267, "learning_rate": 0.0005, "loss": 2.1461, "step": 72540 }, { "epoch": 0.2761432062300648, "grad_norm": 0.12878407537937164, "learning_rate": 0.0005, "loss": 2.1335, "step": 72550 }, { "epoch": 0.2761812686981875, "grad_norm": 0.13306809961795807, "learning_rate": 0.0005, "loss": 2.1427, "step": 72560 }, { "epoch": 0.27621933116631014, "grad_norm": 0.13325989246368408, "learning_rate": 0.0005, "loss": 2.1287, "step": 72570 }, { "epoch": 0.27625739363443286, "grad_norm": 0.1145695373415947, "learning_rate": 0.0005, "loss": 2.1344, "step": 72580 }, { "epoch": 0.2762954561025555, "grad_norm": 0.11948937922716141, "learning_rate": 0.0005, "loss": 2.1322, "step": 72590 }, { "epoch": 0.27633351857067817, "grad_norm": 0.11720909178256989, "learning_rate": 0.0005, "loss": 2.1272, "step": 72600 }, { "epoch": 0.2763715810388009, "grad_norm": 0.12367577850818634, "learning_rate": 0.0005, "loss": 2.124, "step": 72610 }, { "epoch": 0.27640964350692354, "grad_norm": 0.13133859634399414, "learning_rate": 0.0005, "loss": 2.1246, "step": 72620 }, { "epoch": 0.27644770597504625, "grad_norm": 0.12476001679897308, "learning_rate": 0.0005, "loss": 2.1109, "step": 72630 }, { "epoch": 0.2764857684431689, "grad_norm": 0.12994538247585297, "learning_rate": 0.0005, "loss": 2.13, "step": 72640 }, { "epoch": 0.2765238309112916, "grad_norm": 0.13551446795463562, "learning_rate": 0.0005, "loss": 2.1143, "step": 72650 }, { "epoch": 0.2765618933794143, "grad_norm": 0.13254836201667786, "learning_rate": 0.0005, "loss": 2.125, "step": 72660 }, { "epoch": 0.276599955847537, "grad_norm": 0.13123776018619537, "learning_rate": 0.0005, "loss": 2.1314, "step": 72670 }, { "epoch": 0.27663801831565965, "grad_norm": 0.11721350997686386, "learning_rate": 0.0005, "loss": 2.1264, "step": 72680 }, { "epoch": 0.27667608078378236, "grad_norm": 0.125374436378479, "learning_rate": 0.0005, "loss": 2.1315, "step": 72690 }, { "epoch": 0.276714143251905, "grad_norm": 0.129308819770813, "learning_rate": 0.0005, "loss": 2.124, "step": 72700 }, { "epoch": 0.27675220572002773, "grad_norm": 0.14361171424388885, "learning_rate": 0.0005, "loss": 2.1395, "step": 72710 }, { "epoch": 0.2767902681881504, "grad_norm": 0.14105713367462158, "learning_rate": 0.0005, "loss": 2.1293, "step": 72720 }, { "epoch": 0.2768283306562731, "grad_norm": 0.12079322338104248, "learning_rate": 0.0005, "loss": 2.1224, "step": 72730 }, { "epoch": 0.27686639312439576, "grad_norm": 0.12566441297531128, "learning_rate": 0.0005, "loss": 2.1144, "step": 72740 }, { "epoch": 0.2769044555925184, "grad_norm": 0.12056665867567062, "learning_rate": 0.0005, "loss": 2.1209, "step": 72750 }, { "epoch": 0.27694251806064113, "grad_norm": 0.11491995304822922, "learning_rate": 0.0005, "loss": 2.1285, "step": 72760 }, { "epoch": 0.2769805805287638, "grad_norm": 0.12552793323993683, "learning_rate": 0.0005, "loss": 2.1263, "step": 72770 }, { "epoch": 0.2770186429968865, "grad_norm": 0.12376090884208679, "learning_rate": 0.0005, "loss": 2.1364, "step": 72780 }, { "epoch": 0.27705670546500916, "grad_norm": 0.11079999059438705, "learning_rate": 0.0005, "loss": 2.1259, "step": 72790 }, { "epoch": 0.27709476793313187, "grad_norm": 0.11827901005744934, "learning_rate": 0.0005, "loss": 2.1274, "step": 72800 }, { "epoch": 0.2771328304012545, "grad_norm": 0.12451273202896118, "learning_rate": 0.0005, "loss": 2.1226, "step": 72810 }, { "epoch": 0.27717089286937724, "grad_norm": 0.13412445783615112, "learning_rate": 0.0005, "loss": 2.1154, "step": 72820 }, { "epoch": 0.2772089553374999, "grad_norm": 0.12760554254055023, "learning_rate": 0.0005, "loss": 2.1197, "step": 72830 }, { "epoch": 0.2772470178056226, "grad_norm": 0.1256927102804184, "learning_rate": 0.0005, "loss": 2.1135, "step": 72840 }, { "epoch": 0.27728508027374527, "grad_norm": 0.12824873626232147, "learning_rate": 0.0005, "loss": 2.1365, "step": 72850 }, { "epoch": 0.277323142741868, "grad_norm": 0.11432984471321106, "learning_rate": 0.0005, "loss": 2.129, "step": 72860 }, { "epoch": 0.27736120520999064, "grad_norm": 0.11752847582101822, "learning_rate": 0.0005, "loss": 2.1298, "step": 72870 }, { "epoch": 0.2773992676781133, "grad_norm": 0.12238814681768417, "learning_rate": 0.0005, "loss": 2.1239, "step": 72880 }, { "epoch": 0.277437330146236, "grad_norm": 0.128130242228508, "learning_rate": 0.0005, "loss": 2.1286, "step": 72890 }, { "epoch": 0.27747539261435866, "grad_norm": 0.1231345683336258, "learning_rate": 0.0005, "loss": 2.1262, "step": 72900 }, { "epoch": 0.2775134550824814, "grad_norm": 0.11722811311483383, "learning_rate": 0.0005, "loss": 2.1059, "step": 72910 }, { "epoch": 0.27755151755060403, "grad_norm": 0.1214623749256134, "learning_rate": 0.0005, "loss": 2.1426, "step": 72920 }, { "epoch": 0.27758958001872674, "grad_norm": 0.1423121839761734, "learning_rate": 0.0005, "loss": 2.1211, "step": 72930 }, { "epoch": 0.2776276424868494, "grad_norm": 0.12980258464813232, "learning_rate": 0.0005, "loss": 2.1299, "step": 72940 }, { "epoch": 0.2776657049549721, "grad_norm": 0.14954525232315063, "learning_rate": 0.0005, "loss": 2.1236, "step": 72950 }, { "epoch": 0.27770376742309477, "grad_norm": 0.12002082914113998, "learning_rate": 0.0005, "loss": 2.1203, "step": 72960 }, { "epoch": 0.2777418298912175, "grad_norm": 0.11208499222993851, "learning_rate": 0.0005, "loss": 2.1252, "step": 72970 }, { "epoch": 0.27777989235934014, "grad_norm": 0.12585601210594177, "learning_rate": 0.0005, "loss": 2.1262, "step": 72980 }, { "epoch": 0.27781795482746285, "grad_norm": 0.135234072804451, "learning_rate": 0.0005, "loss": 2.1274, "step": 72990 }, { "epoch": 0.2778560172955855, "grad_norm": 0.11779285222291946, "learning_rate": 0.0005, "loss": 2.1371, "step": 73000 }, { "epoch": 0.2778940797637082, "grad_norm": 0.13158877193927765, "learning_rate": 0.0005, "loss": 2.1133, "step": 73010 }, { "epoch": 0.2779321422318309, "grad_norm": 0.31607118248939514, "learning_rate": 0.0005, "loss": 2.1352, "step": 73020 }, { "epoch": 0.27797020469995354, "grad_norm": 0.11737809330224991, "learning_rate": 0.0005, "loss": 2.1376, "step": 73030 }, { "epoch": 0.27800826716807625, "grad_norm": 0.12738829851150513, "learning_rate": 0.0005, "loss": 2.1231, "step": 73040 }, { "epoch": 0.2780463296361989, "grad_norm": 0.13510660827159882, "learning_rate": 0.0005, "loss": 2.1118, "step": 73050 }, { "epoch": 0.2780843921043216, "grad_norm": 0.12289441376924515, "learning_rate": 0.0005, "loss": 2.1192, "step": 73060 }, { "epoch": 0.2781224545724443, "grad_norm": 0.15084408223628998, "learning_rate": 0.0005, "loss": 2.1273, "step": 73070 }, { "epoch": 0.278160517040567, "grad_norm": 0.11964649707078934, "learning_rate": 0.0005, "loss": 2.1409, "step": 73080 }, { "epoch": 0.27819857950868965, "grad_norm": 0.1252226084470749, "learning_rate": 0.0005, "loss": 2.1251, "step": 73090 }, { "epoch": 0.27823664197681236, "grad_norm": 0.12739278376102448, "learning_rate": 0.0005, "loss": 2.1262, "step": 73100 }, { "epoch": 0.278274704444935, "grad_norm": 0.10967393219470978, "learning_rate": 0.0005, "loss": 2.1232, "step": 73110 }, { "epoch": 0.27831276691305773, "grad_norm": 0.13770455121994019, "learning_rate": 0.0005, "loss": 2.1254, "step": 73120 }, { "epoch": 0.2783508293811804, "grad_norm": 0.1319299340248108, "learning_rate": 0.0005, "loss": 2.1215, "step": 73130 }, { "epoch": 0.2783888918493031, "grad_norm": 0.12463415414094925, "learning_rate": 0.0005, "loss": 2.1379, "step": 73140 }, { "epoch": 0.27842695431742576, "grad_norm": 0.12271592766046524, "learning_rate": 0.0005, "loss": 2.1171, "step": 73150 }, { "epoch": 0.27846501678554847, "grad_norm": 0.14967022836208344, "learning_rate": 0.0005, "loss": 2.112, "step": 73160 }, { "epoch": 0.2785030792536711, "grad_norm": 0.13166722655296326, "learning_rate": 0.0005, "loss": 2.1239, "step": 73170 }, { "epoch": 0.2785411417217938, "grad_norm": 0.13443920016288757, "learning_rate": 0.0005, "loss": 2.124, "step": 73180 }, { "epoch": 0.2785792041899165, "grad_norm": 0.12196287512779236, "learning_rate": 0.0005, "loss": 2.131, "step": 73190 }, { "epoch": 0.27861726665803915, "grad_norm": 0.14742255210876465, "learning_rate": 0.0005, "loss": 2.118, "step": 73200 }, { "epoch": 0.27865532912616187, "grad_norm": 0.11828165501356125, "learning_rate": 0.0005, "loss": 2.1145, "step": 73210 }, { "epoch": 0.2786933915942845, "grad_norm": 0.12427137792110443, "learning_rate": 0.0005, "loss": 2.1281, "step": 73220 }, { "epoch": 0.27873145406240724, "grad_norm": 0.11481276899576187, "learning_rate": 0.0005, "loss": 2.1244, "step": 73230 }, { "epoch": 0.2787695165305299, "grad_norm": 0.13980519771575928, "learning_rate": 0.0005, "loss": 2.1389, "step": 73240 }, { "epoch": 0.2788075789986526, "grad_norm": 0.12095464020967484, "learning_rate": 0.0005, "loss": 2.1143, "step": 73250 }, { "epoch": 0.27884564146677526, "grad_norm": 0.1202949658036232, "learning_rate": 0.0005, "loss": 2.1275, "step": 73260 }, { "epoch": 0.278883703934898, "grad_norm": 0.12349908798933029, "learning_rate": 0.0005, "loss": 2.1363, "step": 73270 }, { "epoch": 0.27892176640302063, "grad_norm": 0.12171231955289841, "learning_rate": 0.0005, "loss": 2.1346, "step": 73280 }, { "epoch": 0.27895982887114334, "grad_norm": 0.12804117798805237, "learning_rate": 0.0005, "loss": 2.1289, "step": 73290 }, { "epoch": 0.278997891339266, "grad_norm": 0.139211505651474, "learning_rate": 0.0005, "loss": 2.1086, "step": 73300 }, { "epoch": 0.27903595380738866, "grad_norm": 0.12075608968734741, "learning_rate": 0.0005, "loss": 2.1121, "step": 73310 }, { "epoch": 0.27907401627551137, "grad_norm": 0.120372474193573, "learning_rate": 0.0005, "loss": 2.1047, "step": 73320 }, { "epoch": 0.27911207874363403, "grad_norm": 0.1180444061756134, "learning_rate": 0.0005, "loss": 2.136, "step": 73330 }, { "epoch": 0.27915014121175674, "grad_norm": 0.12552054226398468, "learning_rate": 0.0005, "loss": 2.1122, "step": 73340 }, { "epoch": 0.2791882036798794, "grad_norm": 0.15276488661766052, "learning_rate": 0.0005, "loss": 2.1422, "step": 73350 }, { "epoch": 0.2792262661480021, "grad_norm": 0.13675646483898163, "learning_rate": 0.0005, "loss": 2.1367, "step": 73360 }, { "epoch": 0.27926432861612477, "grad_norm": 0.1263941377401352, "learning_rate": 0.0005, "loss": 2.1278, "step": 73370 }, { "epoch": 0.2793023910842475, "grad_norm": 0.12853482365608215, "learning_rate": 0.0005, "loss": 2.135, "step": 73380 }, { "epoch": 0.27934045355237014, "grad_norm": 0.13050010800361633, "learning_rate": 0.0005, "loss": 2.1126, "step": 73390 }, { "epoch": 0.27937851602049285, "grad_norm": 0.12878821790218353, "learning_rate": 0.0005, "loss": 2.1197, "step": 73400 }, { "epoch": 0.2794165784886155, "grad_norm": 0.12213098257780075, "learning_rate": 0.0005, "loss": 2.1325, "step": 73410 }, { "epoch": 0.2794546409567382, "grad_norm": 0.12876629829406738, "learning_rate": 0.0005, "loss": 2.1361, "step": 73420 }, { "epoch": 0.2794927034248609, "grad_norm": 0.1272810399532318, "learning_rate": 0.0005, "loss": 2.1296, "step": 73430 }, { "epoch": 0.2795307658929836, "grad_norm": 0.12028060853481293, "learning_rate": 0.0005, "loss": 2.1176, "step": 73440 }, { "epoch": 0.27956882836110625, "grad_norm": 0.1291312426328659, "learning_rate": 0.0005, "loss": 2.1262, "step": 73450 }, { "epoch": 0.2796068908292289, "grad_norm": 0.1288689821958542, "learning_rate": 0.0005, "loss": 2.1311, "step": 73460 }, { "epoch": 0.2796449532973516, "grad_norm": 0.1291615068912506, "learning_rate": 0.0005, "loss": 2.1303, "step": 73470 }, { "epoch": 0.2796830157654743, "grad_norm": 0.12141037732362747, "learning_rate": 0.0005, "loss": 2.124, "step": 73480 }, { "epoch": 0.279721078233597, "grad_norm": 0.14140747487545013, "learning_rate": 0.0005, "loss": 2.115, "step": 73490 }, { "epoch": 0.27975914070171964, "grad_norm": 0.12675082683563232, "learning_rate": 0.0005, "loss": 2.1249, "step": 73500 }, { "epoch": 0.27979720316984236, "grad_norm": 0.12179327756166458, "learning_rate": 0.0005, "loss": 2.1192, "step": 73510 }, { "epoch": 0.279835265637965, "grad_norm": 0.11616285890340805, "learning_rate": 0.0005, "loss": 2.137, "step": 73520 }, { "epoch": 0.2798733281060877, "grad_norm": 0.12648041546344757, "learning_rate": 0.0005, "loss": 2.1356, "step": 73530 }, { "epoch": 0.2799113905742104, "grad_norm": 0.12828604876995087, "learning_rate": 0.0005, "loss": 2.1196, "step": 73540 }, { "epoch": 0.2799494530423331, "grad_norm": 0.11238578706979752, "learning_rate": 0.0005, "loss": 2.1374, "step": 73550 }, { "epoch": 0.27998751551045575, "grad_norm": 0.13090801239013672, "learning_rate": 0.0005, "loss": 2.1312, "step": 73560 }, { "epoch": 0.28002557797857847, "grad_norm": 0.13082750141620636, "learning_rate": 0.0005, "loss": 2.1249, "step": 73570 }, { "epoch": 0.2800636404467011, "grad_norm": 0.13069972395896912, "learning_rate": 0.0005, "loss": 2.1332, "step": 73580 }, { "epoch": 0.28010170291482384, "grad_norm": 0.12461374700069427, "learning_rate": 0.0005, "loss": 2.1189, "step": 73590 }, { "epoch": 0.2801397653829465, "grad_norm": 0.13140574097633362, "learning_rate": 0.0005, "loss": 2.1242, "step": 73600 }, { "epoch": 0.28017782785106915, "grad_norm": 0.12235130369663239, "learning_rate": 0.0005, "loss": 2.1227, "step": 73610 }, { "epoch": 0.28021589031919186, "grad_norm": 0.1320742815732956, "learning_rate": 0.0005, "loss": 2.1256, "step": 73620 }, { "epoch": 0.2802539527873145, "grad_norm": 0.13617640733718872, "learning_rate": 0.0005, "loss": 2.1418, "step": 73630 }, { "epoch": 0.28029201525543723, "grad_norm": 0.12380155920982361, "learning_rate": 0.0005, "loss": 2.1307, "step": 73640 }, { "epoch": 0.2803300777235599, "grad_norm": 0.14408768713474274, "learning_rate": 0.0005, "loss": 2.1343, "step": 73650 }, { "epoch": 0.2803681401916826, "grad_norm": 0.11952300369739532, "learning_rate": 0.0005, "loss": 2.1116, "step": 73660 }, { "epoch": 0.28040620265980526, "grad_norm": 0.14055335521697998, "learning_rate": 0.0005, "loss": 2.1237, "step": 73670 }, { "epoch": 0.28044426512792797, "grad_norm": 0.12879961729049683, "learning_rate": 0.0005, "loss": 2.1231, "step": 73680 }, { "epoch": 0.28048232759605063, "grad_norm": 0.1171557605266571, "learning_rate": 0.0005, "loss": 2.1254, "step": 73690 }, { "epoch": 0.28052039006417334, "grad_norm": 0.1254773586988449, "learning_rate": 0.0005, "loss": 2.1331, "step": 73700 }, { "epoch": 0.280558452532296, "grad_norm": 0.14595383405685425, "learning_rate": 0.0005, "loss": 2.1402, "step": 73710 }, { "epoch": 0.2805965150004187, "grad_norm": 0.1298341304063797, "learning_rate": 0.0005, "loss": 2.131, "step": 73720 }, { "epoch": 0.28063457746854137, "grad_norm": 0.12870018184185028, "learning_rate": 0.0005, "loss": 2.1213, "step": 73730 }, { "epoch": 0.280672639936664, "grad_norm": 0.12347274273633957, "learning_rate": 0.0005, "loss": 2.1226, "step": 73740 }, { "epoch": 0.28071070240478674, "grad_norm": 0.11614564806222916, "learning_rate": 0.0005, "loss": 2.1184, "step": 73750 }, { "epoch": 0.2807487648729094, "grad_norm": 0.120811328291893, "learning_rate": 0.0005, "loss": 2.1181, "step": 73760 }, { "epoch": 0.2807868273410321, "grad_norm": 0.11254284530878067, "learning_rate": 0.0005, "loss": 2.1246, "step": 73770 }, { "epoch": 0.28082488980915477, "grad_norm": 0.12072857469320297, "learning_rate": 0.0005, "loss": 2.1258, "step": 73780 }, { "epoch": 0.2808629522772775, "grad_norm": 0.11864668130874634, "learning_rate": 0.0005, "loss": 2.1279, "step": 73790 }, { "epoch": 0.28090101474540013, "grad_norm": 0.12393586337566376, "learning_rate": 0.0005, "loss": 2.1313, "step": 73800 }, { "epoch": 0.28093907721352285, "grad_norm": 0.11929041892290115, "learning_rate": 0.0005, "loss": 2.1119, "step": 73810 }, { "epoch": 0.2809771396816455, "grad_norm": 0.11602532863616943, "learning_rate": 0.0005, "loss": 2.1245, "step": 73820 }, { "epoch": 0.2810152021497682, "grad_norm": 0.12017183005809784, "learning_rate": 0.0005, "loss": 2.1263, "step": 73830 }, { "epoch": 0.2810532646178909, "grad_norm": 0.131440207362175, "learning_rate": 0.0005, "loss": 2.1288, "step": 73840 }, { "epoch": 0.2810913270860136, "grad_norm": 0.12414850294589996, "learning_rate": 0.0005, "loss": 2.1311, "step": 73850 }, { "epoch": 0.28112938955413624, "grad_norm": 0.1354745477437973, "learning_rate": 0.0005, "loss": 2.1303, "step": 73860 }, { "epoch": 0.28116745202225896, "grad_norm": 0.13583865761756897, "learning_rate": 0.0005, "loss": 2.1175, "step": 73870 }, { "epoch": 0.2812055144903816, "grad_norm": 0.12519274652004242, "learning_rate": 0.0005, "loss": 2.1307, "step": 73880 }, { "epoch": 0.28124357695850427, "grad_norm": 0.12412375956773758, "learning_rate": 0.0005, "loss": 2.1177, "step": 73890 }, { "epoch": 0.281281639426627, "grad_norm": 0.12623530626296997, "learning_rate": 0.0005, "loss": 2.1183, "step": 73900 }, { "epoch": 0.28131970189474964, "grad_norm": 0.12054393440485, "learning_rate": 0.0005, "loss": 2.1246, "step": 73910 }, { "epoch": 0.28135776436287235, "grad_norm": 0.1273050457239151, "learning_rate": 0.0005, "loss": 2.1366, "step": 73920 }, { "epoch": 0.281395826830995, "grad_norm": 0.13887496292591095, "learning_rate": 0.0005, "loss": 2.1196, "step": 73930 }, { "epoch": 0.2814338892991177, "grad_norm": 0.14566300809383392, "learning_rate": 0.0005, "loss": 2.1367, "step": 73940 }, { "epoch": 0.2814719517672404, "grad_norm": 0.11882128566503525, "learning_rate": 0.0005, "loss": 2.1208, "step": 73950 }, { "epoch": 0.2815100142353631, "grad_norm": 0.13641339540481567, "learning_rate": 0.0005, "loss": 2.1332, "step": 73960 }, { "epoch": 0.28154807670348575, "grad_norm": 0.12365715205669403, "learning_rate": 0.0005, "loss": 2.1187, "step": 73970 }, { "epoch": 0.28158613917160846, "grad_norm": 0.15532588958740234, "learning_rate": 0.0005, "loss": 2.1347, "step": 73980 }, { "epoch": 0.2816242016397311, "grad_norm": 0.1406833678483963, "learning_rate": 0.0005, "loss": 2.1195, "step": 73990 }, { "epoch": 0.28166226410785383, "grad_norm": 0.1372503936290741, "learning_rate": 0.0005, "loss": 2.1242, "step": 74000 }, { "epoch": 0.2817003265759765, "grad_norm": 0.11215030401945114, "learning_rate": 0.0005, "loss": 2.1278, "step": 74010 }, { "epoch": 0.2817383890440992, "grad_norm": 0.12551802396774292, "learning_rate": 0.0005, "loss": 2.1259, "step": 74020 }, { "epoch": 0.28177645151222186, "grad_norm": 0.11723242700099945, "learning_rate": 0.0005, "loss": 2.1122, "step": 74030 }, { "epoch": 0.2818145139803445, "grad_norm": 0.12735706567764282, "learning_rate": 0.0005, "loss": 2.1331, "step": 74040 }, { "epoch": 0.28185257644846723, "grad_norm": 0.12368855625391006, "learning_rate": 0.0005, "loss": 2.1307, "step": 74050 }, { "epoch": 0.2818906389165899, "grad_norm": 0.1375809907913208, "learning_rate": 0.0005, "loss": 2.1297, "step": 74060 }, { "epoch": 0.2819287013847126, "grad_norm": 0.128162682056427, "learning_rate": 0.0005, "loss": 2.132, "step": 74070 }, { "epoch": 0.28196676385283526, "grad_norm": 0.1310570240020752, "learning_rate": 0.0005, "loss": 2.1413, "step": 74080 }, { "epoch": 0.28200482632095797, "grad_norm": 0.11914895474910736, "learning_rate": 0.0005, "loss": 2.1428, "step": 74090 }, { "epoch": 0.2820428887890806, "grad_norm": 0.12058401107788086, "learning_rate": 0.0005, "loss": 2.1311, "step": 74100 }, { "epoch": 0.28208095125720334, "grad_norm": 0.13332392275333405, "learning_rate": 0.0005, "loss": 2.1418, "step": 74110 }, { "epoch": 0.282119013725326, "grad_norm": 0.130690336227417, "learning_rate": 0.0005, "loss": 2.1176, "step": 74120 }, { "epoch": 0.2821570761934487, "grad_norm": 0.1214623898267746, "learning_rate": 0.0005, "loss": 2.1343, "step": 74130 }, { "epoch": 0.28219513866157137, "grad_norm": 0.1267269253730774, "learning_rate": 0.0005, "loss": 2.1293, "step": 74140 }, { "epoch": 0.2822332011296941, "grad_norm": 0.13244369626045227, "learning_rate": 0.0005, "loss": 2.1217, "step": 74150 }, { "epoch": 0.28227126359781674, "grad_norm": 0.12093862146139145, "learning_rate": 0.0005, "loss": 2.1272, "step": 74160 }, { "epoch": 0.2823093260659394, "grad_norm": 0.13259826600551605, "learning_rate": 0.0005, "loss": 2.1198, "step": 74170 }, { "epoch": 0.2823473885340621, "grad_norm": 0.12631496787071228, "learning_rate": 0.0005, "loss": 2.1022, "step": 74180 }, { "epoch": 0.28238545100218476, "grad_norm": 0.11987242102622986, "learning_rate": 0.0005, "loss": 2.1355, "step": 74190 }, { "epoch": 0.2824235134703075, "grad_norm": 0.12496347725391388, "learning_rate": 0.0005, "loss": 2.1343, "step": 74200 }, { "epoch": 0.28246157593843013, "grad_norm": 0.12754859030246735, "learning_rate": 0.0005, "loss": 2.1133, "step": 74210 }, { "epoch": 0.28249963840655284, "grad_norm": 0.12239838391542435, "learning_rate": 0.0005, "loss": 2.1377, "step": 74220 }, { "epoch": 0.2825377008746755, "grad_norm": 0.13033181428909302, "learning_rate": 0.0005, "loss": 2.1306, "step": 74230 }, { "epoch": 0.2825757633427982, "grad_norm": 0.12822109460830688, "learning_rate": 0.0005, "loss": 2.1267, "step": 74240 }, { "epoch": 0.28261382581092087, "grad_norm": 0.12703551352024078, "learning_rate": 0.0005, "loss": 2.126, "step": 74250 }, { "epoch": 0.2826518882790436, "grad_norm": 0.11725924909114838, "learning_rate": 0.0005, "loss": 2.1181, "step": 74260 }, { "epoch": 0.28268995074716624, "grad_norm": 0.12533323466777802, "learning_rate": 0.0005, "loss": 2.1236, "step": 74270 }, { "epoch": 0.28272801321528895, "grad_norm": 0.11936778575181961, "learning_rate": 0.0005, "loss": 2.1199, "step": 74280 }, { "epoch": 0.2827660756834116, "grad_norm": 0.1154133751988411, "learning_rate": 0.0005, "loss": 2.1212, "step": 74290 }, { "epoch": 0.2828041381515343, "grad_norm": 0.11242377758026123, "learning_rate": 0.0005, "loss": 2.1297, "step": 74300 }, { "epoch": 0.282842200619657, "grad_norm": 0.12405408173799515, "learning_rate": 0.0005, "loss": 2.1282, "step": 74310 }, { "epoch": 0.28288026308777964, "grad_norm": 0.11994063854217529, "learning_rate": 0.0005, "loss": 2.1475, "step": 74320 }, { "epoch": 0.28291832555590235, "grad_norm": 0.1216566413640976, "learning_rate": 0.0005, "loss": 2.1219, "step": 74330 }, { "epoch": 0.282956388024025, "grad_norm": 0.13246215879917145, "learning_rate": 0.0005, "loss": 2.1162, "step": 74340 }, { "epoch": 0.2829944504921477, "grad_norm": 0.13144953548908234, "learning_rate": 0.0005, "loss": 2.1097, "step": 74350 }, { "epoch": 0.2830325129602704, "grad_norm": 0.14443819224834442, "learning_rate": 0.0005, "loss": 2.1365, "step": 74360 }, { "epoch": 0.2830705754283931, "grad_norm": 0.13754788041114807, "learning_rate": 0.0005, "loss": 2.1247, "step": 74370 }, { "epoch": 0.28310863789651575, "grad_norm": 0.12103603780269623, "learning_rate": 0.0005, "loss": 2.1431, "step": 74380 }, { "epoch": 0.28314670036463846, "grad_norm": 0.13121990859508514, "learning_rate": 0.0005, "loss": 2.1322, "step": 74390 }, { "epoch": 0.2831847628327611, "grad_norm": 0.11447214335203171, "learning_rate": 0.0005, "loss": 2.1269, "step": 74400 }, { "epoch": 0.28322282530088383, "grad_norm": 0.12295140326023102, "learning_rate": 0.0005, "loss": 2.1195, "step": 74410 }, { "epoch": 0.2832608877690065, "grad_norm": 0.13450969755649567, "learning_rate": 0.0005, "loss": 2.1249, "step": 74420 }, { "epoch": 0.2832989502371292, "grad_norm": 0.12559343874454498, "learning_rate": 0.0005, "loss": 2.127, "step": 74430 }, { "epoch": 0.28333701270525186, "grad_norm": 0.11981473118066788, "learning_rate": 0.0005, "loss": 2.1192, "step": 74440 }, { "epoch": 0.28337507517337457, "grad_norm": 0.14503072202205658, "learning_rate": 0.0005, "loss": 2.1246, "step": 74450 }, { "epoch": 0.2834131376414972, "grad_norm": 0.1257060319185257, "learning_rate": 0.0005, "loss": 2.1193, "step": 74460 }, { "epoch": 0.2834512001096199, "grad_norm": 0.12952490150928497, "learning_rate": 0.0005, "loss": 2.1076, "step": 74470 }, { "epoch": 0.2834892625777426, "grad_norm": 0.12097674608230591, "learning_rate": 0.0005, "loss": 2.1318, "step": 74480 }, { "epoch": 0.28352732504586525, "grad_norm": 0.13185162842273712, "learning_rate": 0.0005, "loss": 2.1319, "step": 74490 }, { "epoch": 0.28356538751398797, "grad_norm": 0.14450843632221222, "learning_rate": 0.0005, "loss": 2.1296, "step": 74500 }, { "epoch": 0.2836034499821106, "grad_norm": 0.12296470254659653, "learning_rate": 0.0005, "loss": 2.1219, "step": 74510 }, { "epoch": 0.28364151245023334, "grad_norm": 0.13791923224925995, "learning_rate": 0.0005, "loss": 2.1174, "step": 74520 }, { "epoch": 0.283679574918356, "grad_norm": 0.12625502049922943, "learning_rate": 0.0005, "loss": 2.1338, "step": 74530 }, { "epoch": 0.2837176373864787, "grad_norm": 0.13758540153503418, "learning_rate": 0.0005, "loss": 2.1267, "step": 74540 }, { "epoch": 0.28375569985460136, "grad_norm": 0.11705252528190613, "learning_rate": 0.0005, "loss": 2.129, "step": 74550 }, { "epoch": 0.2837937623227241, "grad_norm": 0.11666113138198853, "learning_rate": 0.0005, "loss": 2.1401, "step": 74560 }, { "epoch": 0.28383182479084673, "grad_norm": 0.1299813836812973, "learning_rate": 0.0005, "loss": 2.1264, "step": 74570 }, { "epoch": 0.28386988725896944, "grad_norm": 0.3286300003528595, "learning_rate": 0.0005, "loss": 2.1568, "step": 74580 }, { "epoch": 0.2839079497270921, "grad_norm": 0.1263127326965332, "learning_rate": 0.0005, "loss": 2.1269, "step": 74590 }, { "epoch": 0.2839460121952148, "grad_norm": 0.12550726532936096, "learning_rate": 0.0005, "loss": 2.1442, "step": 74600 }, { "epoch": 0.28398407466333747, "grad_norm": 0.13197514414787292, "learning_rate": 0.0005, "loss": 2.1258, "step": 74610 }, { "epoch": 0.28402213713146013, "grad_norm": 0.11156166344881058, "learning_rate": 0.0005, "loss": 2.1395, "step": 74620 }, { "epoch": 0.28406019959958284, "grad_norm": 0.12309394776821136, "learning_rate": 0.0005, "loss": 2.1266, "step": 74630 }, { "epoch": 0.2840982620677055, "grad_norm": 0.13029909133911133, "learning_rate": 0.0005, "loss": 2.117, "step": 74640 }, { "epoch": 0.2841363245358282, "grad_norm": 0.13694219291210175, "learning_rate": 0.0005, "loss": 2.1165, "step": 74650 }, { "epoch": 0.28417438700395087, "grad_norm": 0.12063515186309814, "learning_rate": 0.0005, "loss": 2.1418, "step": 74660 }, { "epoch": 0.2842124494720736, "grad_norm": 0.136690154671669, "learning_rate": 0.0005, "loss": 2.1193, "step": 74670 }, { "epoch": 0.28425051194019624, "grad_norm": 0.1438709944486618, "learning_rate": 0.0005, "loss": 2.1361, "step": 74680 }, { "epoch": 0.28428857440831895, "grad_norm": 0.14761757850646973, "learning_rate": 0.0005, "loss": 2.1222, "step": 74690 }, { "epoch": 0.2843266368764416, "grad_norm": 0.14363446831703186, "learning_rate": 0.0005, "loss": 2.1086, "step": 74700 }, { "epoch": 0.2843646993445643, "grad_norm": 0.13309715688228607, "learning_rate": 0.0005, "loss": 2.1257, "step": 74710 }, { "epoch": 0.284402761812687, "grad_norm": 0.14354118704795837, "learning_rate": 0.0005, "loss": 2.1093, "step": 74720 }, { "epoch": 0.2844408242808097, "grad_norm": 0.11641010642051697, "learning_rate": 0.0005, "loss": 2.1162, "step": 74730 }, { "epoch": 0.28447888674893235, "grad_norm": 0.13731835782527924, "learning_rate": 0.0005, "loss": 2.1342, "step": 74740 }, { "epoch": 0.284516949217055, "grad_norm": 0.13670092821121216, "learning_rate": 0.0005, "loss": 2.1257, "step": 74750 }, { "epoch": 0.2845550116851777, "grad_norm": 0.1361704021692276, "learning_rate": 0.0005, "loss": 2.1213, "step": 74760 }, { "epoch": 0.2845930741533004, "grad_norm": 0.12592118978500366, "learning_rate": 0.0005, "loss": 2.1171, "step": 74770 }, { "epoch": 0.2846311366214231, "grad_norm": 0.12394729256629944, "learning_rate": 0.0005, "loss": 2.1423, "step": 74780 }, { "epoch": 0.28466919908954574, "grad_norm": 0.12797507643699646, "learning_rate": 0.0005, "loss": 2.1295, "step": 74790 }, { "epoch": 0.28470726155766846, "grad_norm": 0.13249561190605164, "learning_rate": 0.0005, "loss": 2.1274, "step": 74800 }, { "epoch": 0.2847453240257911, "grad_norm": 0.13181069493293762, "learning_rate": 0.0005, "loss": 2.1249, "step": 74810 }, { "epoch": 0.2847833864939138, "grad_norm": 0.13363561034202576, "learning_rate": 0.0005, "loss": 2.1283, "step": 74820 }, { "epoch": 0.2848214489620365, "grad_norm": 0.12551195919513702, "learning_rate": 0.0005, "loss": 2.1266, "step": 74830 }, { "epoch": 0.2848595114301592, "grad_norm": 0.12691380083560944, "learning_rate": 0.0005, "loss": 2.1377, "step": 74840 }, { "epoch": 0.28489757389828185, "grad_norm": 0.13347996771335602, "learning_rate": 0.0005, "loss": 2.111, "step": 74850 }, { "epoch": 0.28493563636640457, "grad_norm": 0.12483879178762436, "learning_rate": 0.0005, "loss": 2.134, "step": 74860 }, { "epoch": 0.2849736988345272, "grad_norm": 0.11743100732564926, "learning_rate": 0.0005, "loss": 2.1269, "step": 74870 }, { "epoch": 0.28501176130264994, "grad_norm": 0.11756300181150436, "learning_rate": 0.0005, "loss": 2.1204, "step": 74880 }, { "epoch": 0.2850498237707726, "grad_norm": 0.1318596601486206, "learning_rate": 0.0005, "loss": 2.1247, "step": 74890 }, { "epoch": 0.28508788623889525, "grad_norm": 0.13540859520435333, "learning_rate": 0.0005, "loss": 2.1436, "step": 74900 }, { "epoch": 0.28512594870701796, "grad_norm": 0.13051091134548187, "learning_rate": 0.0005, "loss": 2.1208, "step": 74910 }, { "epoch": 0.2851640111751406, "grad_norm": 0.13236452639102936, "learning_rate": 0.0005, "loss": 2.1383, "step": 74920 }, { "epoch": 0.28520207364326333, "grad_norm": 0.12521909177303314, "learning_rate": 0.0005, "loss": 2.124, "step": 74930 }, { "epoch": 0.285240136111386, "grad_norm": 0.11868865042924881, "learning_rate": 0.0005, "loss": 2.1329, "step": 74940 }, { "epoch": 0.2852781985795087, "grad_norm": 0.12900525331497192, "learning_rate": 0.0005, "loss": 2.1201, "step": 74950 }, { "epoch": 0.28531626104763136, "grad_norm": 0.12465689331293106, "learning_rate": 0.0005, "loss": 2.1346, "step": 74960 }, { "epoch": 0.28535432351575407, "grad_norm": 0.12036836892366409, "learning_rate": 0.0005, "loss": 2.1207, "step": 74970 }, { "epoch": 0.28539238598387673, "grad_norm": 0.1260298192501068, "learning_rate": 0.0005, "loss": 2.1285, "step": 74980 }, { "epoch": 0.28543044845199944, "grad_norm": 0.12549512088298798, "learning_rate": 0.0005, "loss": 2.1266, "step": 74990 }, { "epoch": 0.2854685109201221, "grad_norm": 0.1338365375995636, "learning_rate": 0.0005, "loss": 2.1347, "step": 75000 }, { "epoch": 0.2855065733882448, "grad_norm": 0.1249644011259079, "learning_rate": 0.0005, "loss": 2.1331, "step": 75010 }, { "epoch": 0.28554463585636747, "grad_norm": 0.11954779922962189, "learning_rate": 0.0005, "loss": 2.1207, "step": 75020 }, { "epoch": 0.2855826983244902, "grad_norm": 0.12321747094392776, "learning_rate": 0.0005, "loss": 2.1456, "step": 75030 }, { "epoch": 0.28562076079261284, "grad_norm": 0.12530706822872162, "learning_rate": 0.0005, "loss": 2.1222, "step": 75040 }, { "epoch": 0.2856588232607355, "grad_norm": 0.11701780557632446, "learning_rate": 0.0005, "loss": 2.1301, "step": 75050 }, { "epoch": 0.2856968857288582, "grad_norm": 0.12913040816783905, "learning_rate": 0.0005, "loss": 2.1149, "step": 75060 }, { "epoch": 0.28573494819698086, "grad_norm": 0.13130801916122437, "learning_rate": 0.0005, "loss": 2.1354, "step": 75070 }, { "epoch": 0.2857730106651036, "grad_norm": 0.12802307307720184, "learning_rate": 0.0005, "loss": 2.1283, "step": 75080 }, { "epoch": 0.28581107313322623, "grad_norm": 0.12605822086334229, "learning_rate": 0.0005, "loss": 2.1317, "step": 75090 }, { "epoch": 0.28584913560134895, "grad_norm": 0.12512341141700745, "learning_rate": 0.0005, "loss": 2.1124, "step": 75100 }, { "epoch": 0.2858871980694716, "grad_norm": 0.13102522492408752, "learning_rate": 0.0005, "loss": 2.1284, "step": 75110 }, { "epoch": 0.2859252605375943, "grad_norm": 0.3720043897628784, "learning_rate": 0.0005, "loss": 2.1222, "step": 75120 }, { "epoch": 0.285963323005717, "grad_norm": 0.11563277244567871, "learning_rate": 0.0005, "loss": 2.1282, "step": 75130 }, { "epoch": 0.2860013854738397, "grad_norm": 0.11799177527427673, "learning_rate": 0.0005, "loss": 2.1262, "step": 75140 }, { "epoch": 0.28603944794196234, "grad_norm": 0.13001975417137146, "learning_rate": 0.0005, "loss": 2.1376, "step": 75150 }, { "epoch": 0.28607751041008506, "grad_norm": 0.13930030167102814, "learning_rate": 0.0005, "loss": 2.1254, "step": 75160 }, { "epoch": 0.2861155728782077, "grad_norm": 0.12200480699539185, "learning_rate": 0.0005, "loss": 2.1486, "step": 75170 }, { "epoch": 0.28615363534633037, "grad_norm": 0.1231589987874031, "learning_rate": 0.0005, "loss": 2.1311, "step": 75180 }, { "epoch": 0.2861916978144531, "grad_norm": 0.1211152896285057, "learning_rate": 0.0005, "loss": 2.1369, "step": 75190 }, { "epoch": 0.28622976028257574, "grad_norm": 0.1369701474905014, "learning_rate": 0.0005, "loss": 2.1268, "step": 75200 }, { "epoch": 0.28626782275069845, "grad_norm": 0.1483720988035202, "learning_rate": 0.0005, "loss": 2.1233, "step": 75210 }, { "epoch": 0.2863058852188211, "grad_norm": 0.1256931573152542, "learning_rate": 0.0005, "loss": 2.1077, "step": 75220 }, { "epoch": 0.2863439476869438, "grad_norm": 0.12462868541479111, "learning_rate": 0.0005, "loss": 2.1159, "step": 75230 }, { "epoch": 0.2863820101550665, "grad_norm": 0.12703180313110352, "learning_rate": 0.0005, "loss": 2.1314, "step": 75240 }, { "epoch": 0.2864200726231892, "grad_norm": 0.127125084400177, "learning_rate": 0.0005, "loss": 2.1184, "step": 75250 }, { "epoch": 0.28645813509131185, "grad_norm": 0.13014651834964752, "learning_rate": 0.0005, "loss": 2.1081, "step": 75260 }, { "epoch": 0.28649619755943456, "grad_norm": 0.13283497095108032, "learning_rate": 0.0005, "loss": 2.1205, "step": 75270 }, { "epoch": 0.2865342600275572, "grad_norm": 0.13949061930179596, "learning_rate": 0.0005, "loss": 2.128, "step": 75280 }, { "epoch": 0.28657232249567993, "grad_norm": 0.12150692194700241, "learning_rate": 0.0005, "loss": 2.1247, "step": 75290 }, { "epoch": 0.2866103849638026, "grad_norm": 0.13123773038387299, "learning_rate": 0.0005, "loss": 2.1313, "step": 75300 }, { "epoch": 0.2866484474319253, "grad_norm": 0.1253703385591507, "learning_rate": 0.0005, "loss": 2.127, "step": 75310 }, { "epoch": 0.28668650990004796, "grad_norm": 0.12628312408924103, "learning_rate": 0.0005, "loss": 2.1163, "step": 75320 }, { "epoch": 0.2867245723681706, "grad_norm": 0.11620379239320755, "learning_rate": 0.0005, "loss": 2.1229, "step": 75330 }, { "epoch": 0.28676263483629333, "grad_norm": 0.12156597524881363, "learning_rate": 0.0005, "loss": 2.1362, "step": 75340 }, { "epoch": 0.286800697304416, "grad_norm": 0.1484280377626419, "learning_rate": 0.0005, "loss": 2.1298, "step": 75350 }, { "epoch": 0.2868387597725387, "grad_norm": 0.14153222739696503, "learning_rate": 0.0005, "loss": 2.1407, "step": 75360 }, { "epoch": 0.28687682224066136, "grad_norm": 0.1369723528623581, "learning_rate": 0.0005, "loss": 2.1149, "step": 75370 }, { "epoch": 0.28691488470878407, "grad_norm": 0.12162962555885315, "learning_rate": 0.0005, "loss": 2.1357, "step": 75380 }, { "epoch": 0.2869529471769067, "grad_norm": 0.12366687506437302, "learning_rate": 0.0005, "loss": 2.1254, "step": 75390 }, { "epoch": 0.28699100964502944, "grad_norm": 0.11675120145082474, "learning_rate": 0.0005, "loss": 2.1316, "step": 75400 }, { "epoch": 0.2870290721131521, "grad_norm": 0.12342153489589691, "learning_rate": 0.0005, "loss": 2.1296, "step": 75410 }, { "epoch": 0.2870671345812748, "grad_norm": 0.14156018197536469, "learning_rate": 0.0005, "loss": 2.1238, "step": 75420 }, { "epoch": 0.28710519704939746, "grad_norm": 0.135422945022583, "learning_rate": 0.0005, "loss": 2.1156, "step": 75430 }, { "epoch": 0.2871432595175202, "grad_norm": 0.13034862279891968, "learning_rate": 0.0005, "loss": 2.122, "step": 75440 }, { "epoch": 0.28718132198564283, "grad_norm": 0.12067141383886337, "learning_rate": 0.0005, "loss": 2.1304, "step": 75450 }, { "epoch": 0.28721938445376555, "grad_norm": 0.14023612439632416, "learning_rate": 0.0005, "loss": 2.1272, "step": 75460 }, { "epoch": 0.2872574469218882, "grad_norm": 0.1173662468791008, "learning_rate": 0.0005, "loss": 2.1441, "step": 75470 }, { "epoch": 0.28729550939001086, "grad_norm": 0.13125747442245483, "learning_rate": 0.0005, "loss": 2.1209, "step": 75480 }, { "epoch": 0.2873335718581336, "grad_norm": 0.1212615892291069, "learning_rate": 0.0005, "loss": 2.1212, "step": 75490 }, { "epoch": 0.28737163432625623, "grad_norm": 0.12185412645339966, "learning_rate": 0.0005, "loss": 2.1342, "step": 75500 }, { "epoch": 0.28740969679437894, "grad_norm": 0.11994576454162598, "learning_rate": 0.0005, "loss": 2.121, "step": 75510 }, { "epoch": 0.2874477592625016, "grad_norm": 0.12515906989574432, "learning_rate": 0.0005, "loss": 2.1317, "step": 75520 }, { "epoch": 0.2874858217306243, "grad_norm": 0.12057694792747498, "learning_rate": 0.0005, "loss": 2.1151, "step": 75530 }, { "epoch": 0.28752388419874697, "grad_norm": 0.12256751954555511, "learning_rate": 0.0005, "loss": 2.1469, "step": 75540 }, { "epoch": 0.2875619466668697, "grad_norm": 0.12268295139074326, "learning_rate": 0.0005, "loss": 2.1269, "step": 75550 }, { "epoch": 0.28760000913499234, "grad_norm": 0.1268979161977768, "learning_rate": 0.0005, "loss": 2.119, "step": 75560 }, { "epoch": 0.28763807160311505, "grad_norm": 0.1251557618379593, "learning_rate": 0.0005, "loss": 2.1252, "step": 75570 }, { "epoch": 0.2876761340712377, "grad_norm": 0.12928272783756256, "learning_rate": 0.0005, "loss": 2.127, "step": 75580 }, { "epoch": 0.2877141965393604, "grad_norm": 0.11583087593317032, "learning_rate": 0.0005, "loss": 2.1363, "step": 75590 }, { "epoch": 0.2877522590074831, "grad_norm": 0.12400549650192261, "learning_rate": 0.0005, "loss": 2.117, "step": 75600 }, { "epoch": 0.28779032147560574, "grad_norm": 0.13491888344287872, "learning_rate": 0.0005, "loss": 2.1286, "step": 75610 }, { "epoch": 0.28782838394372845, "grad_norm": 0.1373259276151657, "learning_rate": 0.0005, "loss": 2.1336, "step": 75620 }, { "epoch": 0.2878664464118511, "grad_norm": 0.1513000875711441, "learning_rate": 0.0005, "loss": 2.133, "step": 75630 }, { "epoch": 0.2879045088799738, "grad_norm": 0.13211409747600555, "learning_rate": 0.0005, "loss": 2.1244, "step": 75640 }, { "epoch": 0.2879425713480965, "grad_norm": 0.11001871526241302, "learning_rate": 0.0005, "loss": 2.1245, "step": 75650 }, { "epoch": 0.2879806338162192, "grad_norm": 0.12871001660823822, "learning_rate": 0.0005, "loss": 2.1377, "step": 75660 }, { "epoch": 0.28801869628434185, "grad_norm": 0.11630739271640778, "learning_rate": 0.0005, "loss": 2.1333, "step": 75670 }, { "epoch": 0.28805675875246456, "grad_norm": 0.12869928777217865, "learning_rate": 0.0005, "loss": 2.1288, "step": 75680 }, { "epoch": 0.2880948212205872, "grad_norm": 0.1421797275543213, "learning_rate": 0.0005, "loss": 2.1185, "step": 75690 }, { "epoch": 0.28813288368870993, "grad_norm": 0.14080357551574707, "learning_rate": 0.0005, "loss": 2.1297, "step": 75700 }, { "epoch": 0.2881709461568326, "grad_norm": 0.12262321263551712, "learning_rate": 0.0005, "loss": 2.1251, "step": 75710 }, { "epoch": 0.2882090086249553, "grad_norm": 0.12388603389263153, "learning_rate": 0.0005, "loss": 2.1323, "step": 75720 }, { "epoch": 0.28824707109307796, "grad_norm": 0.1366262435913086, "learning_rate": 0.0005, "loss": 2.1136, "step": 75730 }, { "epoch": 0.28828513356120067, "grad_norm": 0.11656782776117325, "learning_rate": 0.0005, "loss": 2.1371, "step": 75740 }, { "epoch": 0.2883231960293233, "grad_norm": 0.12055030465126038, "learning_rate": 0.0005, "loss": 2.1232, "step": 75750 }, { "epoch": 0.288361258497446, "grad_norm": 0.12981313467025757, "learning_rate": 0.0005, "loss": 2.1156, "step": 75760 }, { "epoch": 0.2883993209655687, "grad_norm": 0.12313870340585709, "learning_rate": 0.0005, "loss": 2.123, "step": 75770 }, { "epoch": 0.28843738343369135, "grad_norm": 0.13794687390327454, "learning_rate": 0.0005, "loss": 2.1177, "step": 75780 }, { "epoch": 0.28847544590181406, "grad_norm": 0.12309377640485764, "learning_rate": 0.0005, "loss": 2.1333, "step": 75790 }, { "epoch": 0.2885135083699367, "grad_norm": 0.13513195514678955, "learning_rate": 0.0005, "loss": 2.1034, "step": 75800 }, { "epoch": 0.28855157083805943, "grad_norm": 0.1164679303765297, "learning_rate": 0.0005, "loss": 2.1199, "step": 75810 }, { "epoch": 0.2885896333061821, "grad_norm": 0.1197584792971611, "learning_rate": 0.0005, "loss": 2.1218, "step": 75820 }, { "epoch": 0.2886276957743048, "grad_norm": 0.10965582728385925, "learning_rate": 0.0005, "loss": 2.118, "step": 75830 }, { "epoch": 0.28866575824242746, "grad_norm": 0.1275942176580429, "learning_rate": 0.0005, "loss": 2.1303, "step": 75840 }, { "epoch": 0.2887038207105502, "grad_norm": 0.11930042505264282, "learning_rate": 0.0005, "loss": 2.1205, "step": 75850 }, { "epoch": 0.28874188317867283, "grad_norm": 0.13442851603031158, "learning_rate": 0.0005, "loss": 2.1083, "step": 75860 }, { "epoch": 0.28877994564679554, "grad_norm": 0.1342979371547699, "learning_rate": 0.0005, "loss": 2.1161, "step": 75870 }, { "epoch": 0.2888180081149182, "grad_norm": 0.12899208068847656, "learning_rate": 0.0005, "loss": 2.1281, "step": 75880 }, { "epoch": 0.2888560705830409, "grad_norm": 0.1196591928601265, "learning_rate": 0.0005, "loss": 2.1215, "step": 75890 }, { "epoch": 0.28889413305116357, "grad_norm": 0.12251043319702148, "learning_rate": 0.0005, "loss": 2.1488, "step": 75900 }, { "epoch": 0.28893219551928623, "grad_norm": 0.12906859815120697, "learning_rate": 0.0005, "loss": 2.1387, "step": 75910 }, { "epoch": 0.28897025798740894, "grad_norm": 0.12498313933610916, "learning_rate": 0.0005, "loss": 2.1191, "step": 75920 }, { "epoch": 0.2890083204555316, "grad_norm": 0.13862529397010803, "learning_rate": 0.0005, "loss": 2.1263, "step": 75930 }, { "epoch": 0.2890463829236543, "grad_norm": 0.12166710942983627, "learning_rate": 0.0005, "loss": 2.1398, "step": 75940 }, { "epoch": 0.28908444539177697, "grad_norm": 0.11352758854627609, "learning_rate": 0.0005, "loss": 2.1303, "step": 75950 }, { "epoch": 0.2891225078598997, "grad_norm": 0.1291627287864685, "learning_rate": 0.0005, "loss": 2.1146, "step": 75960 }, { "epoch": 0.28916057032802234, "grad_norm": 0.1307760775089264, "learning_rate": 0.0005, "loss": 2.1389, "step": 75970 }, { "epoch": 0.28919863279614505, "grad_norm": 0.13838613033294678, "learning_rate": 0.0005, "loss": 2.126, "step": 75980 }, { "epoch": 0.2892366952642677, "grad_norm": 0.1261829286813736, "learning_rate": 0.0005, "loss": 2.1304, "step": 75990 }, { "epoch": 0.2892747577323904, "grad_norm": 0.12509678304195404, "learning_rate": 0.0005, "loss": 2.133, "step": 76000 }, { "epoch": 0.2893128202005131, "grad_norm": 0.12762987613677979, "learning_rate": 0.0005, "loss": 2.1374, "step": 76010 }, { "epoch": 0.2893508826686358, "grad_norm": 0.14674775302410126, "learning_rate": 0.0005, "loss": 2.1359, "step": 76020 }, { "epoch": 0.28938894513675845, "grad_norm": 0.12226782739162445, "learning_rate": 0.0005, "loss": 2.1193, "step": 76030 }, { "epoch": 0.2894270076048811, "grad_norm": 0.13139107823371887, "learning_rate": 0.0005, "loss": 2.1257, "step": 76040 }, { "epoch": 0.2894650700730038, "grad_norm": 0.12519071996212006, "learning_rate": 0.0005, "loss": 2.1252, "step": 76050 }, { "epoch": 0.2895031325411265, "grad_norm": 0.12543900310993195, "learning_rate": 0.0005, "loss": 2.1243, "step": 76060 }, { "epoch": 0.2895411950092492, "grad_norm": 0.14839930832386017, "learning_rate": 0.0005, "loss": 2.1237, "step": 76070 }, { "epoch": 0.28957925747737184, "grad_norm": 0.12887993454933167, "learning_rate": 0.0005, "loss": 2.1334, "step": 76080 }, { "epoch": 0.28961731994549456, "grad_norm": 0.12014911323785782, "learning_rate": 0.0005, "loss": 2.1376, "step": 76090 }, { "epoch": 0.2896553824136172, "grad_norm": 0.1231941282749176, "learning_rate": 0.0005, "loss": 2.1304, "step": 76100 }, { "epoch": 0.2896934448817399, "grad_norm": 0.14408724009990692, "learning_rate": 0.0005, "loss": 2.1282, "step": 76110 }, { "epoch": 0.2897315073498626, "grad_norm": 0.12259387224912643, "learning_rate": 0.0005, "loss": 2.1206, "step": 76120 }, { "epoch": 0.2897695698179853, "grad_norm": 0.13093988597393036, "learning_rate": 0.0005, "loss": 2.1172, "step": 76130 }, { "epoch": 0.28980763228610795, "grad_norm": 0.12523812055587769, "learning_rate": 0.0005, "loss": 2.1217, "step": 76140 }, { "epoch": 0.28984569475423066, "grad_norm": 0.13562336564064026, "learning_rate": 0.0005, "loss": 2.1363, "step": 76150 }, { "epoch": 0.2898837572223533, "grad_norm": 0.12516431510448456, "learning_rate": 0.0005, "loss": 2.1218, "step": 76160 }, { "epoch": 0.28992181969047603, "grad_norm": 0.12162970751523972, "learning_rate": 0.0005, "loss": 2.1199, "step": 76170 }, { "epoch": 0.2899598821585987, "grad_norm": 0.12627138197422028, "learning_rate": 0.0005, "loss": 2.1167, "step": 76180 }, { "epoch": 0.28999794462672135, "grad_norm": 0.13824544847011566, "learning_rate": 0.0005, "loss": 2.1132, "step": 76190 }, { "epoch": 0.29003600709484406, "grad_norm": 0.12304244190454483, "learning_rate": 0.0005, "loss": 2.1298, "step": 76200 }, { "epoch": 0.2900740695629667, "grad_norm": 0.1312766671180725, "learning_rate": 0.0005, "loss": 2.1324, "step": 76210 }, { "epoch": 0.29011213203108943, "grad_norm": 0.13339751958847046, "learning_rate": 0.0005, "loss": 2.1259, "step": 76220 }, { "epoch": 0.2901501944992121, "grad_norm": 0.1529046893119812, "learning_rate": 0.0005, "loss": 2.1091, "step": 76230 }, { "epoch": 0.2901882569673348, "grad_norm": 0.13795912265777588, "learning_rate": 0.0005, "loss": 2.1061, "step": 76240 }, { "epoch": 0.29022631943545746, "grad_norm": 0.12184420228004456, "learning_rate": 0.0005, "loss": 2.1357, "step": 76250 }, { "epoch": 0.29026438190358017, "grad_norm": 0.12675736844539642, "learning_rate": 0.0005, "loss": 2.1356, "step": 76260 }, { "epoch": 0.29030244437170283, "grad_norm": 0.12017787992954254, "learning_rate": 0.0005, "loss": 2.1239, "step": 76270 }, { "epoch": 0.29034050683982554, "grad_norm": 0.12479761242866516, "learning_rate": 0.0005, "loss": 2.1334, "step": 76280 }, { "epoch": 0.2903785693079482, "grad_norm": 0.1499720960855484, "learning_rate": 0.0005, "loss": 2.1256, "step": 76290 }, { "epoch": 0.2904166317760709, "grad_norm": 0.12147608399391174, "learning_rate": 0.0005, "loss": 2.1469, "step": 76300 }, { "epoch": 0.29045469424419357, "grad_norm": 0.12691138684749603, "learning_rate": 0.0005, "loss": 2.1237, "step": 76310 }, { "epoch": 0.2904927567123163, "grad_norm": 0.14038929343223572, "learning_rate": 0.0005, "loss": 2.1212, "step": 76320 }, { "epoch": 0.29053081918043894, "grad_norm": 0.12324102967977524, "learning_rate": 0.0005, "loss": 2.1128, "step": 76330 }, { "epoch": 0.2905688816485616, "grad_norm": 0.1249835193157196, "learning_rate": 0.0005, "loss": 2.1264, "step": 76340 }, { "epoch": 0.2906069441166843, "grad_norm": 0.12767577171325684, "learning_rate": 0.0005, "loss": 2.1208, "step": 76350 }, { "epoch": 0.29064500658480696, "grad_norm": 0.11480826884508133, "learning_rate": 0.0005, "loss": 2.1356, "step": 76360 }, { "epoch": 0.2906830690529297, "grad_norm": 0.11960236728191376, "learning_rate": 0.0005, "loss": 2.1128, "step": 76370 }, { "epoch": 0.29072113152105233, "grad_norm": 0.1302240490913391, "learning_rate": 0.0005, "loss": 2.1132, "step": 76380 }, { "epoch": 0.29075919398917505, "grad_norm": 0.14519484341144562, "learning_rate": 0.0005, "loss": 2.1135, "step": 76390 }, { "epoch": 0.2907972564572977, "grad_norm": 0.1436435431241989, "learning_rate": 0.0005, "loss": 2.1141, "step": 76400 }, { "epoch": 0.2908353189254204, "grad_norm": 0.11903155595064163, "learning_rate": 0.0005, "loss": 2.1162, "step": 76410 }, { "epoch": 0.2908733813935431, "grad_norm": 0.1267947554588318, "learning_rate": 0.0005, "loss": 2.1422, "step": 76420 }, { "epoch": 0.2909114438616658, "grad_norm": 0.12625740468502045, "learning_rate": 0.0005, "loss": 2.1276, "step": 76430 }, { "epoch": 0.29094950632978844, "grad_norm": 0.11667564511299133, "learning_rate": 0.0005, "loss": 2.108, "step": 76440 }, { "epoch": 0.29098756879791116, "grad_norm": 0.12369339913129807, "learning_rate": 0.0005, "loss": 2.1337, "step": 76450 }, { "epoch": 0.2910256312660338, "grad_norm": 0.12127465754747391, "learning_rate": 0.0005, "loss": 2.1514, "step": 76460 }, { "epoch": 0.29106369373415647, "grad_norm": 0.12116898596286774, "learning_rate": 0.0005, "loss": 2.1295, "step": 76470 }, { "epoch": 0.2911017562022792, "grad_norm": 0.11794110387563705, "learning_rate": 0.0005, "loss": 2.1144, "step": 76480 }, { "epoch": 0.29113981867040184, "grad_norm": 0.12648813426494598, "learning_rate": 0.0005, "loss": 2.1287, "step": 76490 }, { "epoch": 0.29117788113852455, "grad_norm": 0.1190585196018219, "learning_rate": 0.0005, "loss": 2.1317, "step": 76500 }, { "epoch": 0.2912159436066472, "grad_norm": 0.13424323499202728, "learning_rate": 0.0005, "loss": 2.1263, "step": 76510 }, { "epoch": 0.2912540060747699, "grad_norm": 0.12387403100728989, "learning_rate": 0.0005, "loss": 2.1303, "step": 76520 }, { "epoch": 0.2912920685428926, "grad_norm": 0.13513913750648499, "learning_rate": 0.0005, "loss": 2.1322, "step": 76530 }, { "epoch": 0.2913301310110153, "grad_norm": 0.12977351248264313, "learning_rate": 0.0005, "loss": 2.1234, "step": 76540 }, { "epoch": 0.29136819347913795, "grad_norm": 0.12603046000003815, "learning_rate": 0.0005, "loss": 2.1277, "step": 76550 }, { "epoch": 0.29140625594726066, "grad_norm": 0.1314680576324463, "learning_rate": 0.0005, "loss": 2.1264, "step": 76560 }, { "epoch": 0.2914443184153833, "grad_norm": 0.1237976998090744, "learning_rate": 0.0005, "loss": 2.1278, "step": 76570 }, { "epoch": 0.29148238088350603, "grad_norm": 0.1232212632894516, "learning_rate": 0.0005, "loss": 2.1112, "step": 76580 }, { "epoch": 0.2915204433516287, "grad_norm": 0.1358116716146469, "learning_rate": 0.0005, "loss": 2.1357, "step": 76590 }, { "epoch": 0.2915585058197514, "grad_norm": 0.1278366893529892, "learning_rate": 0.0005, "loss": 2.112, "step": 76600 }, { "epoch": 0.29159656828787406, "grad_norm": 0.12315915524959564, "learning_rate": 0.0005, "loss": 2.101, "step": 76610 }, { "epoch": 0.2916346307559967, "grad_norm": 0.13123762607574463, "learning_rate": 0.0005, "loss": 2.1161, "step": 76620 }, { "epoch": 0.29167269322411943, "grad_norm": 0.12064166367053986, "learning_rate": 0.0005, "loss": 2.1263, "step": 76630 }, { "epoch": 0.2917107556922421, "grad_norm": 0.13507580757141113, "learning_rate": 0.0005, "loss": 2.1157, "step": 76640 }, { "epoch": 0.2917488181603648, "grad_norm": 0.11641356348991394, "learning_rate": 0.0005, "loss": 2.1261, "step": 76650 }, { "epoch": 0.29178688062848745, "grad_norm": 0.14286088943481445, "learning_rate": 0.0005, "loss": 2.1214, "step": 76660 }, { "epoch": 0.29182494309661017, "grad_norm": 0.12444773316383362, "learning_rate": 0.0005, "loss": 2.105, "step": 76670 }, { "epoch": 0.2918630055647328, "grad_norm": 0.12737885117530823, "learning_rate": 0.0005, "loss": 2.142, "step": 76680 }, { "epoch": 0.29190106803285554, "grad_norm": 0.1293538510799408, "learning_rate": 0.0005, "loss": 2.1219, "step": 76690 }, { "epoch": 0.2919391305009782, "grad_norm": 0.11138560622930527, "learning_rate": 0.0005, "loss": 2.1189, "step": 76700 }, { "epoch": 0.2919771929691009, "grad_norm": 0.12534061074256897, "learning_rate": 0.0005, "loss": 2.1271, "step": 76710 }, { "epoch": 0.29201525543722356, "grad_norm": 0.12235898524522781, "learning_rate": 0.0005, "loss": 2.1238, "step": 76720 }, { "epoch": 0.2920533179053463, "grad_norm": 0.12848271429538727, "learning_rate": 0.0005, "loss": 2.11, "step": 76730 }, { "epoch": 0.29209138037346893, "grad_norm": 0.14083117246627808, "learning_rate": 0.0005, "loss": 2.1238, "step": 76740 }, { "epoch": 0.29212944284159165, "grad_norm": 0.12196497619152069, "learning_rate": 0.0005, "loss": 2.1284, "step": 76750 }, { "epoch": 0.2921675053097143, "grad_norm": 0.14936399459838867, "learning_rate": 0.0005, "loss": 2.1293, "step": 76760 }, { "epoch": 0.29220556777783696, "grad_norm": 0.1394042670726776, "learning_rate": 0.0005, "loss": 2.1419, "step": 76770 }, { "epoch": 0.2922436302459597, "grad_norm": 0.13346344232559204, "learning_rate": 0.0005, "loss": 2.1394, "step": 76780 }, { "epoch": 0.29228169271408233, "grad_norm": 0.13088877499103546, "learning_rate": 0.0005, "loss": 2.127, "step": 76790 }, { "epoch": 0.29231975518220504, "grad_norm": 0.12399112433195114, "learning_rate": 0.0005, "loss": 2.121, "step": 76800 }, { "epoch": 0.2923578176503277, "grad_norm": 0.12089783698320389, "learning_rate": 0.0005, "loss": 2.1204, "step": 76810 }, { "epoch": 0.2923958801184504, "grad_norm": 0.12687864899635315, "learning_rate": 0.0005, "loss": 2.1083, "step": 76820 }, { "epoch": 0.29243394258657307, "grad_norm": 0.12511956691741943, "learning_rate": 0.0005, "loss": 2.1156, "step": 76830 }, { "epoch": 0.2924720050546958, "grad_norm": 0.11711279302835464, "learning_rate": 0.0005, "loss": 2.1203, "step": 76840 }, { "epoch": 0.29251006752281844, "grad_norm": 0.12532839179039001, "learning_rate": 0.0005, "loss": 2.1179, "step": 76850 }, { "epoch": 0.29254812999094115, "grad_norm": 0.12755703926086426, "learning_rate": 0.0005, "loss": 2.1161, "step": 76860 }, { "epoch": 0.2925861924590638, "grad_norm": 0.1276831328868866, "learning_rate": 0.0005, "loss": 2.1281, "step": 76870 }, { "epoch": 0.2926242549271865, "grad_norm": 0.12448505312204361, "learning_rate": 0.0005, "loss": 2.1388, "step": 76880 }, { "epoch": 0.2926623173953092, "grad_norm": 0.13176031410694122, "learning_rate": 0.0005, "loss": 2.1338, "step": 76890 }, { "epoch": 0.2927003798634319, "grad_norm": 0.1303681880235672, "learning_rate": 0.0005, "loss": 2.1169, "step": 76900 }, { "epoch": 0.29273844233155455, "grad_norm": 0.12910409271717072, "learning_rate": 0.0005, "loss": 2.113, "step": 76910 }, { "epoch": 0.2927765047996772, "grad_norm": 0.14085090160369873, "learning_rate": 0.0005, "loss": 2.1295, "step": 76920 }, { "epoch": 0.2928145672677999, "grad_norm": 0.1368868201971054, "learning_rate": 0.0005, "loss": 2.111, "step": 76930 }, { "epoch": 0.2928526297359226, "grad_norm": 0.14073766767978668, "learning_rate": 0.0005, "loss": 2.1319, "step": 76940 }, { "epoch": 0.2928906922040453, "grad_norm": 0.11861219257116318, "learning_rate": 0.0005, "loss": 2.1293, "step": 76950 }, { "epoch": 0.29292875467216795, "grad_norm": 0.12039192020893097, "learning_rate": 0.0005, "loss": 2.1261, "step": 76960 }, { "epoch": 0.29296681714029066, "grad_norm": 0.11999056488275528, "learning_rate": 0.0005, "loss": 2.1196, "step": 76970 }, { "epoch": 0.2930048796084133, "grad_norm": 0.13445483148097992, "learning_rate": 0.0005, "loss": 2.1255, "step": 76980 }, { "epoch": 0.29304294207653603, "grad_norm": 0.1250647008419037, "learning_rate": 0.0005, "loss": 2.1291, "step": 76990 }, { "epoch": 0.2930810045446587, "grad_norm": 0.11895033717155457, "learning_rate": 0.0005, "loss": 2.1333, "step": 77000 }, { "epoch": 0.2931190670127814, "grad_norm": 0.13068489730358124, "learning_rate": 0.0005, "loss": 2.1276, "step": 77010 }, { "epoch": 0.29315712948090406, "grad_norm": 0.12797445058822632, "learning_rate": 0.0005, "loss": 2.1169, "step": 77020 }, { "epoch": 0.29319519194902677, "grad_norm": 0.13003671169281006, "learning_rate": 0.0005, "loss": 2.1346, "step": 77030 }, { "epoch": 0.2932332544171494, "grad_norm": 0.1370018720626831, "learning_rate": 0.0005, "loss": 2.1295, "step": 77040 }, { "epoch": 0.2932713168852721, "grad_norm": 0.13659188151359558, "learning_rate": 0.0005, "loss": 2.1129, "step": 77050 }, { "epoch": 0.2933093793533948, "grad_norm": 0.1316078156232834, "learning_rate": 0.0005, "loss": 2.1256, "step": 77060 }, { "epoch": 0.29334744182151745, "grad_norm": 0.12242893129587173, "learning_rate": 0.0005, "loss": 2.1407, "step": 77070 }, { "epoch": 0.29338550428964016, "grad_norm": 0.14071308076381683, "learning_rate": 0.0005, "loss": 2.1312, "step": 77080 }, { "epoch": 0.2934235667577628, "grad_norm": 0.11522958427667618, "learning_rate": 0.0005, "loss": 2.1209, "step": 77090 }, { "epoch": 0.29346162922588553, "grad_norm": 0.1277889758348465, "learning_rate": 0.0005, "loss": 2.1271, "step": 77100 }, { "epoch": 0.2934996916940082, "grad_norm": 0.12273859977722168, "learning_rate": 0.0005, "loss": 2.1235, "step": 77110 }, { "epoch": 0.2935377541621309, "grad_norm": 0.13316503167152405, "learning_rate": 0.0005, "loss": 2.1244, "step": 77120 }, { "epoch": 0.29357581663025356, "grad_norm": 0.11983367800712585, "learning_rate": 0.0005, "loss": 2.1223, "step": 77130 }, { "epoch": 0.2936138790983763, "grad_norm": 0.11889369785785675, "learning_rate": 0.0005, "loss": 2.1271, "step": 77140 }, { "epoch": 0.29365194156649893, "grad_norm": 0.1265701949596405, "learning_rate": 0.0005, "loss": 2.1191, "step": 77150 }, { "epoch": 0.29369000403462164, "grad_norm": 0.13863658905029297, "learning_rate": 0.0005, "loss": 2.1158, "step": 77160 }, { "epoch": 0.2937280665027443, "grad_norm": 0.14393354952335358, "learning_rate": 0.0005, "loss": 2.1222, "step": 77170 }, { "epoch": 0.293766128970867, "grad_norm": 0.12794476747512817, "learning_rate": 0.0005, "loss": 2.1271, "step": 77180 }, { "epoch": 0.29380419143898967, "grad_norm": 0.12353495508432388, "learning_rate": 0.0005, "loss": 2.1184, "step": 77190 }, { "epoch": 0.2938422539071123, "grad_norm": 0.12665621936321259, "learning_rate": 0.0005, "loss": 2.1416, "step": 77200 }, { "epoch": 0.29388031637523504, "grad_norm": 0.12271159887313843, "learning_rate": 0.0005, "loss": 2.1155, "step": 77210 }, { "epoch": 0.2939183788433577, "grad_norm": 0.14436013996601105, "learning_rate": 0.0005, "loss": 2.1139, "step": 77220 }, { "epoch": 0.2939564413114804, "grad_norm": 0.12181149423122406, "learning_rate": 0.0005, "loss": 2.1231, "step": 77230 }, { "epoch": 0.29399450377960307, "grad_norm": 0.13174769282341003, "learning_rate": 0.0005, "loss": 2.1309, "step": 77240 }, { "epoch": 0.2940325662477258, "grad_norm": 0.1304592788219452, "learning_rate": 0.0005, "loss": 2.119, "step": 77250 }, { "epoch": 0.29407062871584844, "grad_norm": 0.13582275807857513, "learning_rate": 0.0005, "loss": 2.1137, "step": 77260 }, { "epoch": 0.29410869118397115, "grad_norm": 0.12943530082702637, "learning_rate": 0.0005, "loss": 2.1215, "step": 77270 }, { "epoch": 0.2941467536520938, "grad_norm": 0.12374609708786011, "learning_rate": 0.0005, "loss": 2.14, "step": 77280 }, { "epoch": 0.2941848161202165, "grad_norm": 0.12450854480266571, "learning_rate": 0.0005, "loss": 2.118, "step": 77290 }, { "epoch": 0.2942228785883392, "grad_norm": 0.11895159631967545, "learning_rate": 0.0005, "loss": 2.1131, "step": 77300 }, { "epoch": 0.2942609410564619, "grad_norm": 0.12057216465473175, "learning_rate": 0.0005, "loss": 2.1181, "step": 77310 }, { "epoch": 0.29429900352458455, "grad_norm": 0.11752424389123917, "learning_rate": 0.0005, "loss": 2.1356, "step": 77320 }, { "epoch": 0.29433706599270726, "grad_norm": 0.1295253336429596, "learning_rate": 0.0005, "loss": 2.1385, "step": 77330 }, { "epoch": 0.2943751284608299, "grad_norm": 0.1286362111568451, "learning_rate": 0.0005, "loss": 2.1328, "step": 77340 }, { "epoch": 0.2944131909289526, "grad_norm": 0.11909947544336319, "learning_rate": 0.0005, "loss": 2.126, "step": 77350 }, { "epoch": 0.2944512533970753, "grad_norm": 0.13250653445720673, "learning_rate": 0.0005, "loss": 2.1331, "step": 77360 }, { "epoch": 0.29448931586519794, "grad_norm": 0.1223980188369751, "learning_rate": 0.0005, "loss": 2.1062, "step": 77370 }, { "epoch": 0.29452737833332066, "grad_norm": 0.12595222890377045, "learning_rate": 0.0005, "loss": 2.1191, "step": 77380 }, { "epoch": 0.2945654408014433, "grad_norm": 0.1286715269088745, "learning_rate": 0.0005, "loss": 2.0977, "step": 77390 }, { "epoch": 0.294603503269566, "grad_norm": 0.13232561945915222, "learning_rate": 0.0005, "loss": 2.1217, "step": 77400 }, { "epoch": 0.2946415657376887, "grad_norm": 0.1344754844903946, "learning_rate": 0.0005, "loss": 2.1168, "step": 77410 }, { "epoch": 0.2946796282058114, "grad_norm": 0.12267153710126877, "learning_rate": 0.0005, "loss": 2.1069, "step": 77420 }, { "epoch": 0.29471769067393405, "grad_norm": 0.11879909783601761, "learning_rate": 0.0005, "loss": 2.1196, "step": 77430 }, { "epoch": 0.29475575314205676, "grad_norm": 0.1467399001121521, "learning_rate": 0.0005, "loss": 2.1326, "step": 77440 }, { "epoch": 0.2947938156101794, "grad_norm": 0.12790904939174652, "learning_rate": 0.0005, "loss": 2.1256, "step": 77450 }, { "epoch": 0.29483187807830213, "grad_norm": 0.12110629677772522, "learning_rate": 0.0005, "loss": 2.1266, "step": 77460 }, { "epoch": 0.2948699405464248, "grad_norm": 0.11823920905590057, "learning_rate": 0.0005, "loss": 2.1198, "step": 77470 }, { "epoch": 0.29490800301454745, "grad_norm": 0.11439647525548935, "learning_rate": 0.0005, "loss": 2.1074, "step": 77480 }, { "epoch": 0.29494606548267016, "grad_norm": 0.13713325560092926, "learning_rate": 0.0005, "loss": 2.1273, "step": 77490 }, { "epoch": 0.2949841279507928, "grad_norm": 0.1307079792022705, "learning_rate": 0.0005, "loss": 2.122, "step": 77500 }, { "epoch": 0.29502219041891553, "grad_norm": 0.12214863300323486, "learning_rate": 0.0005, "loss": 2.1404, "step": 77510 }, { "epoch": 0.2950602528870382, "grad_norm": 0.12139144539833069, "learning_rate": 0.0005, "loss": 2.1151, "step": 77520 }, { "epoch": 0.2950983153551609, "grad_norm": 0.13657598197460175, "learning_rate": 0.0005, "loss": 2.1362, "step": 77530 }, { "epoch": 0.29513637782328356, "grad_norm": 0.12807506322860718, "learning_rate": 0.0005, "loss": 2.1152, "step": 77540 }, { "epoch": 0.29517444029140627, "grad_norm": 0.13245591521263123, "learning_rate": 0.0005, "loss": 2.1206, "step": 77550 }, { "epoch": 0.2952125027595289, "grad_norm": 0.12371648848056793, "learning_rate": 0.0005, "loss": 2.1416, "step": 77560 }, { "epoch": 0.29525056522765164, "grad_norm": 0.11524897068738937, "learning_rate": 0.0005, "loss": 2.118, "step": 77570 }, { "epoch": 0.2952886276957743, "grad_norm": 0.11479327082633972, "learning_rate": 0.0005, "loss": 2.1178, "step": 77580 }, { "epoch": 0.295326690163897, "grad_norm": 0.14055931568145752, "learning_rate": 0.0005, "loss": 2.1388, "step": 77590 }, { "epoch": 0.29536475263201967, "grad_norm": 0.11579246819019318, "learning_rate": 0.0005, "loss": 2.1256, "step": 77600 }, { "epoch": 0.2954028151001424, "grad_norm": 0.12510639429092407, "learning_rate": 0.0005, "loss": 2.1218, "step": 77610 }, { "epoch": 0.29544087756826504, "grad_norm": 0.15101352334022522, "learning_rate": 0.0005, "loss": 2.1096, "step": 77620 }, { "epoch": 0.2954789400363877, "grad_norm": 0.1322011798620224, "learning_rate": 0.0005, "loss": 2.1387, "step": 77630 }, { "epoch": 0.2955170025045104, "grad_norm": 0.13096703588962555, "learning_rate": 0.0005, "loss": 2.1242, "step": 77640 }, { "epoch": 0.29555506497263306, "grad_norm": 0.13723981380462646, "learning_rate": 0.0005, "loss": 2.1285, "step": 77650 }, { "epoch": 0.2955931274407558, "grad_norm": 0.14213553071022034, "learning_rate": 0.0005, "loss": 2.1255, "step": 77660 }, { "epoch": 0.29563118990887843, "grad_norm": 0.14133940637111664, "learning_rate": 0.0005, "loss": 2.1312, "step": 77670 }, { "epoch": 0.29566925237700115, "grad_norm": 0.1276482492685318, "learning_rate": 0.0005, "loss": 2.1168, "step": 77680 }, { "epoch": 0.2957073148451238, "grad_norm": 0.12077028304338455, "learning_rate": 0.0005, "loss": 2.1308, "step": 77690 }, { "epoch": 0.2957453773132465, "grad_norm": 0.1180778294801712, "learning_rate": 0.0005, "loss": 2.1211, "step": 77700 }, { "epoch": 0.2957834397813692, "grad_norm": 0.12569206953048706, "learning_rate": 0.0005, "loss": 2.1285, "step": 77710 }, { "epoch": 0.2958215022494919, "grad_norm": 0.14068447053432465, "learning_rate": 0.0005, "loss": 2.1359, "step": 77720 }, { "epoch": 0.29585956471761454, "grad_norm": 0.12761713564395905, "learning_rate": 0.0005, "loss": 2.135, "step": 77730 }, { "epoch": 0.29589762718573726, "grad_norm": 0.1189335286617279, "learning_rate": 0.0005, "loss": 2.12, "step": 77740 }, { "epoch": 0.2959356896538599, "grad_norm": 0.11740929633378983, "learning_rate": 0.0005, "loss": 2.1345, "step": 77750 }, { "epoch": 0.2959737521219826, "grad_norm": 0.12548650801181793, "learning_rate": 0.0005, "loss": 2.1419, "step": 77760 }, { "epoch": 0.2960118145901053, "grad_norm": 0.13285624980926514, "learning_rate": 0.0005, "loss": 2.1198, "step": 77770 }, { "epoch": 0.29604987705822794, "grad_norm": 0.12186500430107117, "learning_rate": 0.0005, "loss": 2.1312, "step": 77780 }, { "epoch": 0.29608793952635065, "grad_norm": 0.14229805767536163, "learning_rate": 0.0005, "loss": 2.1204, "step": 77790 }, { "epoch": 0.2961260019944733, "grad_norm": 0.12769339978694916, "learning_rate": 0.0005, "loss": 2.1218, "step": 77800 }, { "epoch": 0.296164064462596, "grad_norm": 0.12053908407688141, "learning_rate": 0.0005, "loss": 2.1163, "step": 77810 }, { "epoch": 0.2962021269307187, "grad_norm": 0.12113712728023529, "learning_rate": 0.0005, "loss": 2.1327, "step": 77820 }, { "epoch": 0.2962401893988414, "grad_norm": 0.12331096082925797, "learning_rate": 0.0005, "loss": 2.1282, "step": 77830 }, { "epoch": 0.29627825186696405, "grad_norm": 0.12478657066822052, "learning_rate": 0.0005, "loss": 2.1356, "step": 77840 }, { "epoch": 0.29631631433508676, "grad_norm": 0.14286023378372192, "learning_rate": 0.0005, "loss": 2.1245, "step": 77850 }, { "epoch": 0.2963543768032094, "grad_norm": 0.11814841628074646, "learning_rate": 0.0005, "loss": 2.1195, "step": 77860 }, { "epoch": 0.29639243927133213, "grad_norm": 0.12050684541463852, "learning_rate": 0.0005, "loss": 2.1204, "step": 77870 }, { "epoch": 0.2964305017394548, "grad_norm": 0.2006334811449051, "learning_rate": 0.0005, "loss": 2.1134, "step": 77880 }, { "epoch": 0.2964685642075775, "grad_norm": 0.1282692402601242, "learning_rate": 0.0005, "loss": 2.1377, "step": 77890 }, { "epoch": 0.29650662667570016, "grad_norm": 0.11931289732456207, "learning_rate": 0.0005, "loss": 2.1262, "step": 77900 }, { "epoch": 0.2965446891438228, "grad_norm": 0.12939167022705078, "learning_rate": 0.0005, "loss": 2.1257, "step": 77910 }, { "epoch": 0.2965827516119455, "grad_norm": 0.1268099993467331, "learning_rate": 0.0005, "loss": 2.1234, "step": 77920 }, { "epoch": 0.2966208140800682, "grad_norm": 0.13123664259910583, "learning_rate": 0.0005, "loss": 2.1143, "step": 77930 }, { "epoch": 0.2966588765481909, "grad_norm": 0.12263063341379166, "learning_rate": 0.0005, "loss": 2.1325, "step": 77940 }, { "epoch": 0.29669693901631355, "grad_norm": 0.12018568813800812, "learning_rate": 0.0005, "loss": 2.1242, "step": 77950 }, { "epoch": 0.29673500148443627, "grad_norm": 0.11666128784418106, "learning_rate": 0.0005, "loss": 2.1148, "step": 77960 }, { "epoch": 0.2967730639525589, "grad_norm": 0.12188054621219635, "learning_rate": 0.0005, "loss": 2.126, "step": 77970 }, { "epoch": 0.29681112642068164, "grad_norm": 0.12619496881961823, "learning_rate": 0.0005, "loss": 2.1332, "step": 77980 }, { "epoch": 0.2968491888888043, "grad_norm": 0.13010945916175842, "learning_rate": 0.0005, "loss": 2.1258, "step": 77990 }, { "epoch": 0.296887251356927, "grad_norm": 0.1309840977191925, "learning_rate": 0.0005, "loss": 2.1196, "step": 78000 }, { "epoch": 0.29692531382504966, "grad_norm": 0.11490017920732498, "learning_rate": 0.0005, "loss": 2.1151, "step": 78010 }, { "epoch": 0.2969633762931724, "grad_norm": 0.1267571896314621, "learning_rate": 0.0005, "loss": 2.1319, "step": 78020 }, { "epoch": 0.29700143876129503, "grad_norm": 0.12055765092372894, "learning_rate": 0.0005, "loss": 2.1185, "step": 78030 }, { "epoch": 0.29703950122941775, "grad_norm": 0.13392072916030884, "learning_rate": 0.0005, "loss": 2.124, "step": 78040 }, { "epoch": 0.2970775636975404, "grad_norm": 0.11891845613718033, "learning_rate": 0.0005, "loss": 2.1297, "step": 78050 }, { "epoch": 0.29711562616566306, "grad_norm": 0.12135432660579681, "learning_rate": 0.0005, "loss": 2.1351, "step": 78060 }, { "epoch": 0.2971536886337858, "grad_norm": 0.12567847967147827, "learning_rate": 0.0005, "loss": 2.1289, "step": 78070 }, { "epoch": 0.29719175110190843, "grad_norm": 0.12707142531871796, "learning_rate": 0.0005, "loss": 2.1275, "step": 78080 }, { "epoch": 0.29722981357003114, "grad_norm": 0.11984868347644806, "learning_rate": 0.0005, "loss": 2.1134, "step": 78090 }, { "epoch": 0.2972678760381538, "grad_norm": 0.11494628340005875, "learning_rate": 0.0005, "loss": 2.1321, "step": 78100 }, { "epoch": 0.2973059385062765, "grad_norm": 0.11759883165359497, "learning_rate": 0.0005, "loss": 2.1228, "step": 78110 }, { "epoch": 0.29734400097439917, "grad_norm": 0.1299513429403305, "learning_rate": 0.0005, "loss": 2.1199, "step": 78120 }, { "epoch": 0.2973820634425219, "grad_norm": 0.1307305544614792, "learning_rate": 0.0005, "loss": 2.1327, "step": 78130 }, { "epoch": 0.29742012591064454, "grad_norm": 0.12624263763427734, "learning_rate": 0.0005, "loss": 2.1357, "step": 78140 }, { "epoch": 0.29745818837876725, "grad_norm": 0.1459318846464157, "learning_rate": 0.0005, "loss": 2.1429, "step": 78150 }, { "epoch": 0.2974962508468899, "grad_norm": 0.1276773065328598, "learning_rate": 0.0005, "loss": 2.125, "step": 78160 }, { "epoch": 0.2975343133150126, "grad_norm": 0.12030398100614548, "learning_rate": 0.0005, "loss": 2.1373, "step": 78170 }, { "epoch": 0.2975723757831353, "grad_norm": 0.12564341723918915, "learning_rate": 0.0005, "loss": 2.109, "step": 78180 }, { "epoch": 0.297610438251258, "grad_norm": 0.12345995008945465, "learning_rate": 0.0005, "loss": 2.1301, "step": 78190 }, { "epoch": 0.29764850071938065, "grad_norm": 0.1233118325471878, "learning_rate": 0.0005, "loss": 2.1264, "step": 78200 }, { "epoch": 0.2976865631875033, "grad_norm": 0.1251271665096283, "learning_rate": 0.0005, "loss": 2.1131, "step": 78210 }, { "epoch": 0.297724625655626, "grad_norm": 0.11773156374692917, "learning_rate": 0.0005, "loss": 2.1319, "step": 78220 }, { "epoch": 0.2977626881237487, "grad_norm": 0.1261213719844818, "learning_rate": 0.0005, "loss": 2.1146, "step": 78230 }, { "epoch": 0.2978007505918714, "grad_norm": 0.12224511802196503, "learning_rate": 0.0005, "loss": 2.1189, "step": 78240 }, { "epoch": 0.29783881305999405, "grad_norm": 0.12229447066783905, "learning_rate": 0.0005, "loss": 2.1261, "step": 78250 }, { "epoch": 0.29787687552811676, "grad_norm": 0.13535583019256592, "learning_rate": 0.0005, "loss": 2.1242, "step": 78260 }, { "epoch": 0.2979149379962394, "grad_norm": 0.120675228536129, "learning_rate": 0.0005, "loss": 2.1236, "step": 78270 }, { "epoch": 0.2979530004643621, "grad_norm": 0.13022558391094208, "learning_rate": 0.0005, "loss": 2.1133, "step": 78280 }, { "epoch": 0.2979910629324848, "grad_norm": 0.12307160347700119, "learning_rate": 0.0005, "loss": 2.1327, "step": 78290 }, { "epoch": 0.2980291254006075, "grad_norm": 0.1300327628850937, "learning_rate": 0.0005, "loss": 2.1325, "step": 78300 }, { "epoch": 0.29806718786873015, "grad_norm": 0.11864542961120605, "learning_rate": 0.0005, "loss": 2.1173, "step": 78310 }, { "epoch": 0.29810525033685287, "grad_norm": 0.13210463523864746, "learning_rate": 0.0005, "loss": 2.1259, "step": 78320 }, { "epoch": 0.2981433128049755, "grad_norm": 0.11801157891750336, "learning_rate": 0.0005, "loss": 2.1157, "step": 78330 }, { "epoch": 0.2981813752730982, "grad_norm": 0.12514695525169373, "learning_rate": 0.0005, "loss": 2.125, "step": 78340 }, { "epoch": 0.2982194377412209, "grad_norm": 0.12310615181922913, "learning_rate": 0.0005, "loss": 2.1282, "step": 78350 }, { "epoch": 0.29825750020934355, "grad_norm": 0.13028378784656525, "learning_rate": 0.0005, "loss": 2.1241, "step": 78360 }, { "epoch": 0.29829556267746626, "grad_norm": 0.1156439259648323, "learning_rate": 0.0005, "loss": 2.1134, "step": 78370 }, { "epoch": 0.2983336251455889, "grad_norm": 0.12388564646244049, "learning_rate": 0.0005, "loss": 2.1165, "step": 78380 }, { "epoch": 0.29837168761371163, "grad_norm": 0.13688057661056519, "learning_rate": 0.0005, "loss": 2.125, "step": 78390 }, { "epoch": 0.2984097500818343, "grad_norm": 0.13023453950881958, "learning_rate": 0.0005, "loss": 2.121, "step": 78400 }, { "epoch": 0.298447812549957, "grad_norm": 0.12264353781938553, "learning_rate": 0.0005, "loss": 2.1298, "step": 78410 }, { "epoch": 0.29848587501807966, "grad_norm": 0.14520218968391418, "learning_rate": 0.0005, "loss": 2.1259, "step": 78420 }, { "epoch": 0.2985239374862024, "grad_norm": 0.11594079434871674, "learning_rate": 0.0005, "loss": 2.1293, "step": 78430 }, { "epoch": 0.29856199995432503, "grad_norm": 0.12649290263652802, "learning_rate": 0.0005, "loss": 2.1225, "step": 78440 }, { "epoch": 0.29860006242244774, "grad_norm": 0.11950668692588806, "learning_rate": 0.0005, "loss": 2.1276, "step": 78450 }, { "epoch": 0.2986381248905704, "grad_norm": 0.1311403214931488, "learning_rate": 0.0005, "loss": 2.1194, "step": 78460 }, { "epoch": 0.2986761873586931, "grad_norm": 0.14947395026683807, "learning_rate": 0.0005, "loss": 2.1233, "step": 78470 }, { "epoch": 0.29871424982681577, "grad_norm": 0.13090112805366516, "learning_rate": 0.0005, "loss": 2.1416, "step": 78480 }, { "epoch": 0.2987523122949384, "grad_norm": 0.11776856333017349, "learning_rate": 0.0005, "loss": 2.1125, "step": 78490 }, { "epoch": 0.29879037476306114, "grad_norm": 0.16615436971187592, "learning_rate": 0.0005, "loss": 2.1297, "step": 78500 }, { "epoch": 0.2988284372311838, "grad_norm": 0.121536485850811, "learning_rate": 0.0005, "loss": 2.1287, "step": 78510 }, { "epoch": 0.2988664996993065, "grad_norm": 0.12763504683971405, "learning_rate": 0.0005, "loss": 2.1205, "step": 78520 }, { "epoch": 0.29890456216742917, "grad_norm": 0.13472750782966614, "learning_rate": 0.0005, "loss": 2.131, "step": 78530 }, { "epoch": 0.2989426246355519, "grad_norm": 0.12608040869235992, "learning_rate": 0.0005, "loss": 2.122, "step": 78540 }, { "epoch": 0.29898068710367454, "grad_norm": 0.12341855466365814, "learning_rate": 0.0005, "loss": 2.119, "step": 78550 }, { "epoch": 0.29901874957179725, "grad_norm": 0.1237914189696312, "learning_rate": 0.0005, "loss": 2.1279, "step": 78560 }, { "epoch": 0.2990568120399199, "grad_norm": 0.1223236694931984, "learning_rate": 0.0005, "loss": 2.1306, "step": 78570 }, { "epoch": 0.2990948745080426, "grad_norm": 0.12150850892066956, "learning_rate": 0.0005, "loss": 2.1218, "step": 78580 }, { "epoch": 0.2991329369761653, "grad_norm": 0.11814062297344208, "learning_rate": 0.0005, "loss": 2.1177, "step": 78590 }, { "epoch": 0.299170999444288, "grad_norm": 0.1291181743144989, "learning_rate": 0.0005, "loss": 2.114, "step": 78600 }, { "epoch": 0.29920906191241065, "grad_norm": 0.1198849007487297, "learning_rate": 0.0005, "loss": 2.1152, "step": 78610 }, { "epoch": 0.29924712438053336, "grad_norm": 0.12144499272108078, "learning_rate": 0.0005, "loss": 2.1269, "step": 78620 }, { "epoch": 0.299285186848656, "grad_norm": 0.12497196346521378, "learning_rate": 0.0005, "loss": 2.1299, "step": 78630 }, { "epoch": 0.2993232493167787, "grad_norm": 0.12080928683280945, "learning_rate": 0.0005, "loss": 2.1264, "step": 78640 }, { "epoch": 0.2993613117849014, "grad_norm": 0.14563171565532684, "learning_rate": 0.0005, "loss": 2.1541, "step": 78650 }, { "epoch": 0.29939937425302404, "grad_norm": 0.13387221097946167, "learning_rate": 0.0005, "loss": 2.1305, "step": 78660 }, { "epoch": 0.29943743672114675, "grad_norm": 0.11875801533460617, "learning_rate": 0.0005, "loss": 2.1396, "step": 78670 }, { "epoch": 0.2994754991892694, "grad_norm": 0.11633630841970444, "learning_rate": 0.0005, "loss": 2.1113, "step": 78680 }, { "epoch": 0.2995135616573921, "grad_norm": 0.12366536259651184, "learning_rate": 0.0005, "loss": 2.12, "step": 78690 }, { "epoch": 0.2995516241255148, "grad_norm": 0.1260582059621811, "learning_rate": 0.0005, "loss": 2.1215, "step": 78700 }, { "epoch": 0.2995896865936375, "grad_norm": 0.12684305012226105, "learning_rate": 0.0005, "loss": 2.1179, "step": 78710 }, { "epoch": 0.29962774906176015, "grad_norm": 0.1218743845820427, "learning_rate": 0.0005, "loss": 2.1264, "step": 78720 }, { "epoch": 0.29966581152988286, "grad_norm": 0.1152462288737297, "learning_rate": 0.0005, "loss": 2.1214, "step": 78730 }, { "epoch": 0.2997038739980055, "grad_norm": 0.12806940078735352, "learning_rate": 0.0005, "loss": 2.1298, "step": 78740 }, { "epoch": 0.29974193646612823, "grad_norm": 0.11596892774105072, "learning_rate": 0.0005, "loss": 2.1254, "step": 78750 }, { "epoch": 0.2997799989342509, "grad_norm": 0.12253748625516891, "learning_rate": 0.0005, "loss": 2.1374, "step": 78760 }, { "epoch": 0.29981806140237355, "grad_norm": 0.13022203743457794, "learning_rate": 0.0005, "loss": 2.1246, "step": 78770 }, { "epoch": 0.29985612387049626, "grad_norm": 0.12577566504478455, "learning_rate": 0.0005, "loss": 2.1248, "step": 78780 }, { "epoch": 0.2998941863386189, "grad_norm": 0.12384268641471863, "learning_rate": 0.0005, "loss": 2.1211, "step": 78790 }, { "epoch": 0.29993224880674163, "grad_norm": 0.12241975963115692, "learning_rate": 0.0005, "loss": 2.1224, "step": 78800 }, { "epoch": 0.2999703112748643, "grad_norm": 0.12391491234302521, "learning_rate": 0.0005, "loss": 2.1279, "step": 78810 }, { "epoch": 0.300008373742987, "grad_norm": 0.12020351737737656, "learning_rate": 0.0005, "loss": 2.131, "step": 78820 }, { "epoch": 0.30004643621110966, "grad_norm": 0.11576558649539948, "learning_rate": 0.0005, "loss": 2.1402, "step": 78830 }, { "epoch": 0.30008449867923237, "grad_norm": 0.11263838410377502, "learning_rate": 0.0005, "loss": 2.1187, "step": 78840 }, { "epoch": 0.300122561147355, "grad_norm": 0.1252390593290329, "learning_rate": 0.0005, "loss": 2.1111, "step": 78850 }, { "epoch": 0.30016062361547774, "grad_norm": 0.13681459426879883, "learning_rate": 0.0005, "loss": 2.1332, "step": 78860 }, { "epoch": 0.3001986860836004, "grad_norm": 0.1321314573287964, "learning_rate": 0.0005, "loss": 2.1396, "step": 78870 }, { "epoch": 0.3002367485517231, "grad_norm": 0.12426672130823135, "learning_rate": 0.0005, "loss": 2.1245, "step": 78880 }, { "epoch": 0.30027481101984577, "grad_norm": 0.1215248554944992, "learning_rate": 0.0005, "loss": 2.1252, "step": 78890 }, { "epoch": 0.3003128734879685, "grad_norm": 0.1245889812707901, "learning_rate": 0.0005, "loss": 2.129, "step": 78900 }, { "epoch": 0.30035093595609114, "grad_norm": 0.12007194012403488, "learning_rate": 0.0005, "loss": 2.1347, "step": 78910 }, { "epoch": 0.3003889984242138, "grad_norm": 0.1228775754570961, "learning_rate": 0.0005, "loss": 2.1299, "step": 78920 }, { "epoch": 0.3004270608923365, "grad_norm": 0.1209733709692955, "learning_rate": 0.0005, "loss": 2.1155, "step": 78930 }, { "epoch": 0.30046512336045916, "grad_norm": 0.11423403024673462, "learning_rate": 0.0005, "loss": 2.1121, "step": 78940 }, { "epoch": 0.3005031858285819, "grad_norm": 0.12101174890995026, "learning_rate": 0.0005, "loss": 2.1359, "step": 78950 }, { "epoch": 0.30054124829670453, "grad_norm": 0.13360558450222015, "learning_rate": 0.0005, "loss": 2.1354, "step": 78960 }, { "epoch": 0.30057931076482725, "grad_norm": 0.11620435863733292, "learning_rate": 0.0005, "loss": 2.1254, "step": 78970 }, { "epoch": 0.3006173732329499, "grad_norm": 0.12485792487859726, "learning_rate": 0.0005, "loss": 2.1289, "step": 78980 }, { "epoch": 0.3006554357010726, "grad_norm": 0.12738344073295593, "learning_rate": 0.0005, "loss": 2.1205, "step": 78990 }, { "epoch": 0.3006934981691953, "grad_norm": 0.1248827874660492, "learning_rate": 0.0005, "loss": 2.1334, "step": 79000 }, { "epoch": 0.300731560637318, "grad_norm": 0.1254810392856598, "learning_rate": 0.0005, "loss": 2.134, "step": 79010 }, { "epoch": 0.30076962310544064, "grad_norm": 0.13309615850448608, "learning_rate": 0.0005, "loss": 2.1168, "step": 79020 }, { "epoch": 0.30080768557356335, "grad_norm": 0.11738866567611694, "learning_rate": 0.0005, "loss": 2.1188, "step": 79030 }, { "epoch": 0.300845748041686, "grad_norm": 0.12218448519706726, "learning_rate": 0.0005, "loss": 2.1257, "step": 79040 }, { "epoch": 0.3008838105098087, "grad_norm": 0.13277316093444824, "learning_rate": 0.0005, "loss": 2.1266, "step": 79050 }, { "epoch": 0.3009218729779314, "grad_norm": 0.11647707223892212, "learning_rate": 0.0005, "loss": 2.1155, "step": 79060 }, { "epoch": 0.30095993544605404, "grad_norm": 0.12075653672218323, "learning_rate": 0.0005, "loss": 2.1239, "step": 79070 }, { "epoch": 0.30099799791417675, "grad_norm": 0.12462682276964188, "learning_rate": 0.0005, "loss": 2.1162, "step": 79080 }, { "epoch": 0.3010360603822994, "grad_norm": 0.1321898251771927, "learning_rate": 0.0005, "loss": 2.143, "step": 79090 }, { "epoch": 0.3010741228504221, "grad_norm": 0.12736287713050842, "learning_rate": 0.0005, "loss": 2.1128, "step": 79100 }, { "epoch": 0.3011121853185448, "grad_norm": 0.1307191550731659, "learning_rate": 0.0005, "loss": 2.1347, "step": 79110 }, { "epoch": 0.3011502477866675, "grad_norm": 0.13263042271137238, "learning_rate": 0.0005, "loss": 2.1145, "step": 79120 }, { "epoch": 0.30118831025479015, "grad_norm": 0.12208875268697739, "learning_rate": 0.0005, "loss": 2.1351, "step": 79130 }, { "epoch": 0.30122637272291286, "grad_norm": 0.1463027149438858, "learning_rate": 0.0005, "loss": 2.1148, "step": 79140 }, { "epoch": 0.3012644351910355, "grad_norm": 0.1252584457397461, "learning_rate": 0.0005, "loss": 2.1307, "step": 79150 }, { "epoch": 0.30130249765915823, "grad_norm": 0.1358025074005127, "learning_rate": 0.0005, "loss": 2.1239, "step": 79160 }, { "epoch": 0.3013405601272809, "grad_norm": 0.12210721522569656, "learning_rate": 0.0005, "loss": 2.1234, "step": 79170 }, { "epoch": 0.3013786225954036, "grad_norm": 0.13368509709835052, "learning_rate": 0.0005, "loss": 2.1382, "step": 79180 }, { "epoch": 0.30141668506352626, "grad_norm": 0.1288861781358719, "learning_rate": 0.0005, "loss": 2.1341, "step": 79190 }, { "epoch": 0.30145474753164897, "grad_norm": 0.12868791818618774, "learning_rate": 0.0005, "loss": 2.118, "step": 79200 }, { "epoch": 0.3014928099997716, "grad_norm": 0.12193211913108826, "learning_rate": 0.0005, "loss": 2.1249, "step": 79210 }, { "epoch": 0.3015308724678943, "grad_norm": 0.11928920447826385, "learning_rate": 0.0005, "loss": 2.1306, "step": 79220 }, { "epoch": 0.301568934936017, "grad_norm": 0.11605922132730484, "learning_rate": 0.0005, "loss": 2.1113, "step": 79230 }, { "epoch": 0.30160699740413965, "grad_norm": 0.12350272387266159, "learning_rate": 0.0005, "loss": 2.1234, "step": 79240 }, { "epoch": 0.30164505987226237, "grad_norm": 0.12360288202762604, "learning_rate": 0.0005, "loss": 2.1241, "step": 79250 }, { "epoch": 0.301683122340385, "grad_norm": 0.11717429757118225, "learning_rate": 0.0005, "loss": 2.1156, "step": 79260 }, { "epoch": 0.30172118480850774, "grad_norm": 0.12328846752643585, "learning_rate": 0.0005, "loss": 2.1226, "step": 79270 }, { "epoch": 0.3017592472766304, "grad_norm": 0.11897186934947968, "learning_rate": 0.0005, "loss": 2.1407, "step": 79280 }, { "epoch": 0.3017973097447531, "grad_norm": 0.1136583536863327, "learning_rate": 0.0005, "loss": 2.1364, "step": 79290 }, { "epoch": 0.30183537221287576, "grad_norm": 0.12160983681678772, "learning_rate": 0.0005, "loss": 2.1261, "step": 79300 }, { "epoch": 0.3018734346809985, "grad_norm": 0.12209036201238632, "learning_rate": 0.0005, "loss": 2.1163, "step": 79310 }, { "epoch": 0.30191149714912113, "grad_norm": 0.1218925416469574, "learning_rate": 0.0005, "loss": 2.1314, "step": 79320 }, { "epoch": 0.30194955961724385, "grad_norm": 0.14273086190223694, "learning_rate": 0.0005, "loss": 2.1415, "step": 79330 }, { "epoch": 0.3019876220853665, "grad_norm": 0.11334306746721268, "learning_rate": 0.0005, "loss": 2.1205, "step": 79340 }, { "epoch": 0.30202568455348916, "grad_norm": 0.13532891869544983, "learning_rate": 0.0005, "loss": 2.1185, "step": 79350 }, { "epoch": 0.3020637470216119, "grad_norm": 0.13038349151611328, "learning_rate": 0.0005, "loss": 2.1199, "step": 79360 }, { "epoch": 0.30210180948973453, "grad_norm": 0.1337863951921463, "learning_rate": 0.0005, "loss": 2.1355, "step": 79370 }, { "epoch": 0.30213987195785724, "grad_norm": 0.11505336314439774, "learning_rate": 0.0005, "loss": 2.1171, "step": 79380 }, { "epoch": 0.3021779344259799, "grad_norm": 0.13061103224754333, "learning_rate": 0.0005, "loss": 2.138, "step": 79390 }, { "epoch": 0.3022159968941026, "grad_norm": 0.12964025139808655, "learning_rate": 0.0005, "loss": 2.1122, "step": 79400 }, { "epoch": 0.30225405936222527, "grad_norm": 0.11681259423494339, "learning_rate": 0.0005, "loss": 2.121, "step": 79410 }, { "epoch": 0.302292121830348, "grad_norm": 0.11721569299697876, "learning_rate": 0.0005, "loss": 2.1252, "step": 79420 }, { "epoch": 0.30233018429847064, "grad_norm": 0.12377406656742096, "learning_rate": 0.0005, "loss": 2.1186, "step": 79430 }, { "epoch": 0.30236824676659335, "grad_norm": 0.13084007799625397, "learning_rate": 0.0005, "loss": 2.1152, "step": 79440 }, { "epoch": 0.302406309234716, "grad_norm": 0.12417054176330566, "learning_rate": 0.0005, "loss": 2.1247, "step": 79450 }, { "epoch": 0.3024443717028387, "grad_norm": 0.15026329457759857, "learning_rate": 0.0005, "loss": 2.1203, "step": 79460 }, { "epoch": 0.3024824341709614, "grad_norm": 0.11741312593221664, "learning_rate": 0.0005, "loss": 2.1282, "step": 79470 }, { "epoch": 0.3025204966390841, "grad_norm": 0.13819944858551025, "learning_rate": 0.0005, "loss": 2.1333, "step": 79480 }, { "epoch": 0.30255855910720675, "grad_norm": 0.12461134791374207, "learning_rate": 0.0005, "loss": 2.1102, "step": 79490 }, { "epoch": 0.3025966215753294, "grad_norm": 0.12379152327775955, "learning_rate": 0.0005, "loss": 2.1317, "step": 79500 }, { "epoch": 0.3026346840434521, "grad_norm": 0.12998011708259583, "learning_rate": 0.0005, "loss": 2.1293, "step": 79510 }, { "epoch": 0.3026727465115748, "grad_norm": 0.12944366037845612, "learning_rate": 0.0005, "loss": 2.1132, "step": 79520 }, { "epoch": 0.3027108089796975, "grad_norm": 0.12776261568069458, "learning_rate": 0.0005, "loss": 2.1076, "step": 79530 }, { "epoch": 0.30274887144782014, "grad_norm": 0.1301940679550171, "learning_rate": 0.0005, "loss": 2.1204, "step": 79540 }, { "epoch": 0.30278693391594286, "grad_norm": 0.14208351075649261, "learning_rate": 0.0005, "loss": 2.1156, "step": 79550 }, { "epoch": 0.3028249963840655, "grad_norm": 0.12232249975204468, "learning_rate": 0.0005, "loss": 2.1325, "step": 79560 }, { "epoch": 0.3028630588521882, "grad_norm": 0.14237213134765625, "learning_rate": 0.0005, "loss": 2.1294, "step": 79570 }, { "epoch": 0.3029011213203109, "grad_norm": 0.12116596847772598, "learning_rate": 0.0005, "loss": 2.1178, "step": 79580 }, { "epoch": 0.3029391837884336, "grad_norm": 0.14186422526836395, "learning_rate": 0.0005, "loss": 2.1262, "step": 79590 }, { "epoch": 0.30297724625655625, "grad_norm": 0.13023512065410614, "learning_rate": 0.0005, "loss": 2.1169, "step": 79600 }, { "epoch": 0.30301530872467897, "grad_norm": 0.12457878142595291, "learning_rate": 0.0005, "loss": 2.1213, "step": 79610 }, { "epoch": 0.3030533711928016, "grad_norm": 0.13523663580417633, "learning_rate": 0.0005, "loss": 2.1214, "step": 79620 }, { "epoch": 0.30309143366092434, "grad_norm": 0.12412901967763901, "learning_rate": 0.0005, "loss": 2.1326, "step": 79630 }, { "epoch": 0.303129496129047, "grad_norm": 0.15414534509181976, "learning_rate": 0.0005, "loss": 2.1191, "step": 79640 }, { "epoch": 0.30316755859716965, "grad_norm": 0.12909327447414398, "learning_rate": 0.0005, "loss": 2.1111, "step": 79650 }, { "epoch": 0.30320562106529236, "grad_norm": 0.11341016739606857, "learning_rate": 0.0005, "loss": 2.1319, "step": 79660 }, { "epoch": 0.303243683533415, "grad_norm": 0.1284220963716507, "learning_rate": 0.0005, "loss": 2.1255, "step": 79670 }, { "epoch": 0.30328174600153773, "grad_norm": 0.12455248087644577, "learning_rate": 0.0005, "loss": 2.1198, "step": 79680 }, { "epoch": 0.3033198084696604, "grad_norm": 0.13200350105762482, "learning_rate": 0.0005, "loss": 2.1118, "step": 79690 }, { "epoch": 0.3033578709377831, "grad_norm": 0.1278725415468216, "learning_rate": 0.0005, "loss": 2.1403, "step": 79700 }, { "epoch": 0.30339593340590576, "grad_norm": 0.12103582173585892, "learning_rate": 0.0005, "loss": 2.1055, "step": 79710 }, { "epoch": 0.3034339958740285, "grad_norm": 0.13383394479751587, "learning_rate": 0.0005, "loss": 2.1293, "step": 79720 }, { "epoch": 0.30347205834215113, "grad_norm": 0.14621300995349884, "learning_rate": 0.0005, "loss": 2.1266, "step": 79730 }, { "epoch": 0.30351012081027384, "grad_norm": 0.12872327864170074, "learning_rate": 0.0005, "loss": 2.1051, "step": 79740 }, { "epoch": 0.3035481832783965, "grad_norm": 0.1543866991996765, "learning_rate": 0.0005, "loss": 2.142, "step": 79750 }, { "epoch": 0.3035862457465192, "grad_norm": 0.1281316876411438, "learning_rate": 0.0005, "loss": 2.126, "step": 79760 }, { "epoch": 0.30362430821464187, "grad_norm": 0.13811922073364258, "learning_rate": 0.0005, "loss": 2.1282, "step": 79770 }, { "epoch": 0.3036623706827645, "grad_norm": 0.11999211460351944, "learning_rate": 0.0005, "loss": 2.1258, "step": 79780 }, { "epoch": 0.30370043315088724, "grad_norm": 0.12365420162677765, "learning_rate": 0.0005, "loss": 2.1194, "step": 79790 }, { "epoch": 0.3037384956190099, "grad_norm": 0.11934592574834824, "learning_rate": 0.0005, "loss": 2.1072, "step": 79800 }, { "epoch": 0.3037765580871326, "grad_norm": 0.1247946098446846, "learning_rate": 0.0005, "loss": 2.1345, "step": 79810 }, { "epoch": 0.30381462055525527, "grad_norm": 0.1289442628622055, "learning_rate": 0.0005, "loss": 2.1328, "step": 79820 }, { "epoch": 0.303852683023378, "grad_norm": 0.13009287416934967, "learning_rate": 0.0005, "loss": 2.121, "step": 79830 }, { "epoch": 0.30389074549150064, "grad_norm": 0.12187264859676361, "learning_rate": 0.0005, "loss": 2.1064, "step": 79840 }, { "epoch": 0.30392880795962335, "grad_norm": 0.129324808716774, "learning_rate": 0.0005, "loss": 2.108, "step": 79850 }, { "epoch": 0.303966870427746, "grad_norm": 0.13142378628253937, "learning_rate": 0.0005, "loss": 2.1239, "step": 79860 }, { "epoch": 0.3040049328958687, "grad_norm": 0.13151879608631134, "learning_rate": 0.0005, "loss": 2.1289, "step": 79870 }, { "epoch": 0.3040429953639914, "grad_norm": 0.11516924202442169, "learning_rate": 0.0005, "loss": 2.1216, "step": 79880 }, { "epoch": 0.3040810578321141, "grad_norm": 0.13340215384960175, "learning_rate": 0.0005, "loss": 2.1179, "step": 79890 }, { "epoch": 0.30411912030023674, "grad_norm": 0.1257549673318863, "learning_rate": 0.0005, "loss": 2.1266, "step": 79900 }, { "epoch": 0.30415718276835946, "grad_norm": 0.11783862113952637, "learning_rate": 0.0005, "loss": 2.1275, "step": 79910 }, { "epoch": 0.3041952452364821, "grad_norm": 0.12970565259456635, "learning_rate": 0.0005, "loss": 2.1293, "step": 79920 }, { "epoch": 0.30423330770460477, "grad_norm": 0.12826666235923767, "learning_rate": 0.0005, "loss": 2.1254, "step": 79930 }, { "epoch": 0.3042713701727275, "grad_norm": 0.13249367475509644, "learning_rate": 0.0005, "loss": 2.1202, "step": 79940 }, { "epoch": 0.30430943264085014, "grad_norm": 0.12484166771173477, "learning_rate": 0.0005, "loss": 2.1299, "step": 79950 }, { "epoch": 0.30434749510897285, "grad_norm": 0.12426943331956863, "learning_rate": 0.0005, "loss": 2.1238, "step": 79960 }, { "epoch": 0.3043855575770955, "grad_norm": 0.1309194564819336, "learning_rate": 0.0005, "loss": 2.1224, "step": 79970 }, { "epoch": 0.3044236200452182, "grad_norm": 0.12265652418136597, "learning_rate": 0.0005, "loss": 2.1146, "step": 79980 }, { "epoch": 0.3044616825133409, "grad_norm": 0.13268910348415375, "learning_rate": 0.0005, "loss": 2.1391, "step": 79990 }, { "epoch": 0.3044997449814636, "grad_norm": 0.12794676423072815, "learning_rate": 0.0005, "loss": 2.1206, "step": 80000 }, { "epoch": 0.30453780744958625, "grad_norm": 0.1281774789094925, "learning_rate": 0.0005, "loss": 2.1285, "step": 80010 }, { "epoch": 0.30457586991770896, "grad_norm": 0.13531850278377533, "learning_rate": 0.0005, "loss": 2.1281, "step": 80020 }, { "epoch": 0.3046139323858316, "grad_norm": 0.13700567185878754, "learning_rate": 0.0005, "loss": 2.1157, "step": 80030 }, { "epoch": 0.30465199485395433, "grad_norm": 0.13160550594329834, "learning_rate": 0.0005, "loss": 2.1267, "step": 80040 }, { "epoch": 0.304690057322077, "grad_norm": 0.12073525786399841, "learning_rate": 0.0005, "loss": 2.135, "step": 80050 }, { "epoch": 0.3047281197901997, "grad_norm": 0.12149331718683243, "learning_rate": 0.0005, "loss": 2.1216, "step": 80060 }, { "epoch": 0.30476618225832236, "grad_norm": 0.13199874758720398, "learning_rate": 0.0005, "loss": 2.1161, "step": 80070 }, { "epoch": 0.304804244726445, "grad_norm": 0.14713576436042786, "learning_rate": 0.0005, "loss": 2.1313, "step": 80080 }, { "epoch": 0.30484230719456773, "grad_norm": 0.12162210047245026, "learning_rate": 0.0005, "loss": 2.1202, "step": 80090 }, { "epoch": 0.3048803696626904, "grad_norm": 0.11976277083158493, "learning_rate": 0.0005, "loss": 2.1249, "step": 80100 }, { "epoch": 0.3049184321308131, "grad_norm": 0.11663439124822617, "learning_rate": 0.0005, "loss": 2.1039, "step": 80110 }, { "epoch": 0.30495649459893576, "grad_norm": 0.11407138407230377, "learning_rate": 0.0005, "loss": 2.1276, "step": 80120 }, { "epoch": 0.30499455706705847, "grad_norm": 0.11895322799682617, "learning_rate": 0.0005, "loss": 2.1346, "step": 80130 }, { "epoch": 0.3050326195351811, "grad_norm": 0.12373096495866776, "learning_rate": 0.0005, "loss": 2.1309, "step": 80140 }, { "epoch": 0.30507068200330384, "grad_norm": 0.12218614667654037, "learning_rate": 0.0005, "loss": 2.1254, "step": 80150 }, { "epoch": 0.3051087444714265, "grad_norm": 0.11597719043493271, "learning_rate": 0.0005, "loss": 2.1249, "step": 80160 }, { "epoch": 0.3051468069395492, "grad_norm": 0.12582460045814514, "learning_rate": 0.0005, "loss": 2.1329, "step": 80170 }, { "epoch": 0.30518486940767187, "grad_norm": 0.13839006423950195, "learning_rate": 0.0005, "loss": 2.1237, "step": 80180 }, { "epoch": 0.3052229318757946, "grad_norm": 0.1346767693758011, "learning_rate": 0.0005, "loss": 2.1329, "step": 80190 }, { "epoch": 0.30526099434391724, "grad_norm": 0.13050340116024017, "learning_rate": 0.0005, "loss": 2.1139, "step": 80200 }, { "epoch": 0.3052990568120399, "grad_norm": 0.12204218655824661, "learning_rate": 0.0005, "loss": 2.1235, "step": 80210 }, { "epoch": 0.3053371192801626, "grad_norm": 0.1268157809972763, "learning_rate": 0.0005, "loss": 2.142, "step": 80220 }, { "epoch": 0.30537518174828526, "grad_norm": 0.12779980897903442, "learning_rate": 0.0005, "loss": 2.1154, "step": 80230 }, { "epoch": 0.305413244216408, "grad_norm": 0.12223111093044281, "learning_rate": 0.0005, "loss": 2.1171, "step": 80240 }, { "epoch": 0.30545130668453063, "grad_norm": 0.11273365467786789, "learning_rate": 0.0005, "loss": 2.1284, "step": 80250 }, { "epoch": 0.30548936915265335, "grad_norm": 0.13877561688423157, "learning_rate": 0.0005, "loss": 2.1553, "step": 80260 }, { "epoch": 0.305527431620776, "grad_norm": 0.13216367363929749, "learning_rate": 0.0005, "loss": 2.1316, "step": 80270 }, { "epoch": 0.3055654940888987, "grad_norm": 0.12295740097761154, "learning_rate": 0.0005, "loss": 2.1373, "step": 80280 }, { "epoch": 0.30560355655702137, "grad_norm": 0.1392471343278885, "learning_rate": 0.0005, "loss": 2.1221, "step": 80290 }, { "epoch": 0.3056416190251441, "grad_norm": 0.125979945063591, "learning_rate": 0.0005, "loss": 2.1342, "step": 80300 }, { "epoch": 0.30567968149326674, "grad_norm": 0.12661942839622498, "learning_rate": 0.0005, "loss": 2.1244, "step": 80310 }, { "epoch": 0.30571774396138945, "grad_norm": 0.12181428074836731, "learning_rate": 0.0005, "loss": 2.1271, "step": 80320 }, { "epoch": 0.3057558064295121, "grad_norm": 0.11611612141132355, "learning_rate": 0.0005, "loss": 2.1324, "step": 80330 }, { "epoch": 0.3057938688976348, "grad_norm": 0.1311190128326416, "learning_rate": 0.0005, "loss": 2.1336, "step": 80340 }, { "epoch": 0.3058319313657575, "grad_norm": 0.12849178910255432, "learning_rate": 0.0005, "loss": 2.1203, "step": 80350 }, { "epoch": 0.30586999383388014, "grad_norm": 0.14650395512580872, "learning_rate": 0.0005, "loss": 2.1283, "step": 80360 }, { "epoch": 0.30590805630200285, "grad_norm": 0.14445063471794128, "learning_rate": 0.0005, "loss": 2.1247, "step": 80370 }, { "epoch": 0.3059461187701255, "grad_norm": 0.11967745423316956, "learning_rate": 0.0005, "loss": 2.1294, "step": 80380 }, { "epoch": 0.3059841812382482, "grad_norm": 0.12437941133975983, "learning_rate": 0.0005, "loss": 2.1525, "step": 80390 }, { "epoch": 0.3060222437063709, "grad_norm": 0.11988309025764465, "learning_rate": 0.0005, "loss": 2.1255, "step": 80400 }, { "epoch": 0.3060603061744936, "grad_norm": 0.122268445789814, "learning_rate": 0.0005, "loss": 2.1285, "step": 80410 }, { "epoch": 0.30609836864261625, "grad_norm": 0.11369524151086807, "learning_rate": 0.0005, "loss": 2.1237, "step": 80420 }, { "epoch": 0.30613643111073896, "grad_norm": 0.1246071457862854, "learning_rate": 0.0005, "loss": 2.1254, "step": 80430 }, { "epoch": 0.3061744935788616, "grad_norm": 0.13041529059410095, "learning_rate": 0.0005, "loss": 2.1313, "step": 80440 }, { "epoch": 0.30621255604698433, "grad_norm": 0.12700675427913666, "learning_rate": 0.0005, "loss": 2.119, "step": 80450 }, { "epoch": 0.306250618515107, "grad_norm": 0.1286431849002838, "learning_rate": 0.0005, "loss": 2.1244, "step": 80460 }, { "epoch": 0.3062886809832297, "grad_norm": 0.12057080119848251, "learning_rate": 0.0005, "loss": 2.1299, "step": 80470 }, { "epoch": 0.30632674345135236, "grad_norm": 0.1279134750366211, "learning_rate": 0.0005, "loss": 2.1223, "step": 80480 }, { "epoch": 0.30636480591947507, "grad_norm": 0.13255710899829865, "learning_rate": 0.0005, "loss": 2.1146, "step": 80490 }, { "epoch": 0.3064028683875977, "grad_norm": 0.11516743898391724, "learning_rate": 0.0005, "loss": 2.1272, "step": 80500 }, { "epoch": 0.3064409308557204, "grad_norm": 0.12610827386379242, "learning_rate": 0.0005, "loss": 2.127, "step": 80510 }, { "epoch": 0.3064789933238431, "grad_norm": 0.11879970878362656, "learning_rate": 0.0005, "loss": 2.1386, "step": 80520 }, { "epoch": 0.30651705579196575, "grad_norm": 0.11862923949956894, "learning_rate": 0.0005, "loss": 2.1246, "step": 80530 }, { "epoch": 0.30655511826008847, "grad_norm": 0.12239440530538559, "learning_rate": 0.0005, "loss": 2.1206, "step": 80540 }, { "epoch": 0.3065931807282111, "grad_norm": 0.12470649182796478, "learning_rate": 0.0005, "loss": 2.1383, "step": 80550 }, { "epoch": 0.30663124319633384, "grad_norm": 0.12501883506774902, "learning_rate": 0.0005, "loss": 2.1248, "step": 80560 }, { "epoch": 0.3066693056644565, "grad_norm": 0.1407422125339508, "learning_rate": 0.0005, "loss": 2.1314, "step": 80570 }, { "epoch": 0.3067073681325792, "grad_norm": 0.12229721248149872, "learning_rate": 0.0005, "loss": 2.1394, "step": 80580 }, { "epoch": 0.30674543060070186, "grad_norm": 0.11805040389299393, "learning_rate": 0.0005, "loss": 2.105, "step": 80590 }, { "epoch": 0.3067834930688246, "grad_norm": 0.1299421191215515, "learning_rate": 0.0005, "loss": 2.1268, "step": 80600 }, { "epoch": 0.30682155553694723, "grad_norm": 0.12241193652153015, "learning_rate": 0.0005, "loss": 2.1204, "step": 80610 }, { "epoch": 0.30685961800506995, "grad_norm": 0.13378703594207764, "learning_rate": 0.0005, "loss": 2.1371, "step": 80620 }, { "epoch": 0.3068976804731926, "grad_norm": 0.12666665017604828, "learning_rate": 0.0005, "loss": 2.1214, "step": 80630 }, { "epoch": 0.30693574294131526, "grad_norm": 0.13109880685806274, "learning_rate": 0.0005, "loss": 2.1309, "step": 80640 }, { "epoch": 0.30697380540943797, "grad_norm": 0.12502406537532806, "learning_rate": 0.0005, "loss": 2.1327, "step": 80650 }, { "epoch": 0.30701186787756063, "grad_norm": 0.1233179122209549, "learning_rate": 0.0005, "loss": 2.1339, "step": 80660 }, { "epoch": 0.30704993034568334, "grad_norm": 0.1341659426689148, "learning_rate": 0.0005, "loss": 2.1254, "step": 80670 }, { "epoch": 0.307087992813806, "grad_norm": 0.17359262704849243, "learning_rate": 0.0005, "loss": 2.1229, "step": 80680 }, { "epoch": 0.3071260552819287, "grad_norm": 0.12785649299621582, "learning_rate": 0.0005, "loss": 2.1257, "step": 80690 }, { "epoch": 0.30716411775005137, "grad_norm": 0.12294891476631165, "learning_rate": 0.0005, "loss": 2.1148, "step": 80700 }, { "epoch": 0.3072021802181741, "grad_norm": 0.11376363039016724, "learning_rate": 0.0005, "loss": 2.125, "step": 80710 }, { "epoch": 0.30724024268629674, "grad_norm": 0.12576399743556976, "learning_rate": 0.0005, "loss": 2.12, "step": 80720 }, { "epoch": 0.30727830515441945, "grad_norm": 0.12487640976905823, "learning_rate": 0.0005, "loss": 2.1321, "step": 80730 }, { "epoch": 0.3073163676225421, "grad_norm": 0.12618839740753174, "learning_rate": 0.0005, "loss": 2.1176, "step": 80740 }, { "epoch": 0.3073544300906648, "grad_norm": 0.11612705141305923, "learning_rate": 0.0005, "loss": 2.1357, "step": 80750 }, { "epoch": 0.3073924925587875, "grad_norm": 0.12097634375095367, "learning_rate": 0.0005, "loss": 2.1184, "step": 80760 }, { "epoch": 0.3074305550269102, "grad_norm": 0.12826375663280487, "learning_rate": 0.0005, "loss": 2.1157, "step": 80770 }, { "epoch": 0.30746861749503285, "grad_norm": 0.13108542561531067, "learning_rate": 0.0005, "loss": 2.1341, "step": 80780 }, { "epoch": 0.3075066799631555, "grad_norm": 0.12649457156658173, "learning_rate": 0.0005, "loss": 2.1238, "step": 80790 }, { "epoch": 0.3075447424312782, "grad_norm": 0.12309123575687408, "learning_rate": 0.0005, "loss": 2.1122, "step": 80800 }, { "epoch": 0.3075828048994009, "grad_norm": 0.12929083406925201, "learning_rate": 0.0005, "loss": 2.1413, "step": 80810 }, { "epoch": 0.3076208673675236, "grad_norm": 0.11814327538013458, "learning_rate": 0.0005, "loss": 2.12, "step": 80820 }, { "epoch": 0.30765892983564624, "grad_norm": 0.14209777116775513, "learning_rate": 0.0005, "loss": 2.1178, "step": 80830 }, { "epoch": 0.30769699230376896, "grad_norm": 0.12277386337518692, "learning_rate": 0.0005, "loss": 2.1268, "step": 80840 }, { "epoch": 0.3077350547718916, "grad_norm": 0.14316172897815704, "learning_rate": 0.0005, "loss": 2.1145, "step": 80850 }, { "epoch": 0.3077731172400143, "grad_norm": 0.12181728333234787, "learning_rate": 0.0005, "loss": 2.1326, "step": 80860 }, { "epoch": 0.307811179708137, "grad_norm": 0.12636560201644897, "learning_rate": 0.0005, "loss": 2.1209, "step": 80870 }, { "epoch": 0.3078492421762597, "grad_norm": 0.13825927674770355, "learning_rate": 0.0005, "loss": 2.1306, "step": 80880 }, { "epoch": 0.30788730464438235, "grad_norm": 0.14487019181251526, "learning_rate": 0.0005, "loss": 2.1241, "step": 80890 }, { "epoch": 0.30792536711250507, "grad_norm": 0.1338072568178177, "learning_rate": 0.0005, "loss": 2.1186, "step": 80900 }, { "epoch": 0.3079634295806277, "grad_norm": 0.12863421440124512, "learning_rate": 0.0005, "loss": 2.1245, "step": 80910 }, { "epoch": 0.30800149204875044, "grad_norm": 0.1381281465291977, "learning_rate": 0.0005, "loss": 2.1213, "step": 80920 }, { "epoch": 0.3080395545168731, "grad_norm": 0.12698550522327423, "learning_rate": 0.0005, "loss": 2.1068, "step": 80930 }, { "epoch": 0.30807761698499575, "grad_norm": 0.12528090178966522, "learning_rate": 0.0005, "loss": 2.1153, "step": 80940 }, { "epoch": 0.30811567945311846, "grad_norm": 0.12406053394079208, "learning_rate": 0.0005, "loss": 2.1223, "step": 80950 }, { "epoch": 0.3081537419212411, "grad_norm": 0.12869417667388916, "learning_rate": 0.0005, "loss": 2.1263, "step": 80960 }, { "epoch": 0.30819180438936383, "grad_norm": 0.12129397690296173, "learning_rate": 0.0005, "loss": 2.1397, "step": 80970 }, { "epoch": 0.3082298668574865, "grad_norm": 0.14590273797512054, "learning_rate": 0.0005, "loss": 2.1238, "step": 80980 }, { "epoch": 0.3082679293256092, "grad_norm": 0.12732772529125214, "learning_rate": 0.0005, "loss": 2.119, "step": 80990 }, { "epoch": 0.30830599179373186, "grad_norm": 0.11924770474433899, "learning_rate": 0.0005, "loss": 2.129, "step": 81000 }, { "epoch": 0.30834405426185457, "grad_norm": 0.12182494252920151, "learning_rate": 0.0005, "loss": 2.1139, "step": 81010 }, { "epoch": 0.30838211672997723, "grad_norm": 0.11438652127981186, "learning_rate": 0.0005, "loss": 2.1104, "step": 81020 }, { "epoch": 0.30842017919809994, "grad_norm": 0.11833816021680832, "learning_rate": 0.0005, "loss": 2.1291, "step": 81030 }, { "epoch": 0.3084582416662226, "grad_norm": 0.12479960173368454, "learning_rate": 0.0005, "loss": 2.1217, "step": 81040 }, { "epoch": 0.3084963041343453, "grad_norm": 0.11975480616092682, "learning_rate": 0.0005, "loss": 2.1284, "step": 81050 }, { "epoch": 0.30853436660246797, "grad_norm": 0.15223437547683716, "learning_rate": 0.0005, "loss": 2.1138, "step": 81060 }, { "epoch": 0.3085724290705906, "grad_norm": 0.1335378885269165, "learning_rate": 0.0005, "loss": 2.1283, "step": 81070 }, { "epoch": 0.30861049153871334, "grad_norm": 0.13886189460754395, "learning_rate": 0.0005, "loss": 2.121, "step": 81080 }, { "epoch": 0.308648554006836, "grad_norm": 0.13074654340744019, "learning_rate": 0.0005, "loss": 2.1206, "step": 81090 }, { "epoch": 0.3086866164749587, "grad_norm": 0.13728798925876617, "learning_rate": 0.0005, "loss": 2.1387, "step": 81100 }, { "epoch": 0.30872467894308137, "grad_norm": 0.12100497633218765, "learning_rate": 0.0005, "loss": 2.1259, "step": 81110 }, { "epoch": 0.3087627414112041, "grad_norm": 0.12018078565597534, "learning_rate": 0.0005, "loss": 2.1239, "step": 81120 }, { "epoch": 0.30880080387932674, "grad_norm": 0.12105260044336319, "learning_rate": 0.0005, "loss": 2.1195, "step": 81130 }, { "epoch": 0.30883886634744945, "grad_norm": 0.11997707188129425, "learning_rate": 0.0005, "loss": 2.1274, "step": 81140 }, { "epoch": 0.3088769288155721, "grad_norm": 0.11912636458873749, "learning_rate": 0.0005, "loss": 2.1281, "step": 81150 }, { "epoch": 0.3089149912836948, "grad_norm": 0.1256522387266159, "learning_rate": 0.0005, "loss": 2.1343, "step": 81160 }, { "epoch": 0.3089530537518175, "grad_norm": 0.12447361648082733, "learning_rate": 0.0005, "loss": 2.1223, "step": 81170 }, { "epoch": 0.3089911162199402, "grad_norm": 0.12331148236989975, "learning_rate": 0.0005, "loss": 2.1257, "step": 81180 }, { "epoch": 0.30902917868806284, "grad_norm": 0.12121827155351639, "learning_rate": 0.0005, "loss": 2.1228, "step": 81190 }, { "epoch": 0.30906724115618556, "grad_norm": 0.13635829091072083, "learning_rate": 0.0005, "loss": 2.113, "step": 81200 }, { "epoch": 0.3091053036243082, "grad_norm": 0.10935920476913452, "learning_rate": 0.0005, "loss": 2.12, "step": 81210 }, { "epoch": 0.30914336609243087, "grad_norm": 0.12570302188396454, "learning_rate": 0.0005, "loss": 2.1313, "step": 81220 }, { "epoch": 0.3091814285605536, "grad_norm": 0.13077987730503082, "learning_rate": 0.0005, "loss": 2.1095, "step": 81230 }, { "epoch": 0.30921949102867624, "grad_norm": 0.12293568253517151, "learning_rate": 0.0005, "loss": 2.135, "step": 81240 }, { "epoch": 0.30925755349679895, "grad_norm": 0.14048981666564941, "learning_rate": 0.0005, "loss": 2.1237, "step": 81250 }, { "epoch": 0.3092956159649216, "grad_norm": 0.12315212190151215, "learning_rate": 0.0005, "loss": 2.1226, "step": 81260 }, { "epoch": 0.3093336784330443, "grad_norm": 0.1180586889386177, "learning_rate": 0.0005, "loss": 2.1162, "step": 81270 }, { "epoch": 0.309371740901167, "grad_norm": 0.12581458687782288, "learning_rate": 0.0005, "loss": 2.135, "step": 81280 }, { "epoch": 0.3094098033692897, "grad_norm": 0.13311411440372467, "learning_rate": 0.0005, "loss": 2.1128, "step": 81290 }, { "epoch": 0.30944786583741235, "grad_norm": 0.12257678806781769, "learning_rate": 0.0005, "loss": 2.1357, "step": 81300 }, { "epoch": 0.30948592830553506, "grad_norm": 0.12836302816867828, "learning_rate": 0.0005, "loss": 2.1331, "step": 81310 }, { "epoch": 0.3095239907736577, "grad_norm": 0.11991007626056671, "learning_rate": 0.0005, "loss": 2.13, "step": 81320 }, { "epoch": 0.30956205324178043, "grad_norm": 0.12842082977294922, "learning_rate": 0.0005, "loss": 2.1306, "step": 81330 }, { "epoch": 0.3096001157099031, "grad_norm": 0.13021841645240784, "learning_rate": 0.0005, "loss": 2.1233, "step": 81340 }, { "epoch": 0.3096381781780258, "grad_norm": 0.11219491064548492, "learning_rate": 0.0005, "loss": 2.1201, "step": 81350 }, { "epoch": 0.30967624064614846, "grad_norm": 0.13072046637535095, "learning_rate": 0.0005, "loss": 2.1292, "step": 81360 }, { "epoch": 0.3097143031142711, "grad_norm": 0.1303190290927887, "learning_rate": 0.0005, "loss": 2.1175, "step": 81370 }, { "epoch": 0.30975236558239383, "grad_norm": 0.12236102670431137, "learning_rate": 0.0005, "loss": 2.1065, "step": 81380 }, { "epoch": 0.3097904280505165, "grad_norm": 0.12813524901866913, "learning_rate": 0.0005, "loss": 2.1372, "step": 81390 }, { "epoch": 0.3098284905186392, "grad_norm": 0.1337427794933319, "learning_rate": 0.0005, "loss": 2.1325, "step": 81400 }, { "epoch": 0.30986655298676186, "grad_norm": 0.11618053168058395, "learning_rate": 0.0005, "loss": 2.1223, "step": 81410 }, { "epoch": 0.30990461545488457, "grad_norm": 0.11345713585615158, "learning_rate": 0.0005, "loss": 2.1252, "step": 81420 }, { "epoch": 0.3099426779230072, "grad_norm": 0.11531246453523636, "learning_rate": 0.0005, "loss": 2.1298, "step": 81430 }, { "epoch": 0.30998074039112994, "grad_norm": 0.1406860649585724, "learning_rate": 0.0005, "loss": 2.1388, "step": 81440 }, { "epoch": 0.3100188028592526, "grad_norm": 0.11967140436172485, "learning_rate": 0.0005, "loss": 2.1163, "step": 81450 }, { "epoch": 0.3100568653273753, "grad_norm": 0.13636508584022522, "learning_rate": 0.0005, "loss": 2.1244, "step": 81460 }, { "epoch": 0.31009492779549797, "grad_norm": 0.11664880812168121, "learning_rate": 0.0005, "loss": 2.1188, "step": 81470 }, { "epoch": 0.3101329902636207, "grad_norm": 0.1184813529253006, "learning_rate": 0.0005, "loss": 2.1145, "step": 81480 }, { "epoch": 0.31017105273174334, "grad_norm": 0.11842097342014313, "learning_rate": 0.0005, "loss": 2.1149, "step": 81490 }, { "epoch": 0.310209115199866, "grad_norm": 0.11978063732385635, "learning_rate": 0.0005, "loss": 2.1249, "step": 81500 }, { "epoch": 0.3102471776679887, "grad_norm": 0.1305670589208603, "learning_rate": 0.0005, "loss": 2.1229, "step": 81510 }, { "epoch": 0.31028524013611136, "grad_norm": 0.12473088502883911, "learning_rate": 0.0005, "loss": 2.0986, "step": 81520 }, { "epoch": 0.3103233026042341, "grad_norm": 0.13610634207725525, "learning_rate": 0.0005, "loss": 2.1213, "step": 81530 }, { "epoch": 0.31036136507235673, "grad_norm": 0.12722398340702057, "learning_rate": 0.0005, "loss": 2.1189, "step": 81540 }, { "epoch": 0.31039942754047944, "grad_norm": 0.11809282749891281, "learning_rate": 0.0005, "loss": 2.1221, "step": 81550 }, { "epoch": 0.3104374900086021, "grad_norm": 0.11579307168722153, "learning_rate": 0.0005, "loss": 2.135, "step": 81560 }, { "epoch": 0.3104755524767248, "grad_norm": 0.1184435710310936, "learning_rate": 0.0005, "loss": 2.1271, "step": 81570 }, { "epoch": 0.31051361494484747, "grad_norm": 0.11998499184846878, "learning_rate": 0.0005, "loss": 2.1153, "step": 81580 }, { "epoch": 0.3105516774129702, "grad_norm": 0.11946476250886917, "learning_rate": 0.0005, "loss": 2.1091, "step": 81590 }, { "epoch": 0.31058973988109284, "grad_norm": 0.12768951058387756, "learning_rate": 0.0005, "loss": 2.0991, "step": 81600 }, { "epoch": 0.31062780234921555, "grad_norm": 0.11800798773765564, "learning_rate": 0.0005, "loss": 2.1214, "step": 81610 }, { "epoch": 0.3106658648173382, "grad_norm": 0.1284877508878708, "learning_rate": 0.0005, "loss": 2.1336, "step": 81620 }, { "epoch": 0.3107039272854609, "grad_norm": 0.13244765996932983, "learning_rate": 0.0005, "loss": 2.126, "step": 81630 }, { "epoch": 0.3107419897535836, "grad_norm": 0.12389449030160904, "learning_rate": 0.0005, "loss": 2.1286, "step": 81640 }, { "epoch": 0.31078005222170624, "grad_norm": 0.1269783228635788, "learning_rate": 0.0005, "loss": 2.1265, "step": 81650 }, { "epoch": 0.31081811468982895, "grad_norm": 0.12645235657691956, "learning_rate": 0.0005, "loss": 2.1322, "step": 81660 }, { "epoch": 0.3108561771579516, "grad_norm": 0.12159260362386703, "learning_rate": 0.0005, "loss": 2.1221, "step": 81670 }, { "epoch": 0.3108942396260743, "grad_norm": 0.140539288520813, "learning_rate": 0.0005, "loss": 2.1301, "step": 81680 }, { "epoch": 0.310932302094197, "grad_norm": 0.12919221818447113, "learning_rate": 0.0005, "loss": 2.1034, "step": 81690 }, { "epoch": 0.3109703645623197, "grad_norm": 0.13578203320503235, "learning_rate": 0.0005, "loss": 2.121, "step": 81700 }, { "epoch": 0.31100842703044235, "grad_norm": 0.11720879375934601, "learning_rate": 0.0005, "loss": 2.1015, "step": 81710 }, { "epoch": 0.31104648949856506, "grad_norm": 0.11770500987768173, "learning_rate": 0.0005, "loss": 2.1168, "step": 81720 }, { "epoch": 0.3110845519666877, "grad_norm": 0.1371755450963974, "learning_rate": 0.0005, "loss": 2.1228, "step": 81730 }, { "epoch": 0.31112261443481043, "grad_norm": 0.12106727808713913, "learning_rate": 0.0005, "loss": 2.1122, "step": 81740 }, { "epoch": 0.3111606769029331, "grad_norm": 0.1305050551891327, "learning_rate": 0.0005, "loss": 2.1295, "step": 81750 }, { "epoch": 0.3111987393710558, "grad_norm": 0.13445325195789337, "learning_rate": 0.0005, "loss": 2.1351, "step": 81760 }, { "epoch": 0.31123680183917846, "grad_norm": 0.1206723302602768, "learning_rate": 0.0005, "loss": 2.1178, "step": 81770 }, { "epoch": 0.31127486430730117, "grad_norm": 0.1241808757185936, "learning_rate": 0.0005, "loss": 2.1242, "step": 81780 }, { "epoch": 0.3113129267754238, "grad_norm": 0.12647885084152222, "learning_rate": 0.0005, "loss": 2.1209, "step": 81790 }, { "epoch": 0.3113509892435465, "grad_norm": 0.11793207377195358, "learning_rate": 0.0005, "loss": 2.124, "step": 81800 }, { "epoch": 0.3113890517116692, "grad_norm": 0.13345180451869965, "learning_rate": 0.0005, "loss": 2.1236, "step": 81810 }, { "epoch": 0.31142711417979185, "grad_norm": 0.13315977156162262, "learning_rate": 0.0005, "loss": 2.1275, "step": 81820 }, { "epoch": 0.31146517664791457, "grad_norm": 0.1259089708328247, "learning_rate": 0.0005, "loss": 2.117, "step": 81830 }, { "epoch": 0.3115032391160372, "grad_norm": 0.11526235193014145, "learning_rate": 0.0005, "loss": 2.1182, "step": 81840 }, { "epoch": 0.31154130158415994, "grad_norm": 0.13554058969020844, "learning_rate": 0.0005, "loss": 2.1212, "step": 81850 }, { "epoch": 0.3115793640522826, "grad_norm": 0.1364506483078003, "learning_rate": 0.0005, "loss": 2.1287, "step": 81860 }, { "epoch": 0.3116174265204053, "grad_norm": 0.1301794946193695, "learning_rate": 0.0005, "loss": 2.1256, "step": 81870 }, { "epoch": 0.31165548898852796, "grad_norm": 0.1580042839050293, "learning_rate": 0.0005, "loss": 2.1174, "step": 81880 }, { "epoch": 0.3116935514566507, "grad_norm": 0.13931454718112946, "learning_rate": 0.0005, "loss": 2.145, "step": 81890 }, { "epoch": 0.31173161392477333, "grad_norm": 0.13075725734233856, "learning_rate": 0.0005, "loss": 2.1153, "step": 81900 }, { "epoch": 0.31176967639289604, "grad_norm": 0.14682362973690033, "learning_rate": 0.0005, "loss": 2.1165, "step": 81910 }, { "epoch": 0.3118077388610187, "grad_norm": 0.14456802606582642, "learning_rate": 0.0005, "loss": 2.1167, "step": 81920 }, { "epoch": 0.3118458013291414, "grad_norm": 0.13011014461517334, "learning_rate": 0.0005, "loss": 2.1273, "step": 81930 }, { "epoch": 0.31188386379726407, "grad_norm": 0.13531902432441711, "learning_rate": 0.0005, "loss": 2.1011, "step": 81940 }, { "epoch": 0.31192192626538673, "grad_norm": 0.13501720130443573, "learning_rate": 0.0005, "loss": 2.1187, "step": 81950 }, { "epoch": 0.31195998873350944, "grad_norm": 0.12197815626859665, "learning_rate": 0.0005, "loss": 2.113, "step": 81960 }, { "epoch": 0.3119980512016321, "grad_norm": 0.11486975848674774, "learning_rate": 0.0005, "loss": 2.1203, "step": 81970 }, { "epoch": 0.3120361136697548, "grad_norm": 0.11543624103069305, "learning_rate": 0.0005, "loss": 2.1251, "step": 81980 }, { "epoch": 0.31207417613787747, "grad_norm": 0.14072877168655396, "learning_rate": 0.0005, "loss": 2.1233, "step": 81990 }, { "epoch": 0.3121122386060002, "grad_norm": 0.11827097833156586, "learning_rate": 0.0005, "loss": 2.1412, "step": 82000 }, { "epoch": 0.31215030107412284, "grad_norm": 0.12375832349061966, "learning_rate": 0.0005, "loss": 2.1087, "step": 82010 }, { "epoch": 0.31218836354224555, "grad_norm": 0.12070365995168686, "learning_rate": 0.0005, "loss": 2.1256, "step": 82020 }, { "epoch": 0.3122264260103682, "grad_norm": 0.1299162209033966, "learning_rate": 0.0005, "loss": 2.1264, "step": 82030 }, { "epoch": 0.3122644884784909, "grad_norm": 0.11619587242603302, "learning_rate": 0.0005, "loss": 2.1233, "step": 82040 }, { "epoch": 0.3123025509466136, "grad_norm": 0.13671796023845673, "learning_rate": 0.0005, "loss": 2.135, "step": 82050 }, { "epoch": 0.3123406134147363, "grad_norm": 0.11353994160890579, "learning_rate": 0.0005, "loss": 2.136, "step": 82060 }, { "epoch": 0.31237867588285895, "grad_norm": 0.12547942996025085, "learning_rate": 0.0005, "loss": 2.1177, "step": 82070 }, { "epoch": 0.3124167383509816, "grad_norm": 0.12046770751476288, "learning_rate": 0.0005, "loss": 2.1225, "step": 82080 }, { "epoch": 0.3124548008191043, "grad_norm": 0.1242809072136879, "learning_rate": 0.0005, "loss": 2.1295, "step": 82090 }, { "epoch": 0.312492863287227, "grad_norm": 0.12850497663021088, "learning_rate": 0.0005, "loss": 2.135, "step": 82100 }, { "epoch": 0.3125309257553497, "grad_norm": 0.13980376720428467, "learning_rate": 0.0005, "loss": 2.1192, "step": 82110 }, { "epoch": 0.31256898822347234, "grad_norm": 0.12118227034807205, "learning_rate": 0.0005, "loss": 2.1311, "step": 82120 }, { "epoch": 0.31260705069159506, "grad_norm": 0.13284452259540558, "learning_rate": 0.0005, "loss": 2.1224, "step": 82130 }, { "epoch": 0.3126451131597177, "grad_norm": 0.13698700070381165, "learning_rate": 0.0005, "loss": 2.1455, "step": 82140 }, { "epoch": 0.3126831756278404, "grad_norm": 0.13114923238754272, "learning_rate": 0.0005, "loss": 2.1198, "step": 82150 }, { "epoch": 0.3127212380959631, "grad_norm": 0.14440645277500153, "learning_rate": 0.0005, "loss": 2.1225, "step": 82160 }, { "epoch": 0.3127593005640858, "grad_norm": 0.1295204907655716, "learning_rate": 0.0005, "loss": 2.1171, "step": 82170 }, { "epoch": 0.31279736303220845, "grad_norm": 0.12142180651426315, "learning_rate": 0.0005, "loss": 2.1301, "step": 82180 }, { "epoch": 0.31283542550033117, "grad_norm": 0.12380526959896088, "learning_rate": 0.0005, "loss": 2.1326, "step": 82190 }, { "epoch": 0.3128734879684538, "grad_norm": 0.1308668702840805, "learning_rate": 0.0005, "loss": 2.1231, "step": 82200 }, { "epoch": 0.31291155043657654, "grad_norm": 0.12716633081436157, "learning_rate": 0.0005, "loss": 2.1333, "step": 82210 }, { "epoch": 0.3129496129046992, "grad_norm": 0.13136553764343262, "learning_rate": 0.0005, "loss": 2.1222, "step": 82220 }, { "epoch": 0.31298767537282185, "grad_norm": 0.12801966071128845, "learning_rate": 0.0005, "loss": 2.1465, "step": 82230 }, { "epoch": 0.31302573784094456, "grad_norm": 0.12598226964473724, "learning_rate": 0.0005, "loss": 2.1245, "step": 82240 }, { "epoch": 0.3130638003090672, "grad_norm": 0.11136070638895035, "learning_rate": 0.0005, "loss": 2.1196, "step": 82250 }, { "epoch": 0.31310186277718993, "grad_norm": 0.12658236920833588, "learning_rate": 0.0005, "loss": 2.1242, "step": 82260 }, { "epoch": 0.3131399252453126, "grad_norm": 0.1299169957637787, "learning_rate": 0.0005, "loss": 2.1162, "step": 82270 }, { "epoch": 0.3131779877134353, "grad_norm": 0.11735150963068008, "learning_rate": 0.0005, "loss": 2.1295, "step": 82280 }, { "epoch": 0.31321605018155796, "grad_norm": 0.11863543093204498, "learning_rate": 0.0005, "loss": 2.1351, "step": 82290 }, { "epoch": 0.31325411264968067, "grad_norm": 0.1262042224407196, "learning_rate": 0.0005, "loss": 2.1218, "step": 82300 }, { "epoch": 0.31329217511780333, "grad_norm": 0.11971874535083771, "learning_rate": 0.0005, "loss": 2.141, "step": 82310 }, { "epoch": 0.31333023758592604, "grad_norm": 0.11084333807229996, "learning_rate": 0.0005, "loss": 2.1299, "step": 82320 }, { "epoch": 0.3133683000540487, "grad_norm": 0.13071833550930023, "learning_rate": 0.0005, "loss": 2.1384, "step": 82330 }, { "epoch": 0.3134063625221714, "grad_norm": 0.13296374678611755, "learning_rate": 0.0005, "loss": 2.1342, "step": 82340 }, { "epoch": 0.31344442499029407, "grad_norm": 0.1339769810438156, "learning_rate": 0.0005, "loss": 2.1403, "step": 82350 }, { "epoch": 0.3134824874584168, "grad_norm": 0.12778893113136292, "learning_rate": 0.0005, "loss": 2.1358, "step": 82360 }, { "epoch": 0.31352054992653944, "grad_norm": 0.11761803925037384, "learning_rate": 0.0005, "loss": 2.1226, "step": 82370 }, { "epoch": 0.3135586123946621, "grad_norm": 0.12162280082702637, "learning_rate": 0.0005, "loss": 2.1155, "step": 82380 }, { "epoch": 0.3135966748627848, "grad_norm": 0.12358004599809647, "learning_rate": 0.0005, "loss": 2.1138, "step": 82390 }, { "epoch": 0.31363473733090746, "grad_norm": 0.12462770938873291, "learning_rate": 0.0005, "loss": 2.12, "step": 82400 }, { "epoch": 0.3136727997990302, "grad_norm": 0.12161625921726227, "learning_rate": 0.0005, "loss": 2.1196, "step": 82410 }, { "epoch": 0.31371086226715283, "grad_norm": 0.12660494446754456, "learning_rate": 0.0005, "loss": 2.1309, "step": 82420 }, { "epoch": 0.31374892473527555, "grad_norm": 0.13155150413513184, "learning_rate": 0.0005, "loss": 2.1209, "step": 82430 }, { "epoch": 0.3137869872033982, "grad_norm": 0.12684141099452972, "learning_rate": 0.0005, "loss": 2.1242, "step": 82440 }, { "epoch": 0.3138250496715209, "grad_norm": 0.11854333430528641, "learning_rate": 0.0005, "loss": 2.1229, "step": 82450 }, { "epoch": 0.3138631121396436, "grad_norm": 0.12792110443115234, "learning_rate": 0.0005, "loss": 2.1193, "step": 82460 }, { "epoch": 0.3139011746077663, "grad_norm": 0.12354903668165207, "learning_rate": 0.0005, "loss": 2.1276, "step": 82470 }, { "epoch": 0.31393923707588894, "grad_norm": 0.12082675844430923, "learning_rate": 0.0005, "loss": 2.1209, "step": 82480 }, { "epoch": 0.31397729954401166, "grad_norm": 0.12705554068088531, "learning_rate": 0.0005, "loss": 2.1302, "step": 82490 }, { "epoch": 0.3140153620121343, "grad_norm": 0.12151361256837845, "learning_rate": 0.0005, "loss": 2.1354, "step": 82500 }, { "epoch": 0.31405342448025697, "grad_norm": 0.11975157260894775, "learning_rate": 0.0005, "loss": 2.1203, "step": 82510 }, { "epoch": 0.3140914869483797, "grad_norm": 0.21595405042171478, "learning_rate": 0.0005, "loss": 2.1362, "step": 82520 }, { "epoch": 0.31412954941650234, "grad_norm": 0.12035899609327316, "learning_rate": 0.0005, "loss": 2.1281, "step": 82530 }, { "epoch": 0.31416761188462505, "grad_norm": 0.14372199773788452, "learning_rate": 0.0005, "loss": 2.1303, "step": 82540 }, { "epoch": 0.3142056743527477, "grad_norm": 0.12237467616796494, "learning_rate": 0.0005, "loss": 2.1218, "step": 82550 }, { "epoch": 0.3142437368208704, "grad_norm": 0.1329762488603592, "learning_rate": 0.0005, "loss": 2.1169, "step": 82560 }, { "epoch": 0.3142817992889931, "grad_norm": 0.13943737745285034, "learning_rate": 0.0005, "loss": 2.1277, "step": 82570 }, { "epoch": 0.3143198617571158, "grad_norm": 0.12585225701332092, "learning_rate": 0.0005, "loss": 2.1212, "step": 82580 }, { "epoch": 0.31435792422523845, "grad_norm": 0.1350608915090561, "learning_rate": 0.0005, "loss": 2.1152, "step": 82590 }, { "epoch": 0.31439598669336116, "grad_norm": 0.12448044866323471, "learning_rate": 0.0005, "loss": 2.1338, "step": 82600 }, { "epoch": 0.3144340491614838, "grad_norm": 0.13429243862628937, "learning_rate": 0.0005, "loss": 2.127, "step": 82610 }, { "epoch": 0.31447211162960653, "grad_norm": 0.12286079674959183, "learning_rate": 0.0005, "loss": 2.1459, "step": 82620 }, { "epoch": 0.3145101740977292, "grad_norm": 0.12074466049671173, "learning_rate": 0.0005, "loss": 2.1232, "step": 82630 }, { "epoch": 0.3145482365658519, "grad_norm": 0.12522518634796143, "learning_rate": 0.0005, "loss": 2.1229, "step": 82640 }, { "epoch": 0.31458629903397456, "grad_norm": 0.1266522854566574, "learning_rate": 0.0005, "loss": 2.1235, "step": 82650 }, { "epoch": 0.3146243615020972, "grad_norm": 0.1456829160451889, "learning_rate": 0.0005, "loss": 2.1156, "step": 82660 }, { "epoch": 0.31466242397021993, "grad_norm": 0.12169001996517181, "learning_rate": 0.0005, "loss": 2.1248, "step": 82670 }, { "epoch": 0.3147004864383426, "grad_norm": 0.11195474117994308, "learning_rate": 0.0005, "loss": 2.1209, "step": 82680 }, { "epoch": 0.3147385489064653, "grad_norm": 0.12271256744861603, "learning_rate": 0.0005, "loss": 2.1179, "step": 82690 }, { "epoch": 0.31477661137458796, "grad_norm": 0.1269110143184662, "learning_rate": 0.0005, "loss": 2.1319, "step": 82700 }, { "epoch": 0.31481467384271067, "grad_norm": 0.12153013050556183, "learning_rate": 0.0005, "loss": 2.1241, "step": 82710 }, { "epoch": 0.3148527363108333, "grad_norm": 0.13175609707832336, "learning_rate": 0.0005, "loss": 2.1222, "step": 82720 }, { "epoch": 0.31489079877895604, "grad_norm": 0.1300119310617447, "learning_rate": 0.0005, "loss": 2.1391, "step": 82730 }, { "epoch": 0.3149288612470787, "grad_norm": 0.12770238518714905, "learning_rate": 0.0005, "loss": 2.123, "step": 82740 }, { "epoch": 0.3149669237152014, "grad_norm": 0.15108972787857056, "learning_rate": 0.0005, "loss": 2.1381, "step": 82750 }, { "epoch": 0.31500498618332406, "grad_norm": 0.12652187049388885, "learning_rate": 0.0005, "loss": 2.1107, "step": 82760 }, { "epoch": 0.3150430486514468, "grad_norm": 0.12813642621040344, "learning_rate": 0.0005, "loss": 2.1344, "step": 82770 }, { "epoch": 0.31508111111956943, "grad_norm": 0.11277160048484802, "learning_rate": 0.0005, "loss": 2.1189, "step": 82780 }, { "epoch": 0.31511917358769215, "grad_norm": 0.12133605778217316, "learning_rate": 0.0005, "loss": 2.1288, "step": 82790 }, { "epoch": 0.3151572360558148, "grad_norm": 0.2113325148820877, "learning_rate": 0.0005, "loss": 2.1218, "step": 82800 }, { "epoch": 0.31519529852393746, "grad_norm": 0.13138943910598755, "learning_rate": 0.0005, "loss": 2.1228, "step": 82810 }, { "epoch": 0.3152333609920602, "grad_norm": 0.13198734819889069, "learning_rate": 0.0005, "loss": 2.1306, "step": 82820 }, { "epoch": 0.31527142346018283, "grad_norm": 0.12452925741672516, "learning_rate": 0.0005, "loss": 2.1182, "step": 82830 }, { "epoch": 0.31530948592830554, "grad_norm": 0.12573881447315216, "learning_rate": 0.0005, "loss": 2.1183, "step": 82840 }, { "epoch": 0.3153475483964282, "grad_norm": 0.11985309422016144, "learning_rate": 0.0005, "loss": 2.1328, "step": 82850 }, { "epoch": 0.3153856108645509, "grad_norm": 0.13780802488327026, "learning_rate": 0.0005, "loss": 2.1331, "step": 82860 }, { "epoch": 0.31542367333267357, "grad_norm": 0.11680909991264343, "learning_rate": 0.0005, "loss": 2.1269, "step": 82870 }, { "epoch": 0.3154617358007963, "grad_norm": 0.1293938308954239, "learning_rate": 0.0005, "loss": 2.1004, "step": 82880 }, { "epoch": 0.31549979826891894, "grad_norm": 0.12560796737670898, "learning_rate": 0.0005, "loss": 2.1319, "step": 82890 }, { "epoch": 0.31553786073704165, "grad_norm": 0.11934496462345123, "learning_rate": 0.0005, "loss": 2.1193, "step": 82900 }, { "epoch": 0.3155759232051643, "grad_norm": 0.12571270763874054, "learning_rate": 0.0005, "loss": 2.108, "step": 82910 }, { "epoch": 0.315613985673287, "grad_norm": 0.10958436131477356, "learning_rate": 0.0005, "loss": 2.1426, "step": 82920 }, { "epoch": 0.3156520481414097, "grad_norm": 0.13606712222099304, "learning_rate": 0.0005, "loss": 2.1337, "step": 82930 }, { "epoch": 0.31569011060953234, "grad_norm": 0.11906415224075317, "learning_rate": 0.0005, "loss": 2.1325, "step": 82940 }, { "epoch": 0.31572817307765505, "grad_norm": 0.11434603482484818, "learning_rate": 0.0005, "loss": 2.1299, "step": 82950 }, { "epoch": 0.3157662355457777, "grad_norm": 0.1147366389632225, "learning_rate": 0.0005, "loss": 2.1218, "step": 82960 }, { "epoch": 0.3158042980139004, "grad_norm": 0.12987957894802094, "learning_rate": 0.0005, "loss": 2.1236, "step": 82970 }, { "epoch": 0.3158423604820231, "grad_norm": 0.13320422172546387, "learning_rate": 0.0005, "loss": 2.1178, "step": 82980 }, { "epoch": 0.3158804229501458, "grad_norm": 0.12843391299247742, "learning_rate": 0.0005, "loss": 2.1272, "step": 82990 }, { "epoch": 0.31591848541826845, "grad_norm": 0.1171526238322258, "learning_rate": 0.0005, "loss": 2.1274, "step": 83000 }, { "epoch": 0.31595654788639116, "grad_norm": 0.1265607625246048, "learning_rate": 0.0005, "loss": 2.1379, "step": 83010 }, { "epoch": 0.3159946103545138, "grad_norm": 0.13495829701423645, "learning_rate": 0.0005, "loss": 2.137, "step": 83020 }, { "epoch": 0.31603267282263653, "grad_norm": 0.11635854840278625, "learning_rate": 0.0005, "loss": 2.1238, "step": 83030 }, { "epoch": 0.3160707352907592, "grad_norm": 0.12299887835979462, "learning_rate": 0.0005, "loss": 2.1227, "step": 83040 }, { "epoch": 0.3161087977588819, "grad_norm": 0.12657538056373596, "learning_rate": 0.0005, "loss": 2.1224, "step": 83050 }, { "epoch": 0.31614686022700456, "grad_norm": 0.12187173217535019, "learning_rate": 0.0005, "loss": 2.1343, "step": 83060 }, { "epoch": 0.31618492269512727, "grad_norm": 0.13706451654434204, "learning_rate": 0.0005, "loss": 2.1286, "step": 83070 }, { "epoch": 0.3162229851632499, "grad_norm": 0.11517681926488876, "learning_rate": 0.0005, "loss": 2.1424, "step": 83080 }, { "epoch": 0.3162610476313726, "grad_norm": 0.12629637122154236, "learning_rate": 0.0005, "loss": 2.128, "step": 83090 }, { "epoch": 0.3162991100994953, "grad_norm": 0.1268184632062912, "learning_rate": 0.0005, "loss": 2.132, "step": 83100 }, { "epoch": 0.31633717256761795, "grad_norm": 0.12249192595481873, "learning_rate": 0.0005, "loss": 2.1276, "step": 83110 }, { "epoch": 0.31637523503574067, "grad_norm": 0.12270066142082214, "learning_rate": 0.0005, "loss": 2.1366, "step": 83120 }, { "epoch": 0.3164132975038633, "grad_norm": 0.11289820820093155, "learning_rate": 0.0005, "loss": 2.1312, "step": 83130 }, { "epoch": 0.31645135997198603, "grad_norm": 0.14470253884792328, "learning_rate": 0.0005, "loss": 2.1308, "step": 83140 }, { "epoch": 0.3164894224401087, "grad_norm": 0.12982015311717987, "learning_rate": 0.0005, "loss": 2.1278, "step": 83150 }, { "epoch": 0.3165274849082314, "grad_norm": 0.13806003332138062, "learning_rate": 0.0005, "loss": 2.1284, "step": 83160 }, { "epoch": 0.31656554737635406, "grad_norm": 0.12923070788383484, "learning_rate": 0.0005, "loss": 2.1423, "step": 83170 }, { "epoch": 0.3166036098444768, "grad_norm": 0.113308846950531, "learning_rate": 0.0005, "loss": 2.1199, "step": 83180 }, { "epoch": 0.31664167231259943, "grad_norm": 0.12376850098371506, "learning_rate": 0.0005, "loss": 2.1362, "step": 83190 }, { "epoch": 0.31667973478072214, "grad_norm": 0.12424514442682266, "learning_rate": 0.0005, "loss": 2.1259, "step": 83200 }, { "epoch": 0.3167177972488448, "grad_norm": 0.12274659425020218, "learning_rate": 0.0005, "loss": 2.134, "step": 83210 }, { "epoch": 0.3167558597169675, "grad_norm": 0.13459163904190063, "learning_rate": 0.0005, "loss": 2.1257, "step": 83220 }, { "epoch": 0.31679392218509017, "grad_norm": 0.12273278087377548, "learning_rate": 0.0005, "loss": 2.1313, "step": 83230 }, { "epoch": 0.31683198465321283, "grad_norm": 0.12100246548652649, "learning_rate": 0.0005, "loss": 2.1317, "step": 83240 }, { "epoch": 0.31687004712133554, "grad_norm": 0.13598744571208954, "learning_rate": 0.0005, "loss": 2.1153, "step": 83250 }, { "epoch": 0.3169081095894582, "grad_norm": 0.130888432264328, "learning_rate": 0.0005, "loss": 2.1193, "step": 83260 }, { "epoch": 0.3169461720575809, "grad_norm": 0.1161246970295906, "learning_rate": 0.0005, "loss": 2.1393, "step": 83270 }, { "epoch": 0.31698423452570357, "grad_norm": 0.1331406831741333, "learning_rate": 0.0005, "loss": 2.123, "step": 83280 }, { "epoch": 0.3170222969938263, "grad_norm": 0.13716700673103333, "learning_rate": 0.0005, "loss": 2.1322, "step": 83290 }, { "epoch": 0.31706035946194894, "grad_norm": 0.13076649606227875, "learning_rate": 0.0005, "loss": 2.1374, "step": 83300 }, { "epoch": 0.31709842193007165, "grad_norm": 0.12399870902299881, "learning_rate": 0.0005, "loss": 2.1261, "step": 83310 }, { "epoch": 0.3171364843981943, "grad_norm": 0.12629511952400208, "learning_rate": 0.0005, "loss": 2.1277, "step": 83320 }, { "epoch": 0.317174546866317, "grad_norm": 0.13469186425209045, "learning_rate": 0.0005, "loss": 2.1268, "step": 83330 }, { "epoch": 0.3172126093344397, "grad_norm": 0.12043331563472748, "learning_rate": 0.0005, "loss": 2.1159, "step": 83340 }, { "epoch": 0.3172506718025624, "grad_norm": 0.11527103930711746, "learning_rate": 0.0005, "loss": 2.1327, "step": 83350 }, { "epoch": 0.31728873427068505, "grad_norm": 0.12726227939128876, "learning_rate": 0.0005, "loss": 2.1257, "step": 83360 }, { "epoch": 0.3173267967388077, "grad_norm": 0.1191035658121109, "learning_rate": 0.0005, "loss": 2.1199, "step": 83370 }, { "epoch": 0.3173648592069304, "grad_norm": 0.11827237159013748, "learning_rate": 0.0005, "loss": 2.1347, "step": 83380 }, { "epoch": 0.3174029216750531, "grad_norm": 0.1270776093006134, "learning_rate": 0.0005, "loss": 2.1349, "step": 83390 }, { "epoch": 0.3174409841431758, "grad_norm": 0.12138780951499939, "learning_rate": 0.0005, "loss": 2.125, "step": 83400 }, { "epoch": 0.31747904661129844, "grad_norm": 0.1401662975549698, "learning_rate": 0.0005, "loss": 2.1369, "step": 83410 }, { "epoch": 0.31751710907942116, "grad_norm": 0.12503919005393982, "learning_rate": 0.0005, "loss": 2.1376, "step": 83420 }, { "epoch": 0.3175551715475438, "grad_norm": 0.12604312598705292, "learning_rate": 0.0005, "loss": 2.1068, "step": 83430 }, { "epoch": 0.3175932340156665, "grad_norm": 0.1168069988489151, "learning_rate": 0.0005, "loss": 2.1096, "step": 83440 }, { "epoch": 0.3176312964837892, "grad_norm": 0.12296763062477112, "learning_rate": 0.0005, "loss": 2.1226, "step": 83450 }, { "epoch": 0.3176693589519119, "grad_norm": 0.13476720452308655, "learning_rate": 0.0005, "loss": 2.1256, "step": 83460 }, { "epoch": 0.31770742142003455, "grad_norm": 0.12037523835897446, "learning_rate": 0.0005, "loss": 2.1282, "step": 83470 }, { "epoch": 0.31774548388815727, "grad_norm": 0.12858867645263672, "learning_rate": 0.0005, "loss": 2.1326, "step": 83480 }, { "epoch": 0.3177835463562799, "grad_norm": 0.1294158548116684, "learning_rate": 0.0005, "loss": 2.1313, "step": 83490 }, { "epoch": 0.31782160882440263, "grad_norm": 0.13894596695899963, "learning_rate": 0.0005, "loss": 2.1325, "step": 83500 }, { "epoch": 0.3178596712925253, "grad_norm": 0.12230147421360016, "learning_rate": 0.0005, "loss": 2.1166, "step": 83510 }, { "epoch": 0.31789773376064795, "grad_norm": 0.14162777364253998, "learning_rate": 0.0005, "loss": 2.1032, "step": 83520 }, { "epoch": 0.31793579622877066, "grad_norm": 0.1376306414604187, "learning_rate": 0.0005, "loss": 2.1375, "step": 83530 }, { "epoch": 0.3179738586968933, "grad_norm": 0.12878134846687317, "learning_rate": 0.0005, "loss": 2.131, "step": 83540 }, { "epoch": 0.31801192116501603, "grad_norm": 0.13225965201854706, "learning_rate": 0.0005, "loss": 2.1339, "step": 83550 }, { "epoch": 0.3180499836331387, "grad_norm": 0.13180622458457947, "learning_rate": 0.0005, "loss": 2.1224, "step": 83560 }, { "epoch": 0.3180880461012614, "grad_norm": 0.14126379787921906, "learning_rate": 0.0005, "loss": 2.124, "step": 83570 }, { "epoch": 0.31812610856938406, "grad_norm": 0.12663407623767853, "learning_rate": 0.0005, "loss": 2.1104, "step": 83580 }, { "epoch": 0.31816417103750677, "grad_norm": 0.12259071320295334, "learning_rate": 0.0005, "loss": 2.1317, "step": 83590 }, { "epoch": 0.31820223350562943, "grad_norm": 0.12154918909072876, "learning_rate": 0.0005, "loss": 2.1179, "step": 83600 }, { "epoch": 0.31824029597375214, "grad_norm": 0.11901416629552841, "learning_rate": 0.0005, "loss": 2.1272, "step": 83610 }, { "epoch": 0.3182783584418748, "grad_norm": 0.13466015458106995, "learning_rate": 0.0005, "loss": 2.1121, "step": 83620 }, { "epoch": 0.3183164209099975, "grad_norm": 0.12550848722457886, "learning_rate": 0.0005, "loss": 2.1419, "step": 83630 }, { "epoch": 0.31835448337812017, "grad_norm": 0.12653183937072754, "learning_rate": 0.0005, "loss": 2.1432, "step": 83640 }, { "epoch": 0.3183925458462429, "grad_norm": 0.1187266856431961, "learning_rate": 0.0005, "loss": 2.1261, "step": 83650 }, { "epoch": 0.31843060831436554, "grad_norm": 0.12700314819812775, "learning_rate": 0.0005, "loss": 2.1265, "step": 83660 }, { "epoch": 0.3184686707824882, "grad_norm": 0.12154296040534973, "learning_rate": 0.0005, "loss": 2.0989, "step": 83670 }, { "epoch": 0.3185067332506109, "grad_norm": 0.12859545648097992, "learning_rate": 0.0005, "loss": 2.1263, "step": 83680 }, { "epoch": 0.31854479571873356, "grad_norm": 0.12260852754116058, "learning_rate": 0.0005, "loss": 2.1343, "step": 83690 }, { "epoch": 0.3185828581868563, "grad_norm": 0.12154964357614517, "learning_rate": 0.0005, "loss": 2.1273, "step": 83700 }, { "epoch": 0.31862092065497893, "grad_norm": 0.14406907558441162, "learning_rate": 0.0005, "loss": 2.1187, "step": 83710 }, { "epoch": 0.31865898312310165, "grad_norm": 0.12068268656730652, "learning_rate": 0.0005, "loss": 2.1269, "step": 83720 }, { "epoch": 0.3186970455912243, "grad_norm": 0.12875112891197205, "learning_rate": 0.0005, "loss": 2.1242, "step": 83730 }, { "epoch": 0.318735108059347, "grad_norm": 0.1260383427143097, "learning_rate": 0.0005, "loss": 2.1343, "step": 83740 }, { "epoch": 0.3187731705274697, "grad_norm": 0.118220753967762, "learning_rate": 0.0005, "loss": 2.1293, "step": 83750 }, { "epoch": 0.3188112329955924, "grad_norm": 0.123013436794281, "learning_rate": 0.0005, "loss": 2.1326, "step": 83760 }, { "epoch": 0.31884929546371504, "grad_norm": 0.11908526718616486, "learning_rate": 0.0005, "loss": 2.1249, "step": 83770 }, { "epoch": 0.31888735793183776, "grad_norm": 0.12813295423984528, "learning_rate": 0.0005, "loss": 2.1145, "step": 83780 }, { "epoch": 0.3189254203999604, "grad_norm": 0.11917294561862946, "learning_rate": 0.0005, "loss": 2.1222, "step": 83790 }, { "epoch": 0.31896348286808307, "grad_norm": 0.13365842401981354, "learning_rate": 0.0005, "loss": 2.1444, "step": 83800 }, { "epoch": 0.3190015453362058, "grad_norm": 0.1154765710234642, "learning_rate": 0.0005, "loss": 2.1288, "step": 83810 }, { "epoch": 0.31903960780432844, "grad_norm": 0.12989890575408936, "learning_rate": 0.0005, "loss": 2.134, "step": 83820 }, { "epoch": 0.31907767027245115, "grad_norm": 0.12488723546266556, "learning_rate": 0.0005, "loss": 2.1177, "step": 83830 }, { "epoch": 0.3191157327405738, "grad_norm": 0.11685290932655334, "learning_rate": 0.0005, "loss": 2.1257, "step": 83840 }, { "epoch": 0.3191537952086965, "grad_norm": 0.12078642845153809, "learning_rate": 0.0005, "loss": 2.117, "step": 83850 }, { "epoch": 0.3191918576768192, "grad_norm": 0.13583782315254211, "learning_rate": 0.0005, "loss": 2.1146, "step": 83860 }, { "epoch": 0.3192299201449419, "grad_norm": 0.12751080095767975, "learning_rate": 0.0005, "loss": 2.1225, "step": 83870 }, { "epoch": 0.31926798261306455, "grad_norm": 0.12122221291065216, "learning_rate": 0.0005, "loss": 2.1198, "step": 83880 }, { "epoch": 0.31930604508118726, "grad_norm": 0.13013103604316711, "learning_rate": 0.0005, "loss": 2.125, "step": 83890 }, { "epoch": 0.3193441075493099, "grad_norm": 0.1183476373553276, "learning_rate": 0.0005, "loss": 2.1321, "step": 83900 }, { "epoch": 0.31938217001743263, "grad_norm": 0.11884431540966034, "learning_rate": 0.0005, "loss": 2.1163, "step": 83910 }, { "epoch": 0.3194202324855553, "grad_norm": 0.12388347089290619, "learning_rate": 0.0005, "loss": 2.112, "step": 83920 }, { "epoch": 0.319458294953678, "grad_norm": 0.11375343799591064, "learning_rate": 0.0005, "loss": 2.1143, "step": 83930 }, { "epoch": 0.31949635742180066, "grad_norm": 0.13243578374385834, "learning_rate": 0.0005, "loss": 2.1229, "step": 83940 }, { "epoch": 0.3195344198899233, "grad_norm": 0.13904094696044922, "learning_rate": 0.0005, "loss": 2.1164, "step": 83950 }, { "epoch": 0.31957248235804603, "grad_norm": 0.14737099409103394, "learning_rate": 0.0005, "loss": 2.1286, "step": 83960 }, { "epoch": 0.3196105448261687, "grad_norm": 0.12814052402973175, "learning_rate": 0.0005, "loss": 2.1119, "step": 83970 }, { "epoch": 0.3196486072942914, "grad_norm": 0.13516543805599213, "learning_rate": 0.0005, "loss": 2.1334, "step": 83980 }, { "epoch": 0.31968666976241406, "grad_norm": 0.11763939261436462, "learning_rate": 0.0005, "loss": 2.1154, "step": 83990 }, { "epoch": 0.31972473223053677, "grad_norm": 0.13765235245227814, "learning_rate": 0.0005, "loss": 2.1247, "step": 84000 }, { "epoch": 0.3197627946986594, "grad_norm": 0.13086718320846558, "learning_rate": 0.0005, "loss": 2.1401, "step": 84010 }, { "epoch": 0.31980085716678214, "grad_norm": 0.12069284170866013, "learning_rate": 0.0005, "loss": 2.116, "step": 84020 }, { "epoch": 0.3198389196349048, "grad_norm": 0.12064868956804276, "learning_rate": 0.0005, "loss": 2.1328, "step": 84030 }, { "epoch": 0.3198769821030275, "grad_norm": 0.12312685698270798, "learning_rate": 0.0005, "loss": 2.1096, "step": 84040 }, { "epoch": 0.31991504457115016, "grad_norm": 0.12011557072401047, "learning_rate": 0.0005, "loss": 2.1321, "step": 84050 }, { "epoch": 0.3199531070392729, "grad_norm": 0.12177038192749023, "learning_rate": 0.0005, "loss": 2.1174, "step": 84060 }, { "epoch": 0.31999116950739553, "grad_norm": 0.13619963824748993, "learning_rate": 0.0005, "loss": 2.139, "step": 84070 }, { "epoch": 0.32002923197551825, "grad_norm": 0.1267249584197998, "learning_rate": 0.0005, "loss": 2.1317, "step": 84080 }, { "epoch": 0.3200672944436409, "grad_norm": 0.1230340376496315, "learning_rate": 0.0005, "loss": 2.1308, "step": 84090 }, { "epoch": 0.32010535691176356, "grad_norm": 0.1240105852484703, "learning_rate": 0.0005, "loss": 2.1104, "step": 84100 }, { "epoch": 0.3201434193798863, "grad_norm": 0.12150612473487854, "learning_rate": 0.0005, "loss": 2.1184, "step": 84110 }, { "epoch": 0.32018148184800893, "grad_norm": 0.12242543697357178, "learning_rate": 0.0005, "loss": 2.1191, "step": 84120 }, { "epoch": 0.32021954431613164, "grad_norm": 0.11171294748783112, "learning_rate": 0.0005, "loss": 2.1284, "step": 84130 }, { "epoch": 0.3202576067842543, "grad_norm": 0.11961314082145691, "learning_rate": 0.0005, "loss": 2.1129, "step": 84140 }, { "epoch": 0.320295669252377, "grad_norm": 0.12245552241802216, "learning_rate": 0.0005, "loss": 2.1212, "step": 84150 }, { "epoch": 0.32033373172049967, "grad_norm": 0.13306266069412231, "learning_rate": 0.0005, "loss": 2.1124, "step": 84160 }, { "epoch": 0.3203717941886224, "grad_norm": 0.13139064610004425, "learning_rate": 0.0005, "loss": 2.124, "step": 84170 }, { "epoch": 0.32040985665674504, "grad_norm": 0.12348007410764694, "learning_rate": 0.0005, "loss": 2.133, "step": 84180 }, { "epoch": 0.32044791912486775, "grad_norm": 0.1370268613100052, "learning_rate": 0.0005, "loss": 2.1339, "step": 84190 }, { "epoch": 0.3204859815929904, "grad_norm": 0.13645033538341522, "learning_rate": 0.0005, "loss": 2.1246, "step": 84200 }, { "epoch": 0.3205240440611131, "grad_norm": 0.11724669486284256, "learning_rate": 0.0005, "loss": 2.1195, "step": 84210 }, { "epoch": 0.3205621065292358, "grad_norm": 0.13534116744995117, "learning_rate": 0.0005, "loss": 2.123, "step": 84220 }, { "epoch": 0.3206001689973585, "grad_norm": 0.1163550466299057, "learning_rate": 0.0005, "loss": 2.133, "step": 84230 }, { "epoch": 0.32063823146548115, "grad_norm": 0.11814413964748383, "learning_rate": 0.0005, "loss": 2.1411, "step": 84240 }, { "epoch": 0.3206762939336038, "grad_norm": 0.13630716502666473, "learning_rate": 0.0005, "loss": 2.1329, "step": 84250 }, { "epoch": 0.3207143564017265, "grad_norm": 0.11421690881252289, "learning_rate": 0.0005, "loss": 2.1269, "step": 84260 }, { "epoch": 0.3207524188698492, "grad_norm": 0.11150612682104111, "learning_rate": 0.0005, "loss": 2.1252, "step": 84270 }, { "epoch": 0.3207904813379719, "grad_norm": 0.1303691864013672, "learning_rate": 0.0005, "loss": 2.1221, "step": 84280 }, { "epoch": 0.32082854380609455, "grad_norm": 0.116021066904068, "learning_rate": 0.0005, "loss": 2.1187, "step": 84290 }, { "epoch": 0.32086660627421726, "grad_norm": 0.12288866192102432, "learning_rate": 0.0005, "loss": 2.1329, "step": 84300 }, { "epoch": 0.3209046687423399, "grad_norm": 0.13716170191764832, "learning_rate": 0.0005, "loss": 2.1143, "step": 84310 }, { "epoch": 0.32094273121046263, "grad_norm": 0.11293182522058487, "learning_rate": 0.0005, "loss": 2.1112, "step": 84320 }, { "epoch": 0.3209807936785853, "grad_norm": 0.12716884911060333, "learning_rate": 0.0005, "loss": 2.1376, "step": 84330 }, { "epoch": 0.321018856146708, "grad_norm": 0.12877345085144043, "learning_rate": 0.0005, "loss": 2.1188, "step": 84340 }, { "epoch": 0.32105691861483066, "grad_norm": 0.12783068418502808, "learning_rate": 0.0005, "loss": 2.1289, "step": 84350 }, { "epoch": 0.32109498108295337, "grad_norm": 0.11733737587928772, "learning_rate": 0.0005, "loss": 2.1294, "step": 84360 }, { "epoch": 0.321133043551076, "grad_norm": 0.13269150257110596, "learning_rate": 0.0005, "loss": 2.1355, "step": 84370 }, { "epoch": 0.3211711060191987, "grad_norm": 0.1295628845691681, "learning_rate": 0.0005, "loss": 2.1288, "step": 84380 }, { "epoch": 0.3212091684873214, "grad_norm": 0.14869417250156403, "learning_rate": 0.0005, "loss": 2.1242, "step": 84390 }, { "epoch": 0.32124723095544405, "grad_norm": 0.11842743307352066, "learning_rate": 0.0005, "loss": 2.1223, "step": 84400 }, { "epoch": 0.32128529342356676, "grad_norm": 0.14121049642562866, "learning_rate": 0.0005, "loss": 2.1205, "step": 84410 }, { "epoch": 0.3213233558916894, "grad_norm": 0.1316528618335724, "learning_rate": 0.0005, "loss": 2.1167, "step": 84420 }, { "epoch": 0.32136141835981213, "grad_norm": 1.0470399856567383, "learning_rate": 0.0005, "loss": 2.1316, "step": 84430 }, { "epoch": 0.3213994808279348, "grad_norm": 0.11911292374134064, "learning_rate": 0.0005, "loss": 2.1273, "step": 84440 }, { "epoch": 0.3214375432960575, "grad_norm": 0.12905144691467285, "learning_rate": 0.0005, "loss": 2.1183, "step": 84450 }, { "epoch": 0.32147560576418016, "grad_norm": 0.13625507056713104, "learning_rate": 0.0005, "loss": 2.1128, "step": 84460 }, { "epoch": 0.3215136682323029, "grad_norm": 0.1250603199005127, "learning_rate": 0.0005, "loss": 2.1376, "step": 84470 }, { "epoch": 0.32155173070042553, "grad_norm": 0.1387881487607956, "learning_rate": 0.0005, "loss": 2.1379, "step": 84480 }, { "epoch": 0.32158979316854824, "grad_norm": 0.12051466107368469, "learning_rate": 0.0005, "loss": 2.1166, "step": 84490 }, { "epoch": 0.3216278556366709, "grad_norm": 0.11220446228981018, "learning_rate": 0.0005, "loss": 2.1285, "step": 84500 }, { "epoch": 0.3216659181047936, "grad_norm": 0.11351220309734344, "learning_rate": 0.0005, "loss": 2.1169, "step": 84510 }, { "epoch": 0.32170398057291627, "grad_norm": 0.12000728398561478, "learning_rate": 0.0005, "loss": 2.1248, "step": 84520 }, { "epoch": 0.3217420430410389, "grad_norm": 0.12402566522359848, "learning_rate": 0.0005, "loss": 2.1372, "step": 84530 }, { "epoch": 0.32178010550916164, "grad_norm": 0.13333047926425934, "learning_rate": 0.0005, "loss": 2.1203, "step": 84540 }, { "epoch": 0.3218181679772843, "grad_norm": 0.11884795874357224, "learning_rate": 0.0005, "loss": 2.1176, "step": 84550 }, { "epoch": 0.321856230445407, "grad_norm": 0.12012158334255219, "learning_rate": 0.0005, "loss": 2.1244, "step": 84560 }, { "epoch": 0.32189429291352967, "grad_norm": 0.12488909810781479, "learning_rate": 0.0005, "loss": 2.1353, "step": 84570 }, { "epoch": 0.3219323553816524, "grad_norm": 0.11611166596412659, "learning_rate": 0.0005, "loss": 2.1301, "step": 84580 }, { "epoch": 0.32197041784977504, "grad_norm": 0.11382875591516495, "learning_rate": 0.0005, "loss": 2.1123, "step": 84590 }, { "epoch": 0.32200848031789775, "grad_norm": 0.12993958592414856, "learning_rate": 0.0005, "loss": 2.1087, "step": 84600 }, { "epoch": 0.3220465427860204, "grad_norm": 0.12190189212560654, "learning_rate": 0.0005, "loss": 2.1323, "step": 84610 }, { "epoch": 0.3220846052541431, "grad_norm": 0.12783260643482208, "learning_rate": 0.0005, "loss": 2.1347, "step": 84620 }, { "epoch": 0.3221226677222658, "grad_norm": 0.12652941048145294, "learning_rate": 0.0005, "loss": 2.1106, "step": 84630 }, { "epoch": 0.3221607301903885, "grad_norm": 0.12840814888477325, "learning_rate": 0.0005, "loss": 2.1266, "step": 84640 }, { "epoch": 0.32219879265851115, "grad_norm": 0.13773638010025024, "learning_rate": 0.0005, "loss": 2.113, "step": 84650 }, { "epoch": 0.32223685512663386, "grad_norm": 0.11401660740375519, "learning_rate": 0.0005, "loss": 2.1173, "step": 84660 }, { "epoch": 0.3222749175947565, "grad_norm": 0.12564989924430847, "learning_rate": 0.0005, "loss": 2.1185, "step": 84670 }, { "epoch": 0.3223129800628792, "grad_norm": 0.13151569664478302, "learning_rate": 0.0005, "loss": 2.1153, "step": 84680 }, { "epoch": 0.3223510425310019, "grad_norm": 0.13308259844779968, "learning_rate": 0.0005, "loss": 2.1354, "step": 84690 }, { "epoch": 0.32238910499912454, "grad_norm": 0.13082093000411987, "learning_rate": 0.0005, "loss": 2.117, "step": 84700 }, { "epoch": 0.32242716746724726, "grad_norm": 0.12852692604064941, "learning_rate": 0.0005, "loss": 2.1013, "step": 84710 }, { "epoch": 0.3224652299353699, "grad_norm": 0.11865947395563126, "learning_rate": 0.0005, "loss": 2.13, "step": 84720 }, { "epoch": 0.3225032924034926, "grad_norm": 0.1241876482963562, "learning_rate": 0.0005, "loss": 2.1179, "step": 84730 }, { "epoch": 0.3225413548716153, "grad_norm": 0.12009057402610779, "learning_rate": 0.0005, "loss": 2.1195, "step": 84740 }, { "epoch": 0.322579417339738, "grad_norm": 0.12893769145011902, "learning_rate": 0.0005, "loss": 2.1299, "step": 84750 }, { "epoch": 0.32261747980786065, "grad_norm": 0.1296892762184143, "learning_rate": 0.0005, "loss": 2.1311, "step": 84760 }, { "epoch": 0.32265554227598336, "grad_norm": 0.13293935358524323, "learning_rate": 0.0005, "loss": 2.1437, "step": 84770 }, { "epoch": 0.322693604744106, "grad_norm": 0.1251651495695114, "learning_rate": 0.0005, "loss": 2.1248, "step": 84780 }, { "epoch": 0.32273166721222873, "grad_norm": 0.12732888758182526, "learning_rate": 0.0005, "loss": 2.1252, "step": 84790 }, { "epoch": 0.3227697296803514, "grad_norm": 0.11490562558174133, "learning_rate": 0.0005, "loss": 2.1028, "step": 84800 }, { "epoch": 0.32280779214847405, "grad_norm": 0.12102054059505463, "learning_rate": 0.0005, "loss": 2.1137, "step": 84810 }, { "epoch": 0.32284585461659676, "grad_norm": 0.12410101294517517, "learning_rate": 0.0005, "loss": 2.1206, "step": 84820 }, { "epoch": 0.3228839170847194, "grad_norm": 0.12932229042053223, "learning_rate": 0.0005, "loss": 2.1242, "step": 84830 }, { "epoch": 0.32292197955284213, "grad_norm": 0.10722965002059937, "learning_rate": 0.0005, "loss": 2.1129, "step": 84840 }, { "epoch": 0.3229600420209648, "grad_norm": 0.13229092955589294, "learning_rate": 0.0005, "loss": 2.1147, "step": 84850 }, { "epoch": 0.3229981044890875, "grad_norm": 0.11781799793243408, "learning_rate": 0.0005, "loss": 2.1326, "step": 84860 }, { "epoch": 0.32303616695721016, "grad_norm": 0.11863507330417633, "learning_rate": 0.0005, "loss": 2.1161, "step": 84870 }, { "epoch": 0.32307422942533287, "grad_norm": 0.12561804056167603, "learning_rate": 0.0005, "loss": 2.1277, "step": 84880 }, { "epoch": 0.3231122918934555, "grad_norm": 0.12400005757808685, "learning_rate": 0.0005, "loss": 2.1334, "step": 84890 }, { "epoch": 0.32315035436157824, "grad_norm": 0.11362763494253159, "learning_rate": 0.0005, "loss": 2.132, "step": 84900 }, { "epoch": 0.3231884168297009, "grad_norm": 0.11362208425998688, "learning_rate": 0.0005, "loss": 2.1299, "step": 84910 }, { "epoch": 0.3232264792978236, "grad_norm": 0.12535296380519867, "learning_rate": 0.0005, "loss": 2.1178, "step": 84920 }, { "epoch": 0.32326454176594627, "grad_norm": 0.12394122779369354, "learning_rate": 0.0005, "loss": 2.1202, "step": 84930 }, { "epoch": 0.323302604234069, "grad_norm": 0.131861612200737, "learning_rate": 0.0005, "loss": 2.1208, "step": 84940 }, { "epoch": 0.32334066670219164, "grad_norm": 0.11681771278381348, "learning_rate": 0.0005, "loss": 2.1174, "step": 84950 }, { "epoch": 0.3233787291703143, "grad_norm": 0.12034600973129272, "learning_rate": 0.0005, "loss": 2.1151, "step": 84960 }, { "epoch": 0.323416791638437, "grad_norm": 0.11895084381103516, "learning_rate": 0.0005, "loss": 2.1175, "step": 84970 }, { "epoch": 0.32345485410655966, "grad_norm": 0.12538011372089386, "learning_rate": 0.0005, "loss": 2.1264, "step": 84980 }, { "epoch": 0.3234929165746824, "grad_norm": 0.12758684158325195, "learning_rate": 0.0005, "loss": 2.1433, "step": 84990 }, { "epoch": 0.32353097904280503, "grad_norm": 0.12982970476150513, "learning_rate": 0.0005, "loss": 2.1306, "step": 85000 }, { "epoch": 0.32356904151092775, "grad_norm": 0.12789271771907806, "learning_rate": 0.0005, "loss": 2.1262, "step": 85010 }, { "epoch": 0.3236071039790504, "grad_norm": 0.12262456119060516, "learning_rate": 0.0005, "loss": 2.1116, "step": 85020 }, { "epoch": 0.3236451664471731, "grad_norm": 0.12088648229837418, "learning_rate": 0.0005, "loss": 2.1261, "step": 85030 }, { "epoch": 0.3236832289152958, "grad_norm": 0.11756960302591324, "learning_rate": 0.0005, "loss": 2.1047, "step": 85040 }, { "epoch": 0.3237212913834185, "grad_norm": 0.13255122303962708, "learning_rate": 0.0005, "loss": 2.1301, "step": 85050 }, { "epoch": 0.32375935385154114, "grad_norm": 0.12687934935092926, "learning_rate": 0.0005, "loss": 2.1282, "step": 85060 }, { "epoch": 0.32379741631966386, "grad_norm": 0.12454438954591751, "learning_rate": 0.0005, "loss": 2.1264, "step": 85070 }, { "epoch": 0.3238354787877865, "grad_norm": 0.29794633388519287, "learning_rate": 0.0005, "loss": 2.1375, "step": 85080 }, { "epoch": 0.3238735412559092, "grad_norm": 0.13008928298950195, "learning_rate": 0.0005, "loss": 2.1093, "step": 85090 }, { "epoch": 0.3239116037240319, "grad_norm": 0.12024632096290588, "learning_rate": 0.0005, "loss": 2.1316, "step": 85100 }, { "epoch": 0.32394966619215454, "grad_norm": 0.11600895971059799, "learning_rate": 0.0005, "loss": 2.1166, "step": 85110 }, { "epoch": 0.32398772866027725, "grad_norm": 0.12285245209932327, "learning_rate": 0.0005, "loss": 2.1249, "step": 85120 }, { "epoch": 0.3240257911283999, "grad_norm": 0.1324823647737503, "learning_rate": 0.0005, "loss": 2.1138, "step": 85130 }, { "epoch": 0.3240638535965226, "grad_norm": 0.1279059499502182, "learning_rate": 0.0005, "loss": 2.1202, "step": 85140 }, { "epoch": 0.3241019160646453, "grad_norm": 0.13273939490318298, "learning_rate": 0.0005, "loss": 2.1393, "step": 85150 }, { "epoch": 0.324139978532768, "grad_norm": 0.1353902965784073, "learning_rate": 0.0005, "loss": 2.1365, "step": 85160 }, { "epoch": 0.32417804100089065, "grad_norm": 0.13185188174247742, "learning_rate": 0.0005, "loss": 2.0979, "step": 85170 }, { "epoch": 0.32421610346901336, "grad_norm": 0.13154304027557373, "learning_rate": 0.0005, "loss": 2.129, "step": 85180 }, { "epoch": 0.324254165937136, "grad_norm": 0.12412336468696594, "learning_rate": 0.0005, "loss": 2.1246, "step": 85190 }, { "epoch": 0.32429222840525873, "grad_norm": 0.1248876303434372, "learning_rate": 0.0005, "loss": 2.1342, "step": 85200 }, { "epoch": 0.3243302908733814, "grad_norm": 0.13276709616184235, "learning_rate": 0.0005, "loss": 2.127, "step": 85210 }, { "epoch": 0.3243683533415041, "grad_norm": 0.13410021364688873, "learning_rate": 0.0005, "loss": 2.1159, "step": 85220 }, { "epoch": 0.32440641580962676, "grad_norm": 0.11405565589666367, "learning_rate": 0.0005, "loss": 2.1294, "step": 85230 }, { "epoch": 0.3244444782777494, "grad_norm": 0.13638851046562195, "learning_rate": 0.0005, "loss": 2.1375, "step": 85240 }, { "epoch": 0.32448254074587213, "grad_norm": 0.11063786596059799, "learning_rate": 0.0005, "loss": 2.1094, "step": 85250 }, { "epoch": 0.3245206032139948, "grad_norm": 0.12583479285240173, "learning_rate": 0.0005, "loss": 2.1092, "step": 85260 }, { "epoch": 0.3245586656821175, "grad_norm": 0.13034242391586304, "learning_rate": 0.0005, "loss": 2.1275, "step": 85270 }, { "epoch": 0.32459672815024015, "grad_norm": 0.13332554697990417, "learning_rate": 0.0005, "loss": 2.1272, "step": 85280 }, { "epoch": 0.32463479061836287, "grad_norm": 0.11839156597852707, "learning_rate": 0.0005, "loss": 2.1288, "step": 85290 }, { "epoch": 0.3246728530864855, "grad_norm": 0.13876213133335114, "learning_rate": 0.0005, "loss": 2.1133, "step": 85300 }, { "epoch": 0.32471091555460824, "grad_norm": 0.14040637016296387, "learning_rate": 0.0005, "loss": 2.1313, "step": 85310 }, { "epoch": 0.3247489780227309, "grad_norm": 0.12406309694051743, "learning_rate": 0.0005, "loss": 2.1362, "step": 85320 }, { "epoch": 0.3247870404908536, "grad_norm": 0.1268659383058548, "learning_rate": 0.0005, "loss": 2.1234, "step": 85330 }, { "epoch": 0.32482510295897626, "grad_norm": 0.12533274292945862, "learning_rate": 0.0005, "loss": 2.1326, "step": 85340 }, { "epoch": 0.324863165427099, "grad_norm": 0.11266963928937912, "learning_rate": 0.0005, "loss": 2.1243, "step": 85350 }, { "epoch": 0.32490122789522163, "grad_norm": 0.13690349459648132, "learning_rate": 0.0005, "loss": 2.1302, "step": 85360 }, { "epoch": 0.32493929036334435, "grad_norm": 0.11537948250770569, "learning_rate": 0.0005, "loss": 2.1325, "step": 85370 }, { "epoch": 0.324977352831467, "grad_norm": 0.11801768839359283, "learning_rate": 0.0005, "loss": 2.1284, "step": 85380 }, { "epoch": 0.32501541529958966, "grad_norm": 0.1329495757818222, "learning_rate": 0.0005, "loss": 2.1324, "step": 85390 }, { "epoch": 0.3250534777677124, "grad_norm": 0.14431969821453094, "learning_rate": 0.0005, "loss": 2.1113, "step": 85400 }, { "epoch": 0.32509154023583503, "grad_norm": 0.12717166543006897, "learning_rate": 0.0005, "loss": 2.1425, "step": 85410 }, { "epoch": 0.32512960270395774, "grad_norm": 0.13233216106891632, "learning_rate": 0.0005, "loss": 2.1295, "step": 85420 }, { "epoch": 0.3251676651720804, "grad_norm": 0.11458507180213928, "learning_rate": 0.0005, "loss": 2.1287, "step": 85430 }, { "epoch": 0.3252057276402031, "grad_norm": 0.12378555536270142, "learning_rate": 0.0005, "loss": 2.137, "step": 85440 }, { "epoch": 0.32524379010832577, "grad_norm": 0.1393895000219345, "learning_rate": 0.0005, "loss": 2.1345, "step": 85450 }, { "epoch": 0.3252818525764485, "grad_norm": 0.13885247707366943, "learning_rate": 0.0005, "loss": 2.108, "step": 85460 }, { "epoch": 0.32531991504457114, "grad_norm": 0.12107256054878235, "learning_rate": 0.0005, "loss": 2.1215, "step": 85470 }, { "epoch": 0.32535797751269385, "grad_norm": 0.129132479429245, "learning_rate": 0.0005, "loss": 2.1327, "step": 85480 }, { "epoch": 0.3253960399808165, "grad_norm": 0.12440992891788483, "learning_rate": 0.0005, "loss": 2.118, "step": 85490 }, { "epoch": 0.3254341024489392, "grad_norm": 0.12130912393331528, "learning_rate": 0.0005, "loss": 2.123, "step": 85500 }, { "epoch": 0.3254721649170619, "grad_norm": 0.13403859734535217, "learning_rate": 0.0005, "loss": 2.1135, "step": 85510 }, { "epoch": 0.3255102273851846, "grad_norm": 0.12442876398563385, "learning_rate": 0.0005, "loss": 2.105, "step": 85520 }, { "epoch": 0.32554828985330725, "grad_norm": 0.11490896344184875, "learning_rate": 0.0005, "loss": 2.1408, "step": 85530 }, { "epoch": 0.3255863523214299, "grad_norm": 0.1400223970413208, "learning_rate": 0.0005, "loss": 2.129, "step": 85540 }, { "epoch": 0.3256244147895526, "grad_norm": 0.12713545560836792, "learning_rate": 0.0005, "loss": 2.1248, "step": 85550 }, { "epoch": 0.3256624772576753, "grad_norm": 0.13676592707633972, "learning_rate": 0.0005, "loss": 2.1168, "step": 85560 }, { "epoch": 0.325700539725798, "grad_norm": 0.12908118963241577, "learning_rate": 0.0005, "loss": 2.1407, "step": 85570 }, { "epoch": 0.32573860219392065, "grad_norm": 0.38396722078323364, "learning_rate": 0.0005, "loss": 2.1267, "step": 85580 }, { "epoch": 0.32577666466204336, "grad_norm": 0.11782268434762955, "learning_rate": 0.0005, "loss": 2.1259, "step": 85590 }, { "epoch": 0.325814727130166, "grad_norm": 0.15799593925476074, "learning_rate": 0.0005, "loss": 2.1375, "step": 85600 }, { "epoch": 0.32585278959828873, "grad_norm": 0.11854618787765503, "learning_rate": 0.0005, "loss": 2.1401, "step": 85610 }, { "epoch": 0.3258908520664114, "grad_norm": 0.12333806604146957, "learning_rate": 0.0005, "loss": 2.114, "step": 85620 }, { "epoch": 0.3259289145345341, "grad_norm": 0.11984525620937347, "learning_rate": 0.0005, "loss": 2.119, "step": 85630 }, { "epoch": 0.32596697700265675, "grad_norm": 0.13362917304039001, "learning_rate": 0.0005, "loss": 2.1231, "step": 85640 }, { "epoch": 0.32600503947077947, "grad_norm": 0.12273038923740387, "learning_rate": 0.0005, "loss": 2.1223, "step": 85650 }, { "epoch": 0.3260431019389021, "grad_norm": 0.12973017990589142, "learning_rate": 0.0005, "loss": 2.134, "step": 85660 }, { "epoch": 0.3260811644070248, "grad_norm": 0.12842817604541779, "learning_rate": 0.0005, "loss": 2.1197, "step": 85670 }, { "epoch": 0.3261192268751475, "grad_norm": 0.12436743080615997, "learning_rate": 0.0005, "loss": 2.1262, "step": 85680 }, { "epoch": 0.32615728934327015, "grad_norm": 0.12389617413282394, "learning_rate": 0.0005, "loss": 2.1387, "step": 85690 }, { "epoch": 0.32619535181139286, "grad_norm": 0.139614075422287, "learning_rate": 0.0005, "loss": 2.1417, "step": 85700 }, { "epoch": 0.3262334142795155, "grad_norm": 0.1295105516910553, "learning_rate": 0.0005, "loss": 2.1291, "step": 85710 }, { "epoch": 0.32627147674763823, "grad_norm": 0.12636461853981018, "learning_rate": 0.0005, "loss": 2.1266, "step": 85720 }, { "epoch": 0.3263095392157609, "grad_norm": 0.13094234466552734, "learning_rate": 0.0005, "loss": 2.1235, "step": 85730 }, { "epoch": 0.3263476016838836, "grad_norm": 0.13378089666366577, "learning_rate": 0.0005, "loss": 2.1217, "step": 85740 }, { "epoch": 0.32638566415200626, "grad_norm": 0.12451636791229248, "learning_rate": 0.0005, "loss": 2.1343, "step": 85750 }, { "epoch": 0.326423726620129, "grad_norm": 0.12907682359218597, "learning_rate": 0.0005, "loss": 2.1231, "step": 85760 }, { "epoch": 0.32646178908825163, "grad_norm": 0.13439998030662537, "learning_rate": 0.0005, "loss": 2.1325, "step": 85770 }, { "epoch": 0.32649985155637434, "grad_norm": 0.12523990869522095, "learning_rate": 0.0005, "loss": 2.1219, "step": 85780 }, { "epoch": 0.326537914024497, "grad_norm": 0.14395292103290558, "learning_rate": 0.0005, "loss": 2.1248, "step": 85790 }, { "epoch": 0.3265759764926197, "grad_norm": 0.1267700046300888, "learning_rate": 0.0005, "loss": 2.1236, "step": 85800 }, { "epoch": 0.32661403896074237, "grad_norm": 0.12681162357330322, "learning_rate": 0.0005, "loss": 2.1315, "step": 85810 }, { "epoch": 0.326652101428865, "grad_norm": 0.11388924717903137, "learning_rate": 0.0005, "loss": 2.1246, "step": 85820 }, { "epoch": 0.32669016389698774, "grad_norm": 0.1388767659664154, "learning_rate": 0.0005, "loss": 2.1305, "step": 85830 }, { "epoch": 0.3267282263651104, "grad_norm": 0.11999952793121338, "learning_rate": 0.0005, "loss": 2.124, "step": 85840 }, { "epoch": 0.3267662888332331, "grad_norm": 0.12944869697093964, "learning_rate": 0.0005, "loss": 2.122, "step": 85850 }, { "epoch": 0.32680435130135577, "grad_norm": 0.12705813348293304, "learning_rate": 0.0005, "loss": 2.1256, "step": 85860 }, { "epoch": 0.3268424137694785, "grad_norm": 0.1447199285030365, "learning_rate": 0.0005, "loss": 2.1058, "step": 85870 }, { "epoch": 0.32688047623760114, "grad_norm": 0.14332392811775208, "learning_rate": 0.0005, "loss": 2.1272, "step": 85880 }, { "epoch": 0.32691853870572385, "grad_norm": 0.1288509964942932, "learning_rate": 0.0005, "loss": 2.128, "step": 85890 }, { "epoch": 0.3269566011738465, "grad_norm": 0.12456371635198593, "learning_rate": 0.0005, "loss": 2.1158, "step": 85900 }, { "epoch": 0.3269946636419692, "grad_norm": 0.13260520994663239, "learning_rate": 0.0005, "loss": 2.1177, "step": 85910 }, { "epoch": 0.3270327261100919, "grad_norm": 0.12597645819187164, "learning_rate": 0.0005, "loss": 2.1334, "step": 85920 }, { "epoch": 0.3270707885782146, "grad_norm": 0.13826020061969757, "learning_rate": 0.0005, "loss": 2.1245, "step": 85930 }, { "epoch": 0.32710885104633725, "grad_norm": 0.11652007699012756, "learning_rate": 0.0005, "loss": 2.1227, "step": 85940 }, { "epoch": 0.32714691351445996, "grad_norm": 0.1280617117881775, "learning_rate": 0.0005, "loss": 2.1279, "step": 85950 }, { "epoch": 0.3271849759825826, "grad_norm": 0.1129440888762474, "learning_rate": 0.0005, "loss": 2.1295, "step": 85960 }, { "epoch": 0.3272230384507053, "grad_norm": 0.1350637525320053, "learning_rate": 0.0005, "loss": 2.133, "step": 85970 }, { "epoch": 0.327261100918828, "grad_norm": 0.13293075561523438, "learning_rate": 0.0005, "loss": 2.1192, "step": 85980 }, { "epoch": 0.32729916338695064, "grad_norm": 0.12983955442905426, "learning_rate": 0.0005, "loss": 2.1086, "step": 85990 }, { "epoch": 0.32733722585507335, "grad_norm": 0.1335182785987854, "learning_rate": 0.0005, "loss": 2.1282, "step": 86000 }, { "epoch": 0.327375288323196, "grad_norm": 0.12860321998596191, "learning_rate": 0.0005, "loss": 2.1337, "step": 86010 }, { "epoch": 0.3274133507913187, "grad_norm": 0.12237963825464249, "learning_rate": 0.0005, "loss": 2.1175, "step": 86020 }, { "epoch": 0.3274514132594414, "grad_norm": 0.14233390986919403, "learning_rate": 0.0005, "loss": 2.1326, "step": 86030 }, { "epoch": 0.3274894757275641, "grad_norm": 0.11780981719493866, "learning_rate": 0.0005, "loss": 2.1043, "step": 86040 }, { "epoch": 0.32752753819568675, "grad_norm": 0.1323825567960739, "learning_rate": 0.0005, "loss": 2.1305, "step": 86050 }, { "epoch": 0.32756560066380946, "grad_norm": 0.1262124478816986, "learning_rate": 0.0005, "loss": 2.1206, "step": 86060 }, { "epoch": 0.3276036631319321, "grad_norm": 0.1186787560582161, "learning_rate": 0.0005, "loss": 2.1191, "step": 86070 }, { "epoch": 0.32764172560005483, "grad_norm": 0.11766122281551361, "learning_rate": 0.0005, "loss": 2.1318, "step": 86080 }, { "epoch": 0.3276797880681775, "grad_norm": 0.12597422301769257, "learning_rate": 0.0005, "loss": 2.1293, "step": 86090 }, { "epoch": 0.32771785053630015, "grad_norm": 0.1503901481628418, "learning_rate": 0.0005, "loss": 2.1137, "step": 86100 }, { "epoch": 0.32775591300442286, "grad_norm": 0.11765991151332855, "learning_rate": 0.0005, "loss": 2.1226, "step": 86110 }, { "epoch": 0.3277939754725455, "grad_norm": 0.11862947046756744, "learning_rate": 0.0005, "loss": 2.1304, "step": 86120 }, { "epoch": 0.32783203794066823, "grad_norm": 0.11336734145879745, "learning_rate": 0.0005, "loss": 2.1261, "step": 86130 }, { "epoch": 0.3278701004087909, "grad_norm": 0.12237170338630676, "learning_rate": 0.0005, "loss": 2.1416, "step": 86140 }, { "epoch": 0.3279081628769136, "grad_norm": 0.11985383182764053, "learning_rate": 0.0005, "loss": 2.1308, "step": 86150 }, { "epoch": 0.32794622534503626, "grad_norm": 0.12470897287130356, "learning_rate": 0.0005, "loss": 2.1348, "step": 86160 }, { "epoch": 0.32798428781315897, "grad_norm": 0.11982845515012741, "learning_rate": 0.0005, "loss": 2.1245, "step": 86170 }, { "epoch": 0.3280223502812816, "grad_norm": 0.13332238793373108, "learning_rate": 0.0005, "loss": 2.1225, "step": 86180 }, { "epoch": 0.32806041274940434, "grad_norm": 0.14622122049331665, "learning_rate": 0.0005, "loss": 2.1139, "step": 86190 }, { "epoch": 0.328098475217527, "grad_norm": 0.12992194294929504, "learning_rate": 0.0005, "loss": 2.1269, "step": 86200 }, { "epoch": 0.3281365376856497, "grad_norm": 0.12645530700683594, "learning_rate": 0.0005, "loss": 2.1302, "step": 86210 }, { "epoch": 0.32817460015377237, "grad_norm": 0.12694615125656128, "learning_rate": 0.0005, "loss": 2.1223, "step": 86220 }, { "epoch": 0.3282126626218951, "grad_norm": 0.13553451001644135, "learning_rate": 0.0005, "loss": 2.1368, "step": 86230 }, { "epoch": 0.32825072509001774, "grad_norm": 0.25713515281677246, "learning_rate": 0.0005, "loss": 2.1103, "step": 86240 }, { "epoch": 0.3282887875581404, "grad_norm": 0.13292460143566132, "learning_rate": 0.0005, "loss": 2.1288, "step": 86250 }, { "epoch": 0.3283268500262631, "grad_norm": 0.11702064424753189, "learning_rate": 0.0005, "loss": 2.1218, "step": 86260 }, { "epoch": 0.32836491249438576, "grad_norm": 0.1134921982884407, "learning_rate": 0.0005, "loss": 2.1232, "step": 86270 }, { "epoch": 0.3284029749625085, "grad_norm": 0.12800496816635132, "learning_rate": 0.0005, "loss": 2.1279, "step": 86280 }, { "epoch": 0.32844103743063113, "grad_norm": 0.12007319927215576, "learning_rate": 0.0005, "loss": 2.1273, "step": 86290 }, { "epoch": 0.32847909989875385, "grad_norm": 0.128435418009758, "learning_rate": 0.0005, "loss": 2.1078, "step": 86300 }, { "epoch": 0.3285171623668765, "grad_norm": 0.12666693329811096, "learning_rate": 0.0005, "loss": 2.126, "step": 86310 }, { "epoch": 0.3285552248349992, "grad_norm": 0.12615017592906952, "learning_rate": 0.0005, "loss": 2.1173, "step": 86320 }, { "epoch": 0.3285932873031219, "grad_norm": 0.1369655877351761, "learning_rate": 0.0005, "loss": 2.107, "step": 86330 }, { "epoch": 0.3286313497712446, "grad_norm": 0.13197752833366394, "learning_rate": 0.0005, "loss": 2.1207, "step": 86340 }, { "epoch": 0.32866941223936724, "grad_norm": 0.13014714419841766, "learning_rate": 0.0005, "loss": 2.1237, "step": 86350 }, { "epoch": 0.32870747470748996, "grad_norm": 0.12440134584903717, "learning_rate": 0.0005, "loss": 2.1163, "step": 86360 }, { "epoch": 0.3287455371756126, "grad_norm": 0.12102056294679642, "learning_rate": 0.0005, "loss": 2.1298, "step": 86370 }, { "epoch": 0.3287835996437353, "grad_norm": 0.1142989918589592, "learning_rate": 0.0005, "loss": 2.1197, "step": 86380 }, { "epoch": 0.328821662111858, "grad_norm": 0.12405013293027878, "learning_rate": 0.0005, "loss": 2.1143, "step": 86390 }, { "epoch": 0.32885972457998064, "grad_norm": 0.11448971182107925, "learning_rate": 0.0005, "loss": 2.1176, "step": 86400 }, { "epoch": 0.32889778704810335, "grad_norm": 0.1186433881521225, "learning_rate": 0.0005, "loss": 2.1207, "step": 86410 }, { "epoch": 0.328935849516226, "grad_norm": 0.12564805150032043, "learning_rate": 0.0005, "loss": 2.1133, "step": 86420 }, { "epoch": 0.3289739119843487, "grad_norm": 0.12833847105503082, "learning_rate": 0.0005, "loss": 2.119, "step": 86430 }, { "epoch": 0.3290119744524714, "grad_norm": 0.13104651868343353, "learning_rate": 0.0005, "loss": 2.1349, "step": 86440 }, { "epoch": 0.3290500369205941, "grad_norm": 0.1240994930267334, "learning_rate": 0.0005, "loss": 2.1306, "step": 86450 }, { "epoch": 0.32908809938871675, "grad_norm": 0.12632007896900177, "learning_rate": 0.0005, "loss": 2.139, "step": 86460 }, { "epoch": 0.32912616185683946, "grad_norm": 0.11452756077051163, "learning_rate": 0.0005, "loss": 2.1258, "step": 86470 }, { "epoch": 0.3291642243249621, "grad_norm": 0.11686919629573822, "learning_rate": 0.0005, "loss": 2.1344, "step": 86480 }, { "epoch": 0.32920228679308483, "grad_norm": 0.13337957859039307, "learning_rate": 0.0005, "loss": 2.1318, "step": 86490 }, { "epoch": 0.3292403492612075, "grad_norm": 0.11861047893762589, "learning_rate": 0.0005, "loss": 2.1274, "step": 86500 }, { "epoch": 0.3292784117293302, "grad_norm": 0.12581536173820496, "learning_rate": 0.0005, "loss": 2.1246, "step": 86510 }, { "epoch": 0.32931647419745286, "grad_norm": 0.11455560475587845, "learning_rate": 0.0005, "loss": 2.1332, "step": 86520 }, { "epoch": 0.32935453666557557, "grad_norm": 0.13580425083637238, "learning_rate": 0.0005, "loss": 2.1172, "step": 86530 }, { "epoch": 0.3293925991336982, "grad_norm": 0.12677060067653656, "learning_rate": 0.0005, "loss": 2.1262, "step": 86540 }, { "epoch": 0.3294306616018209, "grad_norm": 0.13795708119869232, "learning_rate": 0.0005, "loss": 2.1202, "step": 86550 }, { "epoch": 0.3294687240699436, "grad_norm": 0.11908993870019913, "learning_rate": 0.0005, "loss": 2.1223, "step": 86560 }, { "epoch": 0.32950678653806625, "grad_norm": 0.13094303011894226, "learning_rate": 0.0005, "loss": 2.1281, "step": 86570 }, { "epoch": 0.32954484900618897, "grad_norm": 0.12016043812036514, "learning_rate": 0.0005, "loss": 2.1318, "step": 86580 }, { "epoch": 0.3295829114743116, "grad_norm": 0.13373512029647827, "learning_rate": 0.0005, "loss": 2.1261, "step": 86590 }, { "epoch": 0.32962097394243434, "grad_norm": 0.13215504586696625, "learning_rate": 0.0005, "loss": 2.1213, "step": 86600 }, { "epoch": 0.329659036410557, "grad_norm": 0.12117471545934677, "learning_rate": 0.0005, "loss": 2.1197, "step": 86610 }, { "epoch": 0.3296970988786797, "grad_norm": 0.14617256820201874, "learning_rate": 0.0005, "loss": 2.1118, "step": 86620 }, { "epoch": 0.32973516134680236, "grad_norm": 0.12067513167858124, "learning_rate": 0.0005, "loss": 2.1171, "step": 86630 }, { "epoch": 0.3297732238149251, "grad_norm": 0.12430501729249954, "learning_rate": 0.0005, "loss": 2.1388, "step": 86640 }, { "epoch": 0.32981128628304773, "grad_norm": 0.12513650953769684, "learning_rate": 0.0005, "loss": 2.1249, "step": 86650 }, { "epoch": 0.32984934875117045, "grad_norm": 0.1374633014202118, "learning_rate": 0.0005, "loss": 2.1168, "step": 86660 }, { "epoch": 0.3298874112192931, "grad_norm": 0.13568641245365143, "learning_rate": 0.0005, "loss": 2.1405, "step": 86670 }, { "epoch": 0.32992547368741576, "grad_norm": 0.14530439674854279, "learning_rate": 0.0005, "loss": 2.1217, "step": 86680 }, { "epoch": 0.3299635361555385, "grad_norm": 0.13062728941440582, "learning_rate": 0.0005, "loss": 2.1271, "step": 86690 }, { "epoch": 0.33000159862366113, "grad_norm": 0.11495129764080048, "learning_rate": 0.0005, "loss": 2.118, "step": 86700 }, { "epoch": 0.33003966109178384, "grad_norm": 0.12458905577659607, "learning_rate": 0.0005, "loss": 2.1213, "step": 86710 }, { "epoch": 0.3300777235599065, "grad_norm": 0.12767943739891052, "learning_rate": 0.0005, "loss": 2.1261, "step": 86720 }, { "epoch": 0.3301157860280292, "grad_norm": 0.15019801259040833, "learning_rate": 0.0005, "loss": 2.1204, "step": 86730 }, { "epoch": 0.33015384849615187, "grad_norm": 0.13525643944740295, "learning_rate": 0.0005, "loss": 2.1292, "step": 86740 }, { "epoch": 0.3301919109642746, "grad_norm": 0.12708579003810883, "learning_rate": 0.0005, "loss": 2.1133, "step": 86750 }, { "epoch": 0.33022997343239724, "grad_norm": 0.12505242228507996, "learning_rate": 0.0005, "loss": 2.1383, "step": 86760 }, { "epoch": 0.33026803590051995, "grad_norm": 0.12326527386903763, "learning_rate": 0.0005, "loss": 2.1214, "step": 86770 }, { "epoch": 0.3303060983686426, "grad_norm": 0.12085497379302979, "learning_rate": 0.0005, "loss": 2.1156, "step": 86780 }, { "epoch": 0.3303441608367653, "grad_norm": 0.11513642221689224, "learning_rate": 0.0005, "loss": 2.1324, "step": 86790 }, { "epoch": 0.330382223304888, "grad_norm": 0.1289006471633911, "learning_rate": 0.0005, "loss": 2.1288, "step": 86800 }, { "epoch": 0.3304202857730107, "grad_norm": 0.12522073090076447, "learning_rate": 0.0005, "loss": 2.1308, "step": 86810 }, { "epoch": 0.33045834824113335, "grad_norm": 0.12671121954917908, "learning_rate": 0.0005, "loss": 2.1313, "step": 86820 }, { "epoch": 0.330496410709256, "grad_norm": 0.11735258996486664, "learning_rate": 0.0005, "loss": 2.127, "step": 86830 }, { "epoch": 0.3305344731773787, "grad_norm": 0.1259993314743042, "learning_rate": 0.0005, "loss": 2.1141, "step": 86840 }, { "epoch": 0.3305725356455014, "grad_norm": 0.10962878912687302, "learning_rate": 0.0005, "loss": 2.1225, "step": 86850 }, { "epoch": 0.3306105981136241, "grad_norm": 0.12624777853488922, "learning_rate": 0.0005, "loss": 2.1178, "step": 86860 }, { "epoch": 0.33064866058174675, "grad_norm": 0.11900725215673447, "learning_rate": 0.0005, "loss": 2.142, "step": 86870 }, { "epoch": 0.33068672304986946, "grad_norm": 0.13468994200229645, "learning_rate": 0.0005, "loss": 2.1158, "step": 86880 }, { "epoch": 0.3307247855179921, "grad_norm": 0.1127929762005806, "learning_rate": 0.0005, "loss": 2.1141, "step": 86890 }, { "epoch": 0.3307628479861148, "grad_norm": 0.11869233846664429, "learning_rate": 0.0005, "loss": 2.1035, "step": 86900 }, { "epoch": 0.3308009104542375, "grad_norm": 0.10383521020412445, "learning_rate": 0.0005, "loss": 2.1233, "step": 86910 }, { "epoch": 0.3308389729223602, "grad_norm": 0.1302202194929123, "learning_rate": 0.0005, "loss": 2.1, "step": 86920 }, { "epoch": 0.33087703539048285, "grad_norm": 0.12471529841423035, "learning_rate": 0.0005, "loss": 2.1098, "step": 86930 }, { "epoch": 0.33091509785860557, "grad_norm": 0.11835591495037079, "learning_rate": 0.0005, "loss": 2.1212, "step": 86940 }, { "epoch": 0.3309531603267282, "grad_norm": 0.1361277997493744, "learning_rate": 0.0005, "loss": 2.12, "step": 86950 }, { "epoch": 0.33099122279485094, "grad_norm": 0.16617321968078613, "learning_rate": 0.0005, "loss": 2.1261, "step": 86960 }, { "epoch": 0.3310292852629736, "grad_norm": 0.11649385094642639, "learning_rate": 0.0005, "loss": 2.1299, "step": 86970 }, { "epoch": 0.33106734773109625, "grad_norm": 0.1311468482017517, "learning_rate": 0.0005, "loss": 2.1329, "step": 86980 }, { "epoch": 0.33110541019921896, "grad_norm": 0.13043631613254547, "learning_rate": 0.0005, "loss": 2.1259, "step": 86990 }, { "epoch": 0.3311434726673416, "grad_norm": 0.1365789771080017, "learning_rate": 0.0005, "loss": 2.1201, "step": 87000 }, { "epoch": 0.33118153513546433, "grad_norm": 0.14579400420188904, "learning_rate": 0.0005, "loss": 2.1279, "step": 87010 }, { "epoch": 0.331219597603587, "grad_norm": 0.10959062725305557, "learning_rate": 0.0005, "loss": 2.1368, "step": 87020 }, { "epoch": 0.3312576600717097, "grad_norm": 0.12297790497541428, "learning_rate": 0.0005, "loss": 2.1303, "step": 87030 }, { "epoch": 0.33129572253983236, "grad_norm": 0.11930500715970993, "learning_rate": 0.0005, "loss": 2.1184, "step": 87040 }, { "epoch": 0.3313337850079551, "grad_norm": 0.12146482616662979, "learning_rate": 0.0005, "loss": 2.1123, "step": 87050 }, { "epoch": 0.33137184747607773, "grad_norm": 0.12271170318126678, "learning_rate": 0.0005, "loss": 2.1294, "step": 87060 }, { "epoch": 0.33140990994420044, "grad_norm": 0.11878249794244766, "learning_rate": 0.0005, "loss": 2.1046, "step": 87070 }, { "epoch": 0.3314479724123231, "grad_norm": 0.1282305121421814, "learning_rate": 0.0005, "loss": 2.1236, "step": 87080 }, { "epoch": 0.3314860348804458, "grad_norm": 0.13456635177135468, "learning_rate": 0.0005, "loss": 2.1109, "step": 87090 }, { "epoch": 0.33152409734856847, "grad_norm": 0.1312885731458664, "learning_rate": 0.0005, "loss": 2.0989, "step": 87100 }, { "epoch": 0.3315621598166911, "grad_norm": 0.1143750250339508, "learning_rate": 0.0005, "loss": 2.1119, "step": 87110 }, { "epoch": 0.33160022228481384, "grad_norm": 0.1455077975988388, "learning_rate": 0.0005, "loss": 2.105, "step": 87120 }, { "epoch": 0.3316382847529365, "grad_norm": 0.1323305070400238, "learning_rate": 0.0005, "loss": 2.1137, "step": 87130 }, { "epoch": 0.3316763472210592, "grad_norm": 0.1347663253545761, "learning_rate": 0.0005, "loss": 2.1214, "step": 87140 }, { "epoch": 0.33171440968918187, "grad_norm": 0.13488994538784027, "learning_rate": 0.0005, "loss": 2.1263, "step": 87150 }, { "epoch": 0.3317524721573046, "grad_norm": 0.130838081240654, "learning_rate": 0.0005, "loss": 2.125, "step": 87160 }, { "epoch": 0.33179053462542724, "grad_norm": 0.11739790439605713, "learning_rate": 0.0005, "loss": 2.1303, "step": 87170 }, { "epoch": 0.33182859709354995, "grad_norm": 0.13091625273227692, "learning_rate": 0.0005, "loss": 2.1209, "step": 87180 }, { "epoch": 0.3318666595616726, "grad_norm": 0.12075185775756836, "learning_rate": 0.0005, "loss": 2.1469, "step": 87190 }, { "epoch": 0.3319047220297953, "grad_norm": 0.12035005539655685, "learning_rate": 0.0005, "loss": 2.1206, "step": 87200 }, { "epoch": 0.331942784497918, "grad_norm": 0.12317599356174469, "learning_rate": 0.0005, "loss": 2.1206, "step": 87210 }, { "epoch": 0.3319808469660407, "grad_norm": 0.11673708260059357, "learning_rate": 0.0005, "loss": 2.1144, "step": 87220 }, { "epoch": 0.33201890943416335, "grad_norm": 0.11935292929410934, "learning_rate": 0.0005, "loss": 2.1168, "step": 87230 }, { "epoch": 0.33205697190228606, "grad_norm": 0.12443012744188309, "learning_rate": 0.0005, "loss": 2.1237, "step": 87240 }, { "epoch": 0.3320950343704087, "grad_norm": 0.12056247889995575, "learning_rate": 0.0005, "loss": 2.1366, "step": 87250 }, { "epoch": 0.33213309683853137, "grad_norm": 0.11743541806936264, "learning_rate": 0.0005, "loss": 2.1075, "step": 87260 }, { "epoch": 0.3321711593066541, "grad_norm": 0.12407179921865463, "learning_rate": 0.0005, "loss": 2.1213, "step": 87270 }, { "epoch": 0.33220922177477674, "grad_norm": 0.12037166208028793, "learning_rate": 0.0005, "loss": 2.1258, "step": 87280 }, { "epoch": 0.33224728424289945, "grad_norm": 0.11869774013757706, "learning_rate": 0.0005, "loss": 2.1339, "step": 87290 }, { "epoch": 0.3322853467110221, "grad_norm": 0.1175379678606987, "learning_rate": 0.0005, "loss": 2.1107, "step": 87300 }, { "epoch": 0.3323234091791448, "grad_norm": 0.12499644607305527, "learning_rate": 0.0005, "loss": 2.125, "step": 87310 }, { "epoch": 0.3323614716472675, "grad_norm": 0.12239792197942734, "learning_rate": 0.0005, "loss": 2.1187, "step": 87320 }, { "epoch": 0.3323995341153902, "grad_norm": 0.1392892301082611, "learning_rate": 0.0005, "loss": 2.1231, "step": 87330 }, { "epoch": 0.33243759658351285, "grad_norm": 0.13349878787994385, "learning_rate": 0.0005, "loss": 2.1331, "step": 87340 }, { "epoch": 0.33247565905163556, "grad_norm": 0.13460364937782288, "learning_rate": 0.0005, "loss": 2.1166, "step": 87350 }, { "epoch": 0.3325137215197582, "grad_norm": 0.1504037231206894, "learning_rate": 0.0005, "loss": 2.1379, "step": 87360 }, { "epoch": 0.33255178398788093, "grad_norm": 0.11479683220386505, "learning_rate": 0.0005, "loss": 2.1202, "step": 87370 }, { "epoch": 0.3325898464560036, "grad_norm": 0.11970684677362442, "learning_rate": 0.0005, "loss": 2.134, "step": 87380 }, { "epoch": 0.3326279089241263, "grad_norm": 0.11894545704126358, "learning_rate": 0.0005, "loss": 2.1228, "step": 87390 }, { "epoch": 0.33266597139224896, "grad_norm": 0.12392428517341614, "learning_rate": 0.0005, "loss": 2.1416, "step": 87400 }, { "epoch": 0.3327040338603716, "grad_norm": 0.13159038126468658, "learning_rate": 0.0005, "loss": 2.1206, "step": 87410 }, { "epoch": 0.33274209632849433, "grad_norm": 0.12846173346042633, "learning_rate": 0.0005, "loss": 2.1241, "step": 87420 }, { "epoch": 0.332780158796617, "grad_norm": 0.12723685801029205, "learning_rate": 0.0005, "loss": 2.1171, "step": 87430 }, { "epoch": 0.3328182212647397, "grad_norm": 0.1315564215183258, "learning_rate": 0.0005, "loss": 2.135, "step": 87440 }, { "epoch": 0.33285628373286236, "grad_norm": 0.12111755460500717, "learning_rate": 0.0005, "loss": 2.1212, "step": 87450 }, { "epoch": 0.33289434620098507, "grad_norm": 0.1542283296585083, "learning_rate": 0.0005, "loss": 2.1317, "step": 87460 }, { "epoch": 0.3329324086691077, "grad_norm": 0.12875567376613617, "learning_rate": 0.0005, "loss": 2.1183, "step": 87470 }, { "epoch": 0.33297047113723044, "grad_norm": 0.1334831416606903, "learning_rate": 0.0005, "loss": 2.1261, "step": 87480 }, { "epoch": 0.3330085336053531, "grad_norm": 0.12709291279315948, "learning_rate": 0.0005, "loss": 2.1181, "step": 87490 }, { "epoch": 0.3330465960734758, "grad_norm": 0.12286421656608582, "learning_rate": 0.0005, "loss": 2.112, "step": 87500 }, { "epoch": 0.33308465854159847, "grad_norm": 0.1331913024187088, "learning_rate": 0.0005, "loss": 2.133, "step": 87510 }, { "epoch": 0.3331227210097212, "grad_norm": 0.1212693378329277, "learning_rate": 0.0005, "loss": 2.1273, "step": 87520 }, { "epoch": 0.33316078347784384, "grad_norm": 0.11731184273958206, "learning_rate": 0.0005, "loss": 2.1318, "step": 87530 }, { "epoch": 0.3331988459459665, "grad_norm": 0.12474881857633591, "learning_rate": 0.0005, "loss": 2.1302, "step": 87540 }, { "epoch": 0.3332369084140892, "grad_norm": 0.11683713644742966, "learning_rate": 0.0005, "loss": 2.1292, "step": 87550 }, { "epoch": 0.33327497088221186, "grad_norm": 0.12048343569040298, "learning_rate": 0.0005, "loss": 2.1225, "step": 87560 }, { "epoch": 0.3333130333503346, "grad_norm": 0.12834185361862183, "learning_rate": 0.0005, "loss": 2.1162, "step": 87570 }, { "epoch": 0.33335109581845723, "grad_norm": 0.11707880347967148, "learning_rate": 0.0005, "loss": 2.1152, "step": 87580 }, { "epoch": 0.33338915828657995, "grad_norm": 0.13019756972789764, "learning_rate": 0.0005, "loss": 2.1278, "step": 87590 }, { "epoch": 0.3334272207547026, "grad_norm": 0.12006811797618866, "learning_rate": 0.0005, "loss": 2.1342, "step": 87600 }, { "epoch": 0.3334652832228253, "grad_norm": 0.11884687095880508, "learning_rate": 0.0005, "loss": 2.1287, "step": 87610 }, { "epoch": 0.33350334569094797, "grad_norm": 0.13654226064682007, "learning_rate": 0.0005, "loss": 2.1222, "step": 87620 }, { "epoch": 0.3335414081590707, "grad_norm": 0.12057172507047653, "learning_rate": 0.0005, "loss": 2.1138, "step": 87630 }, { "epoch": 0.33357947062719334, "grad_norm": 0.12495489418506622, "learning_rate": 0.0005, "loss": 2.1125, "step": 87640 }, { "epoch": 0.33361753309531605, "grad_norm": 0.11863405257463455, "learning_rate": 0.0005, "loss": 2.1163, "step": 87650 }, { "epoch": 0.3336555955634387, "grad_norm": 0.12780635058879852, "learning_rate": 0.0005, "loss": 2.1294, "step": 87660 }, { "epoch": 0.3336936580315614, "grad_norm": 0.13394132256507874, "learning_rate": 0.0005, "loss": 2.1143, "step": 87670 }, { "epoch": 0.3337317204996841, "grad_norm": 0.13047321140766144, "learning_rate": 0.0005, "loss": 2.1064, "step": 87680 }, { "epoch": 0.33376978296780674, "grad_norm": 0.1311761736869812, "learning_rate": 0.0005, "loss": 2.1129, "step": 87690 }, { "epoch": 0.33380784543592945, "grad_norm": 0.11715999990701675, "learning_rate": 0.0005, "loss": 2.1351, "step": 87700 }, { "epoch": 0.3338459079040521, "grad_norm": 0.11198131740093231, "learning_rate": 0.0005, "loss": 2.1222, "step": 87710 }, { "epoch": 0.3338839703721748, "grad_norm": 0.12425164878368378, "learning_rate": 0.0005, "loss": 2.1108, "step": 87720 }, { "epoch": 0.3339220328402975, "grad_norm": 0.12112827599048615, "learning_rate": 0.0005, "loss": 2.137, "step": 87730 }, { "epoch": 0.3339600953084202, "grad_norm": 0.11413715034723282, "learning_rate": 0.0005, "loss": 2.1249, "step": 87740 }, { "epoch": 0.33399815777654285, "grad_norm": 0.13012996315956116, "learning_rate": 0.0005, "loss": 2.1243, "step": 87750 }, { "epoch": 0.33403622024466556, "grad_norm": 0.12076626718044281, "learning_rate": 0.0005, "loss": 2.1294, "step": 87760 }, { "epoch": 0.3340742827127882, "grad_norm": 0.12360948324203491, "learning_rate": 0.0005, "loss": 2.1276, "step": 87770 }, { "epoch": 0.33411234518091093, "grad_norm": 0.11759869009256363, "learning_rate": 0.0005, "loss": 2.1313, "step": 87780 }, { "epoch": 0.3341504076490336, "grad_norm": 0.1118142306804657, "learning_rate": 0.0005, "loss": 2.1187, "step": 87790 }, { "epoch": 0.3341884701171563, "grad_norm": 0.13246989250183105, "learning_rate": 0.0005, "loss": 2.1085, "step": 87800 }, { "epoch": 0.33422653258527896, "grad_norm": 0.11729934811592102, "learning_rate": 0.0005, "loss": 2.1186, "step": 87810 }, { "epoch": 0.33426459505340167, "grad_norm": 0.14080481231212616, "learning_rate": 0.0005, "loss": 2.1313, "step": 87820 }, { "epoch": 0.3343026575215243, "grad_norm": 0.11630220711231232, "learning_rate": 0.0005, "loss": 2.1208, "step": 87830 }, { "epoch": 0.334340719989647, "grad_norm": 0.13849031925201416, "learning_rate": 0.0005, "loss": 2.1132, "step": 87840 }, { "epoch": 0.3343787824577697, "grad_norm": 0.12446040660142899, "learning_rate": 0.0005, "loss": 2.1177, "step": 87850 }, { "epoch": 0.33441684492589235, "grad_norm": 0.11606734991073608, "learning_rate": 0.0005, "loss": 2.1121, "step": 87860 }, { "epoch": 0.33445490739401507, "grad_norm": 0.1261448860168457, "learning_rate": 0.0005, "loss": 2.1107, "step": 87870 }, { "epoch": 0.3344929698621377, "grad_norm": 0.12158872187137604, "learning_rate": 0.0005, "loss": 2.1234, "step": 87880 }, { "epoch": 0.33453103233026044, "grad_norm": 0.1390082687139511, "learning_rate": 0.0005, "loss": 2.1325, "step": 87890 }, { "epoch": 0.3345690947983831, "grad_norm": 0.13956613838672638, "learning_rate": 0.0005, "loss": 2.1123, "step": 87900 }, { "epoch": 0.3346071572665058, "grad_norm": 0.13154958188533783, "learning_rate": 0.0005, "loss": 2.1222, "step": 87910 }, { "epoch": 0.33464521973462846, "grad_norm": 0.12404187023639679, "learning_rate": 0.0005, "loss": 2.1157, "step": 87920 }, { "epoch": 0.3346832822027512, "grad_norm": 0.1383303999900818, "learning_rate": 0.0005, "loss": 2.128, "step": 87930 }, { "epoch": 0.33472134467087383, "grad_norm": 0.11572207510471344, "learning_rate": 0.0005, "loss": 2.1131, "step": 87940 }, { "epoch": 0.33475940713899655, "grad_norm": 0.1205492839217186, "learning_rate": 0.0005, "loss": 2.1138, "step": 87950 }, { "epoch": 0.3347974696071192, "grad_norm": 0.1155189797282219, "learning_rate": 0.0005, "loss": 2.122, "step": 87960 }, { "epoch": 0.33483553207524186, "grad_norm": 0.12543031573295593, "learning_rate": 0.0005, "loss": 2.1139, "step": 87970 }, { "epoch": 0.33487359454336457, "grad_norm": 0.1279943287372589, "learning_rate": 0.0005, "loss": 2.1247, "step": 87980 }, { "epoch": 0.33491165701148723, "grad_norm": 0.1298796683549881, "learning_rate": 0.0005, "loss": 2.1197, "step": 87990 }, { "epoch": 0.33494971947960994, "grad_norm": 0.12139620631933212, "learning_rate": 0.0005, "loss": 2.1214, "step": 88000 }, { "epoch": 0.3349877819477326, "grad_norm": 0.12247146666049957, "learning_rate": 0.0005, "loss": 2.1198, "step": 88010 }, { "epoch": 0.3350258444158553, "grad_norm": 0.12139434367418289, "learning_rate": 0.0005, "loss": 2.1395, "step": 88020 }, { "epoch": 0.33506390688397797, "grad_norm": 0.12310953438282013, "learning_rate": 0.0005, "loss": 2.1059, "step": 88030 }, { "epoch": 0.3351019693521007, "grad_norm": 0.12263503670692444, "learning_rate": 0.0005, "loss": 2.1309, "step": 88040 }, { "epoch": 0.33514003182022334, "grad_norm": 0.12221545726060867, "learning_rate": 0.0005, "loss": 2.1193, "step": 88050 }, { "epoch": 0.33517809428834605, "grad_norm": 0.11760221421718597, "learning_rate": 0.0005, "loss": 2.1095, "step": 88060 }, { "epoch": 0.3352161567564687, "grad_norm": 0.13482467830181122, "learning_rate": 0.0005, "loss": 2.1307, "step": 88070 }, { "epoch": 0.3352542192245914, "grad_norm": 0.11433924734592438, "learning_rate": 0.0005, "loss": 2.1199, "step": 88080 }, { "epoch": 0.3352922816927141, "grad_norm": 0.12347762286663055, "learning_rate": 0.0005, "loss": 2.1314, "step": 88090 }, { "epoch": 0.3353303441608368, "grad_norm": 0.12823589146137238, "learning_rate": 0.0005, "loss": 2.1129, "step": 88100 }, { "epoch": 0.33536840662895945, "grad_norm": 0.12408977746963501, "learning_rate": 0.0005, "loss": 2.1294, "step": 88110 }, { "epoch": 0.3354064690970821, "grad_norm": 0.12411817908287048, "learning_rate": 0.0005, "loss": 2.1318, "step": 88120 }, { "epoch": 0.3354445315652048, "grad_norm": 0.13858748972415924, "learning_rate": 0.0005, "loss": 2.1232, "step": 88130 }, { "epoch": 0.3354825940333275, "grad_norm": 0.13134264945983887, "learning_rate": 0.0005, "loss": 2.1168, "step": 88140 }, { "epoch": 0.3355206565014502, "grad_norm": 0.12682613730430603, "learning_rate": 0.0005, "loss": 2.1301, "step": 88150 }, { "epoch": 0.33555871896957284, "grad_norm": 0.12588344514369965, "learning_rate": 0.0005, "loss": 2.1136, "step": 88160 }, { "epoch": 0.33559678143769556, "grad_norm": 0.11390385776758194, "learning_rate": 0.0005, "loss": 2.126, "step": 88170 }, { "epoch": 0.3356348439058182, "grad_norm": 0.13311776518821716, "learning_rate": 0.0005, "loss": 2.1286, "step": 88180 }, { "epoch": 0.3356729063739409, "grad_norm": 0.1160586029291153, "learning_rate": 0.0005, "loss": 2.1156, "step": 88190 }, { "epoch": 0.3357109688420636, "grad_norm": 0.13641859591007233, "learning_rate": 0.0005, "loss": 2.142, "step": 88200 }, { "epoch": 0.3357490313101863, "grad_norm": 0.12794005870819092, "learning_rate": 0.0005, "loss": 2.1311, "step": 88210 }, { "epoch": 0.33578709377830895, "grad_norm": 0.11959764361381531, "learning_rate": 0.0005, "loss": 2.1053, "step": 88220 }, { "epoch": 0.33582515624643167, "grad_norm": 0.1298590749502182, "learning_rate": 0.0005, "loss": 2.1023, "step": 88230 }, { "epoch": 0.3358632187145543, "grad_norm": 0.11784891039133072, "learning_rate": 0.0005, "loss": 2.1076, "step": 88240 }, { "epoch": 0.33590128118267704, "grad_norm": 0.13400663435459137, "learning_rate": 0.0005, "loss": 2.1153, "step": 88250 }, { "epoch": 0.3359393436507997, "grad_norm": 0.13214954733848572, "learning_rate": 0.0005, "loss": 2.1317, "step": 88260 }, { "epoch": 0.33597740611892235, "grad_norm": 0.13531705737113953, "learning_rate": 0.0005, "loss": 2.1263, "step": 88270 }, { "epoch": 0.33601546858704506, "grad_norm": 0.1262243390083313, "learning_rate": 0.0005, "loss": 2.1144, "step": 88280 }, { "epoch": 0.3360535310551677, "grad_norm": 0.11508592963218689, "learning_rate": 0.0005, "loss": 2.1253, "step": 88290 }, { "epoch": 0.33609159352329043, "grad_norm": 0.1250382363796234, "learning_rate": 0.0005, "loss": 2.1227, "step": 88300 }, { "epoch": 0.3361296559914131, "grad_norm": 0.11667264252901077, "learning_rate": 0.0005, "loss": 2.1265, "step": 88310 }, { "epoch": 0.3361677184595358, "grad_norm": 0.11802863329648972, "learning_rate": 0.0005, "loss": 2.1248, "step": 88320 }, { "epoch": 0.33620578092765846, "grad_norm": 0.11546743661165237, "learning_rate": 0.0005, "loss": 2.1209, "step": 88330 }, { "epoch": 0.3362438433957812, "grad_norm": 0.13666917383670807, "learning_rate": 0.0005, "loss": 2.1152, "step": 88340 }, { "epoch": 0.33628190586390383, "grad_norm": 0.13605958223342896, "learning_rate": 0.0005, "loss": 2.1149, "step": 88350 }, { "epoch": 0.33631996833202654, "grad_norm": 0.12794260680675507, "learning_rate": 0.0005, "loss": 2.1343, "step": 88360 }, { "epoch": 0.3363580308001492, "grad_norm": 0.11411519348621368, "learning_rate": 0.0005, "loss": 2.1192, "step": 88370 }, { "epoch": 0.3363960932682719, "grad_norm": 0.11841807514429092, "learning_rate": 0.0005, "loss": 2.1214, "step": 88380 }, { "epoch": 0.33643415573639457, "grad_norm": 0.10895001888275146, "learning_rate": 0.0005, "loss": 2.1479, "step": 88390 }, { "epoch": 0.3364722182045172, "grad_norm": 0.12098965793848038, "learning_rate": 0.0005, "loss": 2.1272, "step": 88400 }, { "epoch": 0.33651028067263994, "grad_norm": 0.13581803441047668, "learning_rate": 0.0005, "loss": 2.1269, "step": 88410 }, { "epoch": 0.3365483431407626, "grad_norm": 0.12655942142009735, "learning_rate": 0.0005, "loss": 2.1144, "step": 88420 }, { "epoch": 0.3365864056088853, "grad_norm": 0.12174165993928909, "learning_rate": 0.0005, "loss": 2.13, "step": 88430 }, { "epoch": 0.33662446807700797, "grad_norm": 0.14150436222553253, "learning_rate": 0.0005, "loss": 2.1344, "step": 88440 }, { "epoch": 0.3366625305451307, "grad_norm": 0.12937967479228973, "learning_rate": 0.0005, "loss": 2.133, "step": 88450 }, { "epoch": 0.33670059301325334, "grad_norm": 0.12161042541265488, "learning_rate": 0.0005, "loss": 2.1261, "step": 88460 }, { "epoch": 0.33673865548137605, "grad_norm": 0.1310884952545166, "learning_rate": 0.0005, "loss": 2.1174, "step": 88470 }, { "epoch": 0.3367767179494987, "grad_norm": 0.1272687315940857, "learning_rate": 0.0005, "loss": 2.129, "step": 88480 }, { "epoch": 0.3368147804176214, "grad_norm": 0.12220699340105057, "learning_rate": 0.0005, "loss": 2.1152, "step": 88490 }, { "epoch": 0.3368528428857441, "grad_norm": 0.1358078271150589, "learning_rate": 0.0005, "loss": 2.1389, "step": 88500 }, { "epoch": 0.3368909053538668, "grad_norm": 0.11216282099485397, "learning_rate": 0.0005, "loss": 2.1222, "step": 88510 }, { "epoch": 0.33692896782198944, "grad_norm": 0.12000705301761627, "learning_rate": 0.0005, "loss": 2.1101, "step": 88520 }, { "epoch": 0.33696703029011216, "grad_norm": 0.13831983506679535, "learning_rate": 0.0005, "loss": 2.125, "step": 88530 }, { "epoch": 0.3370050927582348, "grad_norm": 0.11740361899137497, "learning_rate": 0.0005, "loss": 2.1224, "step": 88540 }, { "epoch": 0.33704315522635747, "grad_norm": 0.11932548135519028, "learning_rate": 0.0005, "loss": 2.122, "step": 88550 }, { "epoch": 0.3370812176944802, "grad_norm": 0.11470109224319458, "learning_rate": 0.0005, "loss": 2.111, "step": 88560 }, { "epoch": 0.33711928016260284, "grad_norm": 0.1229340210556984, "learning_rate": 0.0005, "loss": 2.1151, "step": 88570 }, { "epoch": 0.33715734263072555, "grad_norm": 0.12469479441642761, "learning_rate": 0.0005, "loss": 2.1148, "step": 88580 }, { "epoch": 0.3371954050988482, "grad_norm": 0.12042783200740814, "learning_rate": 0.0005, "loss": 2.1168, "step": 88590 }, { "epoch": 0.3372334675669709, "grad_norm": 0.12501518428325653, "learning_rate": 0.0005, "loss": 2.1298, "step": 88600 }, { "epoch": 0.3372715300350936, "grad_norm": 0.12341202050447464, "learning_rate": 0.0005, "loss": 2.125, "step": 88610 }, { "epoch": 0.3373095925032163, "grad_norm": 0.1101434975862503, "learning_rate": 0.0005, "loss": 2.1248, "step": 88620 }, { "epoch": 0.33734765497133895, "grad_norm": 0.13599303364753723, "learning_rate": 0.0005, "loss": 2.1393, "step": 88630 }, { "epoch": 0.33738571743946166, "grad_norm": 0.1298111379146576, "learning_rate": 0.0005, "loss": 2.1288, "step": 88640 }, { "epoch": 0.3374237799075843, "grad_norm": 0.1254395842552185, "learning_rate": 0.0005, "loss": 2.1287, "step": 88650 }, { "epoch": 0.33746184237570703, "grad_norm": 0.13073401153087616, "learning_rate": 0.0005, "loss": 2.1323, "step": 88660 }, { "epoch": 0.3374999048438297, "grad_norm": 0.12592065334320068, "learning_rate": 0.0005, "loss": 2.1146, "step": 88670 }, { "epoch": 0.3375379673119524, "grad_norm": 0.13244742155075073, "learning_rate": 0.0005, "loss": 2.1178, "step": 88680 }, { "epoch": 0.33757602978007506, "grad_norm": 0.12912435829639435, "learning_rate": 0.0005, "loss": 2.1079, "step": 88690 }, { "epoch": 0.3376140922481977, "grad_norm": 0.13592544198036194, "learning_rate": 0.0005, "loss": 2.1244, "step": 88700 }, { "epoch": 0.33765215471632043, "grad_norm": 0.1403253972530365, "learning_rate": 0.0005, "loss": 2.1211, "step": 88710 }, { "epoch": 0.3376902171844431, "grad_norm": 0.14164945483207703, "learning_rate": 0.0005, "loss": 2.1096, "step": 88720 }, { "epoch": 0.3377282796525658, "grad_norm": 0.12777163088321686, "learning_rate": 0.0005, "loss": 2.1229, "step": 88730 }, { "epoch": 0.33776634212068846, "grad_norm": 0.11780541390180588, "learning_rate": 0.0005, "loss": 2.1275, "step": 88740 }, { "epoch": 0.33780440458881117, "grad_norm": 0.1289723813533783, "learning_rate": 0.0005, "loss": 2.143, "step": 88750 }, { "epoch": 0.3378424670569338, "grad_norm": 0.12288056313991547, "learning_rate": 0.0005, "loss": 2.1302, "step": 88760 }, { "epoch": 0.33788052952505654, "grad_norm": 0.11306439340114594, "learning_rate": 0.0005, "loss": 2.1226, "step": 88770 }, { "epoch": 0.3379185919931792, "grad_norm": 0.12616066634655, "learning_rate": 0.0005, "loss": 2.1183, "step": 88780 }, { "epoch": 0.3379566544613019, "grad_norm": 0.12859830260276794, "learning_rate": 0.0005, "loss": 2.127, "step": 88790 }, { "epoch": 0.33799471692942457, "grad_norm": 0.11339247971773148, "learning_rate": 0.0005, "loss": 2.1284, "step": 88800 }, { "epoch": 0.3380327793975473, "grad_norm": 0.1621411293745041, "learning_rate": 0.0005, "loss": 2.1262, "step": 88810 }, { "epoch": 0.33807084186566994, "grad_norm": 0.11376577615737915, "learning_rate": 0.0005, "loss": 2.1176, "step": 88820 }, { "epoch": 0.33810890433379265, "grad_norm": 0.14351233839988708, "learning_rate": 0.0005, "loss": 2.1068, "step": 88830 }, { "epoch": 0.3381469668019153, "grad_norm": 0.12574923038482666, "learning_rate": 0.0005, "loss": 2.1183, "step": 88840 }, { "epoch": 0.33818502927003796, "grad_norm": 0.1207105964422226, "learning_rate": 0.0005, "loss": 2.1192, "step": 88850 }, { "epoch": 0.3382230917381607, "grad_norm": 0.128650963306427, "learning_rate": 0.0005, "loss": 2.1156, "step": 88860 }, { "epoch": 0.33826115420628333, "grad_norm": 0.13034354150295258, "learning_rate": 0.0005, "loss": 2.1261, "step": 88870 }, { "epoch": 0.33829921667440604, "grad_norm": 0.1216464415192604, "learning_rate": 0.0005, "loss": 2.1293, "step": 88880 }, { "epoch": 0.3383372791425287, "grad_norm": 0.12070560455322266, "learning_rate": 0.0005, "loss": 2.1375, "step": 88890 }, { "epoch": 0.3383753416106514, "grad_norm": 0.11654239892959595, "learning_rate": 0.0005, "loss": 2.1085, "step": 88900 }, { "epoch": 0.33841340407877407, "grad_norm": 0.13917605578899384, "learning_rate": 0.0005, "loss": 2.1379, "step": 88910 }, { "epoch": 0.3384514665468968, "grad_norm": 0.10812585055828094, "learning_rate": 0.0005, "loss": 2.1275, "step": 88920 }, { "epoch": 0.33848952901501944, "grad_norm": 0.11935912817716599, "learning_rate": 0.0005, "loss": 2.1376, "step": 88930 }, { "epoch": 0.33852759148314215, "grad_norm": 0.1279357522726059, "learning_rate": 0.0005, "loss": 2.1144, "step": 88940 }, { "epoch": 0.3385656539512648, "grad_norm": 0.1319705843925476, "learning_rate": 0.0005, "loss": 2.1335, "step": 88950 }, { "epoch": 0.3386037164193875, "grad_norm": 0.129435196518898, "learning_rate": 0.0005, "loss": 2.1319, "step": 88960 }, { "epoch": 0.3386417788875102, "grad_norm": 0.13411614298820496, "learning_rate": 0.0005, "loss": 2.1199, "step": 88970 }, { "epoch": 0.33867984135563284, "grad_norm": 0.12597118318080902, "learning_rate": 0.0005, "loss": 2.1412, "step": 88980 }, { "epoch": 0.33871790382375555, "grad_norm": 0.1208195760846138, "learning_rate": 0.0005, "loss": 2.1174, "step": 88990 }, { "epoch": 0.3387559662918782, "grad_norm": 0.12813125550746918, "learning_rate": 0.0005, "loss": 2.1083, "step": 89000 }, { "epoch": 0.3387940287600009, "grad_norm": 0.11439248919487, "learning_rate": 0.0005, "loss": 2.1196, "step": 89010 }, { "epoch": 0.3388320912281236, "grad_norm": 0.12252800166606903, "learning_rate": 0.0005, "loss": 2.1267, "step": 89020 }, { "epoch": 0.3388701536962463, "grad_norm": 0.12275537848472595, "learning_rate": 0.0005, "loss": 2.1058, "step": 89030 }, { "epoch": 0.33890821616436895, "grad_norm": 0.12595809996128082, "learning_rate": 0.0005, "loss": 2.1146, "step": 89040 }, { "epoch": 0.33894627863249166, "grad_norm": 0.12871016561985016, "learning_rate": 0.0005, "loss": 2.1236, "step": 89050 }, { "epoch": 0.3389843411006143, "grad_norm": 0.16490459442138672, "learning_rate": 0.0005, "loss": 2.1192, "step": 89060 }, { "epoch": 0.33902240356873703, "grad_norm": 0.16081076860427856, "learning_rate": 0.0005, "loss": 2.1337, "step": 89070 }, { "epoch": 0.3390604660368597, "grad_norm": 0.12105927616357803, "learning_rate": 0.0005, "loss": 2.1102, "step": 89080 }, { "epoch": 0.3390985285049824, "grad_norm": 0.1160271167755127, "learning_rate": 0.0005, "loss": 2.1396, "step": 89090 }, { "epoch": 0.33913659097310506, "grad_norm": 0.11818355321884155, "learning_rate": 0.0005, "loss": 2.132, "step": 89100 }, { "epoch": 0.33917465344122777, "grad_norm": 0.12066550552845001, "learning_rate": 0.0005, "loss": 2.1195, "step": 89110 }, { "epoch": 0.3392127159093504, "grad_norm": 0.11974430084228516, "learning_rate": 0.0005, "loss": 2.1211, "step": 89120 }, { "epoch": 0.3392507783774731, "grad_norm": 0.1406737118959427, "learning_rate": 0.0005, "loss": 2.1155, "step": 89130 }, { "epoch": 0.3392888408455958, "grad_norm": 0.13470028340816498, "learning_rate": 0.0005, "loss": 2.1259, "step": 89140 }, { "epoch": 0.33932690331371845, "grad_norm": 0.13147686421871185, "learning_rate": 0.0005, "loss": 2.1142, "step": 89150 }, { "epoch": 0.33936496578184117, "grad_norm": 0.11957060545682907, "learning_rate": 0.0005, "loss": 2.1246, "step": 89160 }, { "epoch": 0.3394030282499638, "grad_norm": 0.12157806009054184, "learning_rate": 0.0005, "loss": 2.1179, "step": 89170 }, { "epoch": 0.33944109071808654, "grad_norm": 0.1386110782623291, "learning_rate": 0.0005, "loss": 2.1206, "step": 89180 }, { "epoch": 0.3394791531862092, "grad_norm": 0.11687417328357697, "learning_rate": 0.0005, "loss": 2.1158, "step": 89190 }, { "epoch": 0.3395172156543319, "grad_norm": 0.1264970302581787, "learning_rate": 0.0005, "loss": 2.1444, "step": 89200 }, { "epoch": 0.33955527812245456, "grad_norm": 0.13229866325855255, "learning_rate": 0.0005, "loss": 2.1236, "step": 89210 }, { "epoch": 0.3395933405905773, "grad_norm": 0.14732640981674194, "learning_rate": 0.0005, "loss": 2.1259, "step": 89220 }, { "epoch": 0.33963140305869993, "grad_norm": 0.12414790689945221, "learning_rate": 0.0005, "loss": 2.1229, "step": 89230 }, { "epoch": 0.33966946552682264, "grad_norm": 0.13430891931056976, "learning_rate": 0.0005, "loss": 2.1351, "step": 89240 }, { "epoch": 0.3397075279949453, "grad_norm": 0.11227301508188248, "learning_rate": 0.0005, "loss": 2.1197, "step": 89250 }, { "epoch": 0.339745590463068, "grad_norm": 0.14740775525569916, "learning_rate": 0.0005, "loss": 2.1353, "step": 89260 }, { "epoch": 0.33978365293119067, "grad_norm": 0.1203657016158104, "learning_rate": 0.0005, "loss": 2.1227, "step": 89270 }, { "epoch": 0.33982171539931333, "grad_norm": 0.12663409113883972, "learning_rate": 0.0005, "loss": 2.1268, "step": 89280 }, { "epoch": 0.33985977786743604, "grad_norm": 0.12828192114830017, "learning_rate": 0.0005, "loss": 2.1053, "step": 89290 }, { "epoch": 0.3398978403355587, "grad_norm": 0.1220042034983635, "learning_rate": 0.0005, "loss": 2.1223, "step": 89300 }, { "epoch": 0.3399359028036814, "grad_norm": 0.13182102143764496, "learning_rate": 0.0005, "loss": 2.1128, "step": 89310 }, { "epoch": 0.33997396527180407, "grad_norm": 0.13868823647499084, "learning_rate": 0.0005, "loss": 2.1287, "step": 89320 }, { "epoch": 0.3400120277399268, "grad_norm": 0.11847683787345886, "learning_rate": 0.0005, "loss": 2.1272, "step": 89330 }, { "epoch": 0.34005009020804944, "grad_norm": 0.11458242684602737, "learning_rate": 0.0005, "loss": 2.1331, "step": 89340 }, { "epoch": 0.34008815267617215, "grad_norm": 0.12344954907894135, "learning_rate": 0.0005, "loss": 2.1283, "step": 89350 }, { "epoch": 0.3401262151442948, "grad_norm": 0.12169872969388962, "learning_rate": 0.0005, "loss": 2.1105, "step": 89360 }, { "epoch": 0.3401642776124175, "grad_norm": 0.1187531128525734, "learning_rate": 0.0005, "loss": 2.1264, "step": 89370 }, { "epoch": 0.3402023400805402, "grad_norm": 0.1197567880153656, "learning_rate": 0.0005, "loss": 2.1139, "step": 89380 }, { "epoch": 0.3402404025486629, "grad_norm": 0.11214889585971832, "learning_rate": 0.0005, "loss": 2.1317, "step": 89390 }, { "epoch": 0.34027846501678555, "grad_norm": 0.12737496197223663, "learning_rate": 0.0005, "loss": 2.1167, "step": 89400 }, { "epoch": 0.3403165274849082, "grad_norm": 0.12622858583927155, "learning_rate": 0.0005, "loss": 2.118, "step": 89410 }, { "epoch": 0.3403545899530309, "grad_norm": 0.12642420828342438, "learning_rate": 0.0005, "loss": 2.1289, "step": 89420 }, { "epoch": 0.3403926524211536, "grad_norm": 0.1310669183731079, "learning_rate": 0.0005, "loss": 2.1288, "step": 89430 }, { "epoch": 0.3404307148892763, "grad_norm": 0.22463572025299072, "learning_rate": 0.0005, "loss": 2.1124, "step": 89440 }, { "epoch": 0.34046877735739894, "grad_norm": 0.12545807659626007, "learning_rate": 0.0005, "loss": 2.1315, "step": 89450 }, { "epoch": 0.34050683982552166, "grad_norm": 0.13150864839553833, "learning_rate": 0.0005, "loss": 2.1397, "step": 89460 }, { "epoch": 0.3405449022936443, "grad_norm": 0.12236011028289795, "learning_rate": 0.0005, "loss": 2.1336, "step": 89470 }, { "epoch": 0.340582964761767, "grad_norm": 0.1419258564710617, "learning_rate": 0.0005, "loss": 2.1126, "step": 89480 }, { "epoch": 0.3406210272298897, "grad_norm": 0.13559618592262268, "learning_rate": 0.0005, "loss": 2.1148, "step": 89490 }, { "epoch": 0.3406590896980124, "grad_norm": 0.13465294241905212, "learning_rate": 0.0005, "loss": 2.1071, "step": 89500 }, { "epoch": 0.34069715216613505, "grad_norm": 0.11645340174436569, "learning_rate": 0.0005, "loss": 2.1206, "step": 89510 }, { "epoch": 0.34073521463425777, "grad_norm": 0.1335042268037796, "learning_rate": 0.0005, "loss": 2.1028, "step": 89520 }, { "epoch": 0.3407732771023804, "grad_norm": 0.11522427201271057, "learning_rate": 0.0005, "loss": 2.1294, "step": 89530 }, { "epoch": 0.34081133957050314, "grad_norm": 0.11598663032054901, "learning_rate": 0.0005, "loss": 2.1244, "step": 89540 }, { "epoch": 0.3408494020386258, "grad_norm": 0.1153661459684372, "learning_rate": 0.0005, "loss": 2.1358, "step": 89550 }, { "epoch": 0.34088746450674845, "grad_norm": 0.13112501800060272, "learning_rate": 0.0005, "loss": 2.1275, "step": 89560 }, { "epoch": 0.34092552697487116, "grad_norm": 0.15203246474266052, "learning_rate": 0.0005, "loss": 2.0977, "step": 89570 }, { "epoch": 0.3409635894429938, "grad_norm": 0.11616481095552444, "learning_rate": 0.0005, "loss": 2.123, "step": 89580 }, { "epoch": 0.34100165191111653, "grad_norm": 0.11862931400537491, "learning_rate": 0.0005, "loss": 2.1372, "step": 89590 }, { "epoch": 0.3410397143792392, "grad_norm": 0.1210283637046814, "learning_rate": 0.0005, "loss": 2.1138, "step": 89600 }, { "epoch": 0.3410777768473619, "grad_norm": 0.13654400408267975, "learning_rate": 0.0005, "loss": 2.1242, "step": 89610 }, { "epoch": 0.34111583931548456, "grad_norm": 0.1305961310863495, "learning_rate": 0.0005, "loss": 2.1151, "step": 89620 }, { "epoch": 0.34115390178360727, "grad_norm": 0.12389110773801804, "learning_rate": 0.0005, "loss": 2.1386, "step": 89630 }, { "epoch": 0.34119196425172993, "grad_norm": 0.11891574412584305, "learning_rate": 0.0005, "loss": 2.1271, "step": 89640 }, { "epoch": 0.34123002671985264, "grad_norm": 0.1385021209716797, "learning_rate": 0.0005, "loss": 2.1274, "step": 89650 }, { "epoch": 0.3412680891879753, "grad_norm": 0.12695041298866272, "learning_rate": 0.0005, "loss": 2.1297, "step": 89660 }, { "epoch": 0.341306151656098, "grad_norm": 0.1301238238811493, "learning_rate": 0.0005, "loss": 2.113, "step": 89670 }, { "epoch": 0.34134421412422067, "grad_norm": 0.1111309602856636, "learning_rate": 0.0005, "loss": 2.1135, "step": 89680 }, { "epoch": 0.3413822765923434, "grad_norm": 0.11588682234287262, "learning_rate": 0.0005, "loss": 2.123, "step": 89690 }, { "epoch": 0.34142033906046604, "grad_norm": 0.12726850807666779, "learning_rate": 0.0005, "loss": 2.1194, "step": 89700 }, { "epoch": 0.3414584015285887, "grad_norm": 0.13112305104732513, "learning_rate": 0.0005, "loss": 2.1189, "step": 89710 }, { "epoch": 0.3414964639967114, "grad_norm": 0.12595675885677338, "learning_rate": 0.0005, "loss": 2.115, "step": 89720 }, { "epoch": 0.34153452646483407, "grad_norm": 0.1312045007944107, "learning_rate": 0.0005, "loss": 2.1178, "step": 89730 }, { "epoch": 0.3415725889329568, "grad_norm": 0.1294623613357544, "learning_rate": 0.0005, "loss": 2.1247, "step": 89740 }, { "epoch": 0.34161065140107943, "grad_norm": 0.11580775678157806, "learning_rate": 0.0005, "loss": 2.1333, "step": 89750 }, { "epoch": 0.34164871386920215, "grad_norm": 0.11577937006950378, "learning_rate": 0.0005, "loss": 2.1317, "step": 89760 }, { "epoch": 0.3416867763373248, "grad_norm": 0.13450708985328674, "learning_rate": 0.0005, "loss": 2.1218, "step": 89770 }, { "epoch": 0.3417248388054475, "grad_norm": 0.12490272521972656, "learning_rate": 0.0005, "loss": 2.1103, "step": 89780 }, { "epoch": 0.3417629012735702, "grad_norm": 0.14420974254608154, "learning_rate": 0.0005, "loss": 2.1368, "step": 89790 }, { "epoch": 0.3418009637416929, "grad_norm": 0.12905338406562805, "learning_rate": 0.0005, "loss": 2.1271, "step": 89800 }, { "epoch": 0.34183902620981554, "grad_norm": 0.12013151496648788, "learning_rate": 0.0005, "loss": 2.1189, "step": 89810 }, { "epoch": 0.34187708867793826, "grad_norm": 0.12324290722608566, "learning_rate": 0.0005, "loss": 2.1202, "step": 89820 }, { "epoch": 0.3419151511460609, "grad_norm": 0.12030022591352463, "learning_rate": 0.0005, "loss": 2.1151, "step": 89830 }, { "epoch": 0.34195321361418357, "grad_norm": 0.12494122236967087, "learning_rate": 0.0005, "loss": 2.1184, "step": 89840 }, { "epoch": 0.3419912760823063, "grad_norm": 0.11866031587123871, "learning_rate": 0.0005, "loss": 2.1172, "step": 89850 }, { "epoch": 0.34202933855042894, "grad_norm": 0.12402195483446121, "learning_rate": 0.0005, "loss": 2.1238, "step": 89860 }, { "epoch": 0.34206740101855165, "grad_norm": 0.12285647541284561, "learning_rate": 0.0005, "loss": 2.1272, "step": 89870 }, { "epoch": 0.3421054634866743, "grad_norm": 0.1358739733695984, "learning_rate": 0.0005, "loss": 2.1178, "step": 89880 }, { "epoch": 0.342143525954797, "grad_norm": 0.12848158180713654, "learning_rate": 0.0005, "loss": 2.1277, "step": 89890 }, { "epoch": 0.3421815884229197, "grad_norm": 0.1267717331647873, "learning_rate": 0.0005, "loss": 2.1171, "step": 89900 }, { "epoch": 0.3422196508910424, "grad_norm": 0.13753780722618103, "learning_rate": 0.0005, "loss": 2.1238, "step": 89910 }, { "epoch": 0.34225771335916505, "grad_norm": 0.12299558520317078, "learning_rate": 0.0005, "loss": 2.1159, "step": 89920 }, { "epoch": 0.34229577582728776, "grad_norm": 0.12542882561683655, "learning_rate": 0.0005, "loss": 2.1359, "step": 89930 }, { "epoch": 0.3423338382954104, "grad_norm": 0.14531518518924713, "learning_rate": 0.0005, "loss": 2.1186, "step": 89940 }, { "epoch": 0.34237190076353313, "grad_norm": 0.1264420747756958, "learning_rate": 0.0005, "loss": 2.1346, "step": 89950 }, { "epoch": 0.3424099632316558, "grad_norm": 0.12023679912090302, "learning_rate": 0.0005, "loss": 2.1383, "step": 89960 }, { "epoch": 0.3424480256997785, "grad_norm": 0.11134123802185059, "learning_rate": 0.0005, "loss": 2.1234, "step": 89970 }, { "epoch": 0.34248608816790116, "grad_norm": 0.12393639236688614, "learning_rate": 0.0005, "loss": 2.1263, "step": 89980 }, { "epoch": 0.3425241506360238, "grad_norm": 0.12131131440401077, "learning_rate": 0.0005, "loss": 2.1285, "step": 89990 }, { "epoch": 0.34256221310414653, "grad_norm": 0.12403381615877151, "learning_rate": 0.0005, "loss": 2.1062, "step": 90000 }, { "epoch": 0.3426002755722692, "grad_norm": 0.11932516098022461, "learning_rate": 0.0005, "loss": 2.126, "step": 90010 }, { "epoch": 0.3426383380403919, "grad_norm": 0.12505844235420227, "learning_rate": 0.0005, "loss": 2.1174, "step": 90020 }, { "epoch": 0.34267640050851456, "grad_norm": 0.13085445761680603, "learning_rate": 0.0005, "loss": 2.1339, "step": 90030 }, { "epoch": 0.34271446297663727, "grad_norm": 0.12016001343727112, "learning_rate": 0.0005, "loss": 2.1316, "step": 90040 }, { "epoch": 0.3427525254447599, "grad_norm": 0.13243511319160461, "learning_rate": 0.0005, "loss": 2.1385, "step": 90050 }, { "epoch": 0.34279058791288264, "grad_norm": 0.12323072552680969, "learning_rate": 0.0005, "loss": 2.144, "step": 90060 }, { "epoch": 0.3428286503810053, "grad_norm": 0.12400073558092117, "learning_rate": 0.0005, "loss": 2.1161, "step": 90070 }, { "epoch": 0.342866712849128, "grad_norm": 0.12671291828155518, "learning_rate": 0.0005, "loss": 2.1284, "step": 90080 }, { "epoch": 0.34290477531725067, "grad_norm": 0.1255476474761963, "learning_rate": 0.0005, "loss": 2.1233, "step": 90090 }, { "epoch": 0.3429428377853734, "grad_norm": 0.1331200748682022, "learning_rate": 0.0005, "loss": 2.1181, "step": 90100 }, { "epoch": 0.34298090025349603, "grad_norm": 0.12602712213993073, "learning_rate": 0.0005, "loss": 2.133, "step": 90110 }, { "epoch": 0.34301896272161875, "grad_norm": 0.12500569224357605, "learning_rate": 0.0005, "loss": 2.1184, "step": 90120 }, { "epoch": 0.3430570251897414, "grad_norm": 0.12748053669929504, "learning_rate": 0.0005, "loss": 2.1214, "step": 90130 }, { "epoch": 0.34309508765786406, "grad_norm": 0.12714222073554993, "learning_rate": 0.0005, "loss": 2.1187, "step": 90140 }, { "epoch": 0.3431331501259868, "grad_norm": 0.1465597301721573, "learning_rate": 0.0005, "loss": 2.1384, "step": 90150 }, { "epoch": 0.34317121259410943, "grad_norm": 0.13284499943256378, "learning_rate": 0.0005, "loss": 2.1199, "step": 90160 }, { "epoch": 0.34320927506223214, "grad_norm": 0.12137099355459213, "learning_rate": 0.0005, "loss": 2.1302, "step": 90170 }, { "epoch": 0.3432473375303548, "grad_norm": 0.12611745297908783, "learning_rate": 0.0005, "loss": 2.1236, "step": 90180 }, { "epoch": 0.3432853999984775, "grad_norm": 0.12008702009916306, "learning_rate": 0.0005, "loss": 2.1216, "step": 90190 }, { "epoch": 0.34332346246660017, "grad_norm": 0.12618735432624817, "learning_rate": 0.0005, "loss": 2.116, "step": 90200 }, { "epoch": 0.3433615249347229, "grad_norm": 0.13112717866897583, "learning_rate": 0.0005, "loss": 2.1263, "step": 90210 }, { "epoch": 0.34339958740284554, "grad_norm": 0.12244976311922073, "learning_rate": 0.0005, "loss": 2.1199, "step": 90220 }, { "epoch": 0.34343764987096825, "grad_norm": 0.12034299969673157, "learning_rate": 0.0005, "loss": 2.1061, "step": 90230 }, { "epoch": 0.3434757123390909, "grad_norm": 0.1241462454199791, "learning_rate": 0.0005, "loss": 2.1099, "step": 90240 }, { "epoch": 0.3435137748072136, "grad_norm": 0.11790863424539566, "learning_rate": 0.0005, "loss": 2.1154, "step": 90250 }, { "epoch": 0.3435518372753363, "grad_norm": 0.13333600759506226, "learning_rate": 0.0005, "loss": 2.1299, "step": 90260 }, { "epoch": 0.34358989974345894, "grad_norm": 0.1280602663755417, "learning_rate": 0.0005, "loss": 2.1348, "step": 90270 }, { "epoch": 0.34362796221158165, "grad_norm": 0.13984990119934082, "learning_rate": 0.0005, "loss": 2.1279, "step": 90280 }, { "epoch": 0.3436660246797043, "grad_norm": 0.1236758604645729, "learning_rate": 0.0005, "loss": 2.1278, "step": 90290 }, { "epoch": 0.343704087147827, "grad_norm": 0.11663752049207687, "learning_rate": 0.0005, "loss": 2.1221, "step": 90300 }, { "epoch": 0.3437421496159497, "grad_norm": 0.10988294333219528, "learning_rate": 0.0005, "loss": 2.116, "step": 90310 }, { "epoch": 0.3437802120840724, "grad_norm": 0.11985557526350021, "learning_rate": 0.0005, "loss": 2.1404, "step": 90320 }, { "epoch": 0.34381827455219505, "grad_norm": 0.13246393203735352, "learning_rate": 0.0005, "loss": 2.1186, "step": 90330 }, { "epoch": 0.34385633702031776, "grad_norm": 0.11569251865148544, "learning_rate": 0.0005, "loss": 2.1351, "step": 90340 }, { "epoch": 0.3438943994884404, "grad_norm": 0.12747006118297577, "learning_rate": 0.0005, "loss": 2.1395, "step": 90350 }, { "epoch": 0.34393246195656313, "grad_norm": 0.12384822964668274, "learning_rate": 0.0005, "loss": 2.1213, "step": 90360 }, { "epoch": 0.3439705244246858, "grad_norm": 0.1292153000831604, "learning_rate": 0.0005, "loss": 2.1276, "step": 90370 }, { "epoch": 0.3440085868928085, "grad_norm": 0.1250191479921341, "learning_rate": 0.0005, "loss": 2.1259, "step": 90380 }, { "epoch": 0.34404664936093116, "grad_norm": 0.12791825830936432, "learning_rate": 0.0005, "loss": 2.1377, "step": 90390 }, { "epoch": 0.34408471182905387, "grad_norm": 0.1329050213098526, "learning_rate": 0.0005, "loss": 2.1138, "step": 90400 }, { "epoch": 0.3441227742971765, "grad_norm": 0.12665830552577972, "learning_rate": 0.0005, "loss": 2.1225, "step": 90410 }, { "epoch": 0.3441608367652992, "grad_norm": 0.12122794985771179, "learning_rate": 0.0005, "loss": 2.1223, "step": 90420 }, { "epoch": 0.3441988992334219, "grad_norm": 0.12537881731987, "learning_rate": 0.0005, "loss": 2.1208, "step": 90430 }, { "epoch": 0.34423696170154455, "grad_norm": 0.11910541355609894, "learning_rate": 0.0005, "loss": 2.1359, "step": 90440 }, { "epoch": 0.34427502416966727, "grad_norm": 0.1352231651544571, "learning_rate": 0.0005, "loss": 2.1147, "step": 90450 }, { "epoch": 0.3443130866377899, "grad_norm": 0.1324281394481659, "learning_rate": 0.0005, "loss": 2.1176, "step": 90460 }, { "epoch": 0.34435114910591264, "grad_norm": 0.11365267634391785, "learning_rate": 0.0005, "loss": 2.1095, "step": 90470 }, { "epoch": 0.3443892115740353, "grad_norm": 0.12739905714988708, "learning_rate": 0.0005, "loss": 2.1299, "step": 90480 }, { "epoch": 0.344427274042158, "grad_norm": 0.11905629932880402, "learning_rate": 0.0005, "loss": 2.1142, "step": 90490 }, { "epoch": 0.34446533651028066, "grad_norm": 0.11469315737485886, "learning_rate": 0.0005, "loss": 2.1287, "step": 90500 }, { "epoch": 0.3445033989784034, "grad_norm": 0.11756088584661484, "learning_rate": 0.0005, "loss": 2.1284, "step": 90510 }, { "epoch": 0.34454146144652603, "grad_norm": 0.12300854176282883, "learning_rate": 0.0005, "loss": 2.1385, "step": 90520 }, { "epoch": 0.34457952391464874, "grad_norm": 0.1245260089635849, "learning_rate": 0.0005, "loss": 2.1159, "step": 90530 }, { "epoch": 0.3446175863827714, "grad_norm": 0.12596364319324493, "learning_rate": 0.0005, "loss": 2.122, "step": 90540 }, { "epoch": 0.3446556488508941, "grad_norm": 0.12460871785879135, "learning_rate": 0.0005, "loss": 2.1225, "step": 90550 }, { "epoch": 0.34469371131901677, "grad_norm": 0.1315487176179886, "learning_rate": 0.0005, "loss": 2.1298, "step": 90560 }, { "epoch": 0.34473177378713943, "grad_norm": 0.12347906827926636, "learning_rate": 0.0005, "loss": 2.1247, "step": 90570 }, { "epoch": 0.34476983625526214, "grad_norm": 0.11345997452735901, "learning_rate": 0.0005, "loss": 2.1126, "step": 90580 }, { "epoch": 0.3448078987233848, "grad_norm": 0.13715878129005432, "learning_rate": 0.0005, "loss": 2.1342, "step": 90590 }, { "epoch": 0.3448459611915075, "grad_norm": 0.13639040291309357, "learning_rate": 0.0005, "loss": 2.11, "step": 90600 }, { "epoch": 0.34488402365963017, "grad_norm": 0.1313633918762207, "learning_rate": 0.0005, "loss": 2.1246, "step": 90610 }, { "epoch": 0.3449220861277529, "grad_norm": 0.13886944949626923, "learning_rate": 0.0005, "loss": 2.1178, "step": 90620 }, { "epoch": 0.34496014859587554, "grad_norm": 0.12198083847761154, "learning_rate": 0.0005, "loss": 2.1176, "step": 90630 }, { "epoch": 0.34499821106399825, "grad_norm": 0.1195736899971962, "learning_rate": 0.0005, "loss": 2.1156, "step": 90640 }, { "epoch": 0.3450362735321209, "grad_norm": 0.12475568056106567, "learning_rate": 0.0005, "loss": 2.1213, "step": 90650 }, { "epoch": 0.3450743360002436, "grad_norm": 0.12828674912452698, "learning_rate": 0.0005, "loss": 2.111, "step": 90660 }, { "epoch": 0.3451123984683663, "grad_norm": 0.12407656759023666, "learning_rate": 0.0005, "loss": 2.1337, "step": 90670 }, { "epoch": 0.345150460936489, "grad_norm": 0.13938532769680023, "learning_rate": 0.0005, "loss": 2.1266, "step": 90680 }, { "epoch": 0.34518852340461165, "grad_norm": 0.11502964049577713, "learning_rate": 0.0005, "loss": 2.1119, "step": 90690 }, { "epoch": 0.3452265858727343, "grad_norm": 0.14421029388904572, "learning_rate": 0.0005, "loss": 2.1237, "step": 90700 }, { "epoch": 0.345264648340857, "grad_norm": 0.12795130908489227, "learning_rate": 0.0005, "loss": 2.1303, "step": 90710 }, { "epoch": 0.3453027108089797, "grad_norm": 0.11910250782966614, "learning_rate": 0.0005, "loss": 2.1294, "step": 90720 }, { "epoch": 0.3453407732771024, "grad_norm": 0.11697063595056534, "learning_rate": 0.0005, "loss": 2.1174, "step": 90730 }, { "epoch": 0.34537883574522504, "grad_norm": 0.12007717788219452, "learning_rate": 0.0005, "loss": 2.1271, "step": 90740 }, { "epoch": 0.34541689821334776, "grad_norm": 0.13864076137542725, "learning_rate": 0.0005, "loss": 2.1203, "step": 90750 }, { "epoch": 0.3454549606814704, "grad_norm": 0.13291937112808228, "learning_rate": 0.0005, "loss": 2.1183, "step": 90760 }, { "epoch": 0.3454930231495931, "grad_norm": 0.12697722017765045, "learning_rate": 0.0005, "loss": 2.1338, "step": 90770 }, { "epoch": 0.3455310856177158, "grad_norm": 0.11438869684934616, "learning_rate": 0.0005, "loss": 2.1268, "step": 90780 }, { "epoch": 0.3455691480858385, "grad_norm": 0.14781485497951508, "learning_rate": 0.0005, "loss": 2.1414, "step": 90790 }, { "epoch": 0.34560721055396115, "grad_norm": 0.13403859734535217, "learning_rate": 0.0005, "loss": 2.1212, "step": 90800 }, { "epoch": 0.34564527302208387, "grad_norm": 0.11257542669773102, "learning_rate": 0.0005, "loss": 2.1185, "step": 90810 }, { "epoch": 0.3456833354902065, "grad_norm": 0.1264079213142395, "learning_rate": 0.0005, "loss": 2.1133, "step": 90820 }, { "epoch": 0.34572139795832924, "grad_norm": 0.11463584005832672, "learning_rate": 0.0005, "loss": 2.1127, "step": 90830 }, { "epoch": 0.3457594604264519, "grad_norm": 0.1329016238451004, "learning_rate": 0.0005, "loss": 2.1065, "step": 90840 }, { "epoch": 0.34579752289457455, "grad_norm": 0.13102352619171143, "learning_rate": 0.0005, "loss": 2.1131, "step": 90850 }, { "epoch": 0.34583558536269726, "grad_norm": 0.12339334189891815, "learning_rate": 0.0005, "loss": 2.1311, "step": 90860 }, { "epoch": 0.3458736478308199, "grad_norm": 0.12185164541006088, "learning_rate": 0.0005, "loss": 2.1226, "step": 90870 }, { "epoch": 0.34591171029894263, "grad_norm": 0.13989120721817017, "learning_rate": 0.0005, "loss": 2.1282, "step": 90880 }, { "epoch": 0.3459497727670653, "grad_norm": 0.11912752687931061, "learning_rate": 0.0005, "loss": 2.1176, "step": 90890 }, { "epoch": 0.345987835235188, "grad_norm": 0.12951073050498962, "learning_rate": 0.0005, "loss": 2.1169, "step": 90900 }, { "epoch": 0.34602589770331066, "grad_norm": 0.11748843640089035, "learning_rate": 0.0005, "loss": 2.133, "step": 90910 }, { "epoch": 0.34606396017143337, "grad_norm": 0.12727637588977814, "learning_rate": 0.0005, "loss": 2.1381, "step": 90920 }, { "epoch": 0.34610202263955603, "grad_norm": 0.12068556994199753, "learning_rate": 0.0005, "loss": 2.1201, "step": 90930 }, { "epoch": 0.34614008510767874, "grad_norm": 0.11873731017112732, "learning_rate": 0.0005, "loss": 2.1327, "step": 90940 }, { "epoch": 0.3461781475758014, "grad_norm": 0.12427987158298492, "learning_rate": 0.0005, "loss": 2.1216, "step": 90950 }, { "epoch": 0.3462162100439241, "grad_norm": 0.13062496483325958, "learning_rate": 0.0005, "loss": 2.1223, "step": 90960 }, { "epoch": 0.34625427251204677, "grad_norm": 0.15881308913230896, "learning_rate": 0.0005, "loss": 2.1178, "step": 90970 }, { "epoch": 0.3462923349801695, "grad_norm": 0.12678012251853943, "learning_rate": 0.0005, "loss": 2.1214, "step": 90980 }, { "epoch": 0.34633039744829214, "grad_norm": 0.12431074678897858, "learning_rate": 0.0005, "loss": 2.1351, "step": 90990 }, { "epoch": 0.3463684599164148, "grad_norm": 0.1260952353477478, "learning_rate": 0.0005, "loss": 2.1323, "step": 91000 }, { "epoch": 0.3464065223845375, "grad_norm": 0.11722179502248764, "learning_rate": 0.0005, "loss": 2.1158, "step": 91010 }, { "epoch": 0.34644458485266016, "grad_norm": 0.12878122925758362, "learning_rate": 0.0005, "loss": 2.1247, "step": 91020 }, { "epoch": 0.3464826473207829, "grad_norm": 0.13498681783676147, "learning_rate": 0.0005, "loss": 2.1245, "step": 91030 }, { "epoch": 0.34652070978890553, "grad_norm": 0.12687091529369354, "learning_rate": 0.0005, "loss": 2.1099, "step": 91040 }, { "epoch": 0.34655877225702825, "grad_norm": 0.1323506087064743, "learning_rate": 0.0005, "loss": 2.1221, "step": 91050 }, { "epoch": 0.3465968347251509, "grad_norm": 0.12858256697654724, "learning_rate": 0.0005, "loss": 2.1162, "step": 91060 }, { "epoch": 0.3466348971932736, "grad_norm": 0.12089366465806961, "learning_rate": 0.0005, "loss": 2.1063, "step": 91070 }, { "epoch": 0.3466729596613963, "grad_norm": 0.1293216347694397, "learning_rate": 0.0005, "loss": 2.1071, "step": 91080 }, { "epoch": 0.346711022129519, "grad_norm": 0.12396867573261261, "learning_rate": 0.0005, "loss": 2.1227, "step": 91090 }, { "epoch": 0.34674908459764164, "grad_norm": 0.1382405161857605, "learning_rate": 0.0005, "loss": 2.1327, "step": 91100 }, { "epoch": 0.34678714706576436, "grad_norm": 0.13462446630001068, "learning_rate": 0.0005, "loss": 2.1278, "step": 91110 }, { "epoch": 0.346825209533887, "grad_norm": 0.1211245208978653, "learning_rate": 0.0005, "loss": 2.1065, "step": 91120 }, { "epoch": 0.3468632720020097, "grad_norm": 0.11953513324260712, "learning_rate": 0.0005, "loss": 2.1305, "step": 91130 }, { "epoch": 0.3469013344701324, "grad_norm": 0.1239633709192276, "learning_rate": 0.0005, "loss": 2.1303, "step": 91140 }, { "epoch": 0.34693939693825504, "grad_norm": 0.13313403725624084, "learning_rate": 0.0005, "loss": 2.1289, "step": 91150 }, { "epoch": 0.34697745940637775, "grad_norm": 0.13562420010566711, "learning_rate": 0.0005, "loss": 2.1228, "step": 91160 }, { "epoch": 0.3470155218745004, "grad_norm": 0.11659242957830429, "learning_rate": 0.0005, "loss": 2.1266, "step": 91170 }, { "epoch": 0.3470535843426231, "grad_norm": 0.1301209181547165, "learning_rate": 0.0005, "loss": 2.1218, "step": 91180 }, { "epoch": 0.3470916468107458, "grad_norm": 0.12229889631271362, "learning_rate": 0.0005, "loss": 2.1185, "step": 91190 }, { "epoch": 0.3471297092788685, "grad_norm": 0.12382488697767258, "learning_rate": 0.0005, "loss": 2.1112, "step": 91200 }, { "epoch": 0.34716777174699115, "grad_norm": 0.1378564089536667, "learning_rate": 0.0005, "loss": 2.1203, "step": 91210 }, { "epoch": 0.34720583421511386, "grad_norm": 0.1293146312236786, "learning_rate": 0.0005, "loss": 2.118, "step": 91220 }, { "epoch": 0.3472438966832365, "grad_norm": 0.11996651440858841, "learning_rate": 0.0005, "loss": 2.1244, "step": 91230 }, { "epoch": 0.34728195915135923, "grad_norm": 0.12518168985843658, "learning_rate": 0.0005, "loss": 2.1293, "step": 91240 }, { "epoch": 0.3473200216194819, "grad_norm": 0.12766574323177338, "learning_rate": 0.0005, "loss": 2.127, "step": 91250 }, { "epoch": 0.3473580840876046, "grad_norm": 0.1208205297589302, "learning_rate": 0.0005, "loss": 2.1336, "step": 91260 }, { "epoch": 0.34739614655572726, "grad_norm": 0.11901720613241196, "learning_rate": 0.0005, "loss": 2.1367, "step": 91270 }, { "epoch": 0.3474342090238499, "grad_norm": 0.14112527668476105, "learning_rate": 0.0005, "loss": 2.1211, "step": 91280 }, { "epoch": 0.34747227149197263, "grad_norm": 0.12192676961421967, "learning_rate": 0.0005, "loss": 2.1392, "step": 91290 }, { "epoch": 0.3475103339600953, "grad_norm": 0.11997781693935394, "learning_rate": 0.0005, "loss": 2.1216, "step": 91300 }, { "epoch": 0.347548396428218, "grad_norm": 0.12274051457643509, "learning_rate": 0.0005, "loss": 2.12, "step": 91310 }, { "epoch": 0.34758645889634066, "grad_norm": 0.13235563039779663, "learning_rate": 0.0005, "loss": 2.1268, "step": 91320 }, { "epoch": 0.34762452136446337, "grad_norm": 0.13290190696716309, "learning_rate": 0.0005, "loss": 2.1193, "step": 91330 }, { "epoch": 0.347662583832586, "grad_norm": 0.11767739802598953, "learning_rate": 0.0005, "loss": 2.126, "step": 91340 }, { "epoch": 0.34770064630070874, "grad_norm": 0.13191862404346466, "learning_rate": 0.0005, "loss": 2.1172, "step": 91350 }, { "epoch": 0.3477387087688314, "grad_norm": 0.12816275656223297, "learning_rate": 0.0005, "loss": 2.1291, "step": 91360 }, { "epoch": 0.3477767712369541, "grad_norm": 0.11478374153375626, "learning_rate": 0.0005, "loss": 2.1244, "step": 91370 }, { "epoch": 0.34781483370507676, "grad_norm": 0.139333575963974, "learning_rate": 0.0005, "loss": 2.1256, "step": 91380 }, { "epoch": 0.3478528961731995, "grad_norm": 0.12281625717878342, "learning_rate": 0.0005, "loss": 2.1325, "step": 91390 }, { "epoch": 0.34789095864132213, "grad_norm": 0.12818287312984467, "learning_rate": 0.0005, "loss": 2.1188, "step": 91400 }, { "epoch": 0.34792902110944485, "grad_norm": 0.12466471642255783, "learning_rate": 0.0005, "loss": 2.1279, "step": 91410 }, { "epoch": 0.3479670835775675, "grad_norm": 0.1379898339509964, "learning_rate": 0.0005, "loss": 2.1083, "step": 91420 }, { "epoch": 0.34800514604569016, "grad_norm": 0.13826188445091248, "learning_rate": 0.0005, "loss": 2.1323, "step": 91430 }, { "epoch": 0.3480432085138129, "grad_norm": 0.12144706398248672, "learning_rate": 0.0005, "loss": 2.1296, "step": 91440 }, { "epoch": 0.34808127098193553, "grad_norm": 0.12366820126771927, "learning_rate": 0.0005, "loss": 2.1253, "step": 91450 }, { "epoch": 0.34811933345005824, "grad_norm": 0.12699466943740845, "learning_rate": 0.0005, "loss": 2.1373, "step": 91460 }, { "epoch": 0.3481573959181809, "grad_norm": 0.12522569298744202, "learning_rate": 0.0005, "loss": 2.1264, "step": 91470 }, { "epoch": 0.3481954583863036, "grad_norm": 0.1333567202091217, "learning_rate": 0.0005, "loss": 2.1332, "step": 91480 }, { "epoch": 0.34823352085442627, "grad_norm": 0.13545624911785126, "learning_rate": 0.0005, "loss": 2.1147, "step": 91490 }, { "epoch": 0.348271583322549, "grad_norm": 0.12455707043409348, "learning_rate": 0.0005, "loss": 2.1386, "step": 91500 }, { "epoch": 0.34830964579067164, "grad_norm": 0.12056753784418106, "learning_rate": 0.0005, "loss": 2.1144, "step": 91510 }, { "epoch": 0.34834770825879435, "grad_norm": 0.12929877638816833, "learning_rate": 0.0005, "loss": 2.1301, "step": 91520 }, { "epoch": 0.348385770726917, "grad_norm": 0.1246948316693306, "learning_rate": 0.0005, "loss": 2.1197, "step": 91530 }, { "epoch": 0.3484238331950397, "grad_norm": 0.11321297287940979, "learning_rate": 0.0005, "loss": 2.1164, "step": 91540 }, { "epoch": 0.3484618956631624, "grad_norm": 0.12944476306438446, "learning_rate": 0.0005, "loss": 2.1172, "step": 91550 }, { "epoch": 0.3484999581312851, "grad_norm": 0.11018373817205429, "learning_rate": 0.0005, "loss": 2.1227, "step": 91560 }, { "epoch": 0.34853802059940775, "grad_norm": 0.12235118448734283, "learning_rate": 0.0005, "loss": 2.125, "step": 91570 }, { "epoch": 0.3485760830675304, "grad_norm": 0.12135972082614899, "learning_rate": 0.0005, "loss": 2.1336, "step": 91580 }, { "epoch": 0.3486141455356531, "grad_norm": 0.13681411743164062, "learning_rate": 0.0005, "loss": 2.1294, "step": 91590 }, { "epoch": 0.3486522080037758, "grad_norm": 0.12161792814731598, "learning_rate": 0.0005, "loss": 2.1266, "step": 91600 }, { "epoch": 0.3486902704718985, "grad_norm": 0.11866001039743423, "learning_rate": 0.0005, "loss": 2.1343, "step": 91610 }, { "epoch": 0.34872833294002115, "grad_norm": 0.11633715033531189, "learning_rate": 0.0005, "loss": 2.1273, "step": 91620 }, { "epoch": 0.34876639540814386, "grad_norm": 0.12331175059080124, "learning_rate": 0.0005, "loss": 2.1253, "step": 91630 }, { "epoch": 0.3488044578762665, "grad_norm": 0.13465796411037445, "learning_rate": 0.0005, "loss": 2.1195, "step": 91640 }, { "epoch": 0.34884252034438923, "grad_norm": 0.39071884751319885, "learning_rate": 0.0005, "loss": 2.1374, "step": 91650 }, { "epoch": 0.3488805828125119, "grad_norm": 0.1384720355272293, "learning_rate": 0.0005, "loss": 2.1225, "step": 91660 }, { "epoch": 0.3489186452806346, "grad_norm": 0.12433448433876038, "learning_rate": 0.0005, "loss": 2.1358, "step": 91670 }, { "epoch": 0.34895670774875726, "grad_norm": 0.12369529902935028, "learning_rate": 0.0005, "loss": 2.1231, "step": 91680 }, { "epoch": 0.34899477021687997, "grad_norm": 0.12780912220478058, "learning_rate": 0.0005, "loss": 2.1172, "step": 91690 }, { "epoch": 0.3490328326850026, "grad_norm": 0.13243108987808228, "learning_rate": 0.0005, "loss": 2.1443, "step": 91700 }, { "epoch": 0.3490708951531253, "grad_norm": 0.12568865716457367, "learning_rate": 0.0005, "loss": 2.1158, "step": 91710 }, { "epoch": 0.349108957621248, "grad_norm": 0.13068446516990662, "learning_rate": 0.0005, "loss": 2.1268, "step": 91720 }, { "epoch": 0.34914702008937065, "grad_norm": 0.13560396432876587, "learning_rate": 0.0005, "loss": 2.1169, "step": 91730 }, { "epoch": 0.34918508255749336, "grad_norm": 0.16247346997261047, "learning_rate": 0.0005, "loss": 2.1264, "step": 91740 }, { "epoch": 0.349223145025616, "grad_norm": 0.13477447628974915, "learning_rate": 0.0005, "loss": 2.1244, "step": 91750 }, { "epoch": 0.34926120749373873, "grad_norm": 0.12002983689308167, "learning_rate": 0.0005, "loss": 2.1263, "step": 91760 }, { "epoch": 0.3492992699618614, "grad_norm": 0.12221094965934753, "learning_rate": 0.0005, "loss": 2.1364, "step": 91770 }, { "epoch": 0.3493373324299841, "grad_norm": 0.14205977320671082, "learning_rate": 0.0005, "loss": 2.1302, "step": 91780 }, { "epoch": 0.34937539489810676, "grad_norm": 0.11442865431308746, "learning_rate": 0.0005, "loss": 2.1309, "step": 91790 }, { "epoch": 0.3494134573662295, "grad_norm": 0.13150253891944885, "learning_rate": 0.0005, "loss": 2.1163, "step": 91800 }, { "epoch": 0.34945151983435213, "grad_norm": 0.13062317669391632, "learning_rate": 0.0005, "loss": 2.1275, "step": 91810 }, { "epoch": 0.34948958230247484, "grad_norm": 0.12943986058235168, "learning_rate": 0.0005, "loss": 2.1262, "step": 91820 }, { "epoch": 0.3495276447705975, "grad_norm": 0.12897132337093353, "learning_rate": 0.0005, "loss": 2.1215, "step": 91830 }, { "epoch": 0.3495657072387202, "grad_norm": 0.12168525159358978, "learning_rate": 0.0005, "loss": 2.1144, "step": 91840 }, { "epoch": 0.34960376970684287, "grad_norm": 0.12652556598186493, "learning_rate": 0.0005, "loss": 2.1256, "step": 91850 }, { "epoch": 0.34964183217496553, "grad_norm": 0.14768041670322418, "learning_rate": 0.0005, "loss": 2.1205, "step": 91860 }, { "epoch": 0.34967989464308824, "grad_norm": 0.11618136614561081, "learning_rate": 0.0005, "loss": 2.1348, "step": 91870 }, { "epoch": 0.3497179571112109, "grad_norm": 0.12934279441833496, "learning_rate": 0.0005, "loss": 2.1075, "step": 91880 }, { "epoch": 0.3497560195793336, "grad_norm": 0.12195584177970886, "learning_rate": 0.0005, "loss": 2.128, "step": 91890 }, { "epoch": 0.34979408204745627, "grad_norm": 0.1398545652627945, "learning_rate": 0.0005, "loss": 2.1142, "step": 91900 }, { "epoch": 0.349832144515579, "grad_norm": 0.12671756744384766, "learning_rate": 0.0005, "loss": 2.1232, "step": 91910 }, { "epoch": 0.34987020698370164, "grad_norm": 0.12397027760744095, "learning_rate": 0.0005, "loss": 2.1215, "step": 91920 }, { "epoch": 0.34990826945182435, "grad_norm": 0.11226258426904678, "learning_rate": 0.0005, "loss": 2.1259, "step": 91930 }, { "epoch": 0.349946331919947, "grad_norm": 0.12149665504693985, "learning_rate": 0.0005, "loss": 2.1333, "step": 91940 }, { "epoch": 0.3499843943880697, "grad_norm": 0.11136379092931747, "learning_rate": 0.0005, "loss": 2.1222, "step": 91950 }, { "epoch": 0.3500224568561924, "grad_norm": 0.1287529319524765, "learning_rate": 0.0005, "loss": 2.1262, "step": 91960 }, { "epoch": 0.3500605193243151, "grad_norm": 0.12232114374637604, "learning_rate": 0.0005, "loss": 2.1112, "step": 91970 }, { "epoch": 0.35009858179243775, "grad_norm": 0.12176519632339478, "learning_rate": 0.0005, "loss": 2.1131, "step": 91980 }, { "epoch": 0.35013664426056046, "grad_norm": 0.11770909279584885, "learning_rate": 0.0005, "loss": 2.1171, "step": 91990 }, { "epoch": 0.3501747067286831, "grad_norm": 0.12266077101230621, "learning_rate": 0.0005, "loss": 2.1159, "step": 92000 }, { "epoch": 0.3502127691968058, "grad_norm": 0.13001757860183716, "learning_rate": 0.0005, "loss": 2.1253, "step": 92010 }, { "epoch": 0.3502508316649285, "grad_norm": 0.1514846533536911, "learning_rate": 0.0005, "loss": 2.1344, "step": 92020 }, { "epoch": 0.35028889413305114, "grad_norm": 0.12914305925369263, "learning_rate": 0.0005, "loss": 2.12, "step": 92030 }, { "epoch": 0.35032695660117386, "grad_norm": 0.12426555901765823, "learning_rate": 0.0005, "loss": 2.1126, "step": 92040 }, { "epoch": 0.3503650190692965, "grad_norm": 0.11400419473648071, "learning_rate": 0.0005, "loss": 2.1191, "step": 92050 }, { "epoch": 0.3504030815374192, "grad_norm": 0.12072566151618958, "learning_rate": 0.0005, "loss": 2.1216, "step": 92060 }, { "epoch": 0.3504411440055419, "grad_norm": 0.12593691051006317, "learning_rate": 0.0005, "loss": 2.115, "step": 92070 }, { "epoch": 0.3504792064736646, "grad_norm": 0.11472805589437485, "learning_rate": 0.0005, "loss": 2.1161, "step": 92080 }, { "epoch": 0.35051726894178725, "grad_norm": 0.11309553682804108, "learning_rate": 0.0005, "loss": 2.1322, "step": 92090 }, { "epoch": 0.35055533140990996, "grad_norm": 0.12516191601753235, "learning_rate": 0.0005, "loss": 2.1231, "step": 92100 }, { "epoch": 0.3505933938780326, "grad_norm": 0.14064562320709229, "learning_rate": 0.0005, "loss": 2.1299, "step": 92110 }, { "epoch": 0.35063145634615533, "grad_norm": 0.13177159428596497, "learning_rate": 0.0005, "loss": 2.1392, "step": 92120 }, { "epoch": 0.350669518814278, "grad_norm": 0.13190452754497528, "learning_rate": 0.0005, "loss": 2.1254, "step": 92130 }, { "epoch": 0.35070758128240065, "grad_norm": 0.11451554298400879, "learning_rate": 0.0005, "loss": 2.141, "step": 92140 }, { "epoch": 0.35074564375052336, "grad_norm": 0.11608679592609406, "learning_rate": 0.0005, "loss": 2.1067, "step": 92150 }, { "epoch": 0.350783706218646, "grad_norm": 0.14462372660636902, "learning_rate": 0.0005, "loss": 2.125, "step": 92160 }, { "epoch": 0.35082176868676873, "grad_norm": 0.12858840823173523, "learning_rate": 0.0005, "loss": 2.1237, "step": 92170 }, { "epoch": 0.3508598311548914, "grad_norm": 0.13132420182228088, "learning_rate": 0.0005, "loss": 2.1229, "step": 92180 }, { "epoch": 0.3508978936230141, "grad_norm": 0.1324557065963745, "learning_rate": 0.0005, "loss": 2.1132, "step": 92190 }, { "epoch": 0.35093595609113676, "grad_norm": 0.1943078637123108, "learning_rate": 0.0005, "loss": 2.1104, "step": 92200 }, { "epoch": 0.35097401855925947, "grad_norm": 0.11292409151792526, "learning_rate": 0.0005, "loss": 2.1354, "step": 92210 }, { "epoch": 0.35101208102738213, "grad_norm": 0.1135173812508583, "learning_rate": 0.0005, "loss": 2.1249, "step": 92220 }, { "epoch": 0.35105014349550484, "grad_norm": 0.1146763414144516, "learning_rate": 0.0005, "loss": 2.1378, "step": 92230 }, { "epoch": 0.3510882059636275, "grad_norm": 0.1284039467573166, "learning_rate": 0.0005, "loss": 2.092, "step": 92240 }, { "epoch": 0.3511262684317502, "grad_norm": 0.11903073638677597, "learning_rate": 0.0005, "loss": 2.1342, "step": 92250 }, { "epoch": 0.35116433089987287, "grad_norm": 0.12131869047880173, "learning_rate": 0.0005, "loss": 2.1219, "step": 92260 }, { "epoch": 0.3512023933679956, "grad_norm": 0.12632091343402863, "learning_rate": 0.0005, "loss": 2.1131, "step": 92270 }, { "epoch": 0.35124045583611824, "grad_norm": 0.12733492255210876, "learning_rate": 0.0005, "loss": 2.1081, "step": 92280 }, { "epoch": 0.3512785183042409, "grad_norm": 0.12252689898014069, "learning_rate": 0.0005, "loss": 2.1214, "step": 92290 }, { "epoch": 0.3513165807723636, "grad_norm": 0.12945452332496643, "learning_rate": 0.0005, "loss": 2.1276, "step": 92300 }, { "epoch": 0.35135464324048626, "grad_norm": 0.13178656995296478, "learning_rate": 0.0005, "loss": 2.1176, "step": 92310 }, { "epoch": 0.351392705708609, "grad_norm": 0.13196706771850586, "learning_rate": 0.0005, "loss": 2.1173, "step": 92320 }, { "epoch": 0.35143076817673163, "grad_norm": 0.11777035146951675, "learning_rate": 0.0005, "loss": 2.1334, "step": 92330 }, { "epoch": 0.35146883064485435, "grad_norm": 0.12045314908027649, "learning_rate": 0.0005, "loss": 2.1274, "step": 92340 }, { "epoch": 0.351506893112977, "grad_norm": 0.12759415805339813, "learning_rate": 0.0005, "loss": 2.1129, "step": 92350 }, { "epoch": 0.3515449555810997, "grad_norm": 0.1253875195980072, "learning_rate": 0.0005, "loss": 2.1144, "step": 92360 }, { "epoch": 0.3515830180492224, "grad_norm": 0.10823439806699753, "learning_rate": 0.0005, "loss": 2.128, "step": 92370 }, { "epoch": 0.3516210805173451, "grad_norm": 0.11525721102952957, "learning_rate": 0.0005, "loss": 2.1339, "step": 92380 }, { "epoch": 0.35165914298546774, "grad_norm": 0.1259388029575348, "learning_rate": 0.0005, "loss": 2.1131, "step": 92390 }, { "epoch": 0.35169720545359046, "grad_norm": 0.12903520464897156, "learning_rate": 0.0005, "loss": 2.1342, "step": 92400 }, { "epoch": 0.3517352679217131, "grad_norm": 0.12380896508693695, "learning_rate": 0.0005, "loss": 2.1252, "step": 92410 }, { "epoch": 0.3517733303898358, "grad_norm": 0.12490128725767136, "learning_rate": 0.0005, "loss": 2.1322, "step": 92420 }, { "epoch": 0.3518113928579585, "grad_norm": 0.12302028387784958, "learning_rate": 0.0005, "loss": 2.1262, "step": 92430 }, { "epoch": 0.35184945532608114, "grad_norm": 0.1265297383069992, "learning_rate": 0.0005, "loss": 2.1232, "step": 92440 }, { "epoch": 0.35188751779420385, "grad_norm": 0.12258687615394592, "learning_rate": 0.0005, "loss": 2.1095, "step": 92450 }, { "epoch": 0.3519255802623265, "grad_norm": 0.12869144976139069, "learning_rate": 0.0005, "loss": 2.1198, "step": 92460 }, { "epoch": 0.3519636427304492, "grad_norm": 0.11812689155340195, "learning_rate": 0.0005, "loss": 2.1158, "step": 92470 }, { "epoch": 0.3520017051985719, "grad_norm": 0.12622421979904175, "learning_rate": 0.0005, "loss": 2.1338, "step": 92480 }, { "epoch": 0.3520397676666946, "grad_norm": 0.13387994468212128, "learning_rate": 0.0005, "loss": 2.114, "step": 92490 }, { "epoch": 0.35207783013481725, "grad_norm": 0.12207360565662384, "learning_rate": 0.0005, "loss": 2.1303, "step": 92500 }, { "epoch": 0.35211589260293996, "grad_norm": 0.12587909400463104, "learning_rate": 0.0005, "loss": 2.1317, "step": 92510 }, { "epoch": 0.3521539550710626, "grad_norm": 0.12186160683631897, "learning_rate": 0.0005, "loss": 2.127, "step": 92520 }, { "epoch": 0.35219201753918533, "grad_norm": 0.11623741686344147, "learning_rate": 0.0005, "loss": 2.1291, "step": 92530 }, { "epoch": 0.352230080007308, "grad_norm": 0.11479505896568298, "learning_rate": 0.0005, "loss": 2.1264, "step": 92540 }, { "epoch": 0.3522681424754307, "grad_norm": 0.11657773703336716, "learning_rate": 0.0005, "loss": 2.1277, "step": 92550 }, { "epoch": 0.35230620494355336, "grad_norm": 0.14515730738639832, "learning_rate": 0.0005, "loss": 2.125, "step": 92560 }, { "epoch": 0.352344267411676, "grad_norm": 0.11519404500722885, "learning_rate": 0.0005, "loss": 2.1312, "step": 92570 }, { "epoch": 0.35238232987979873, "grad_norm": 0.11878593266010284, "learning_rate": 0.0005, "loss": 2.1204, "step": 92580 }, { "epoch": 0.3524203923479214, "grad_norm": 0.11939465999603271, "learning_rate": 0.0005, "loss": 2.1118, "step": 92590 }, { "epoch": 0.3524584548160441, "grad_norm": 0.128925159573555, "learning_rate": 0.0005, "loss": 2.1039, "step": 92600 }, { "epoch": 0.35249651728416675, "grad_norm": 0.11379945278167725, "learning_rate": 0.0005, "loss": 2.1243, "step": 92610 }, { "epoch": 0.35253457975228947, "grad_norm": 0.12493978440761566, "learning_rate": 0.0005, "loss": 2.1193, "step": 92620 }, { "epoch": 0.3525726422204121, "grad_norm": 0.12576165795326233, "learning_rate": 0.0005, "loss": 2.115, "step": 92630 }, { "epoch": 0.35261070468853484, "grad_norm": 0.1327223777770996, "learning_rate": 0.0005, "loss": 2.1215, "step": 92640 }, { "epoch": 0.3526487671566575, "grad_norm": 0.13390015065670013, "learning_rate": 0.0005, "loss": 2.1287, "step": 92650 }, { "epoch": 0.3526868296247802, "grad_norm": 0.13608437776565552, "learning_rate": 0.0005, "loss": 2.1276, "step": 92660 }, { "epoch": 0.35272489209290286, "grad_norm": 0.1304791420698166, "learning_rate": 0.0005, "loss": 2.1086, "step": 92670 }, { "epoch": 0.3527629545610256, "grad_norm": 0.13058798015117645, "learning_rate": 0.0005, "loss": 2.1114, "step": 92680 }, { "epoch": 0.35280101702914823, "grad_norm": 0.12364275008440018, "learning_rate": 0.0005, "loss": 2.1319, "step": 92690 }, { "epoch": 0.35283907949727095, "grad_norm": 0.12345042079687119, "learning_rate": 0.0005, "loss": 2.1148, "step": 92700 }, { "epoch": 0.3528771419653936, "grad_norm": 0.13718070089817047, "learning_rate": 0.0005, "loss": 2.1126, "step": 92710 }, { "epoch": 0.35291520443351626, "grad_norm": 0.12440600246191025, "learning_rate": 0.0005, "loss": 2.1294, "step": 92720 }, { "epoch": 0.352953266901639, "grad_norm": 0.1204405426979065, "learning_rate": 0.0005, "loss": 2.1239, "step": 92730 }, { "epoch": 0.35299132936976163, "grad_norm": 0.1262776255607605, "learning_rate": 0.0005, "loss": 2.1101, "step": 92740 }, { "epoch": 0.35302939183788434, "grad_norm": 0.11918480694293976, "learning_rate": 0.0005, "loss": 2.1329, "step": 92750 }, { "epoch": 0.353067454306007, "grad_norm": 0.13499636948108673, "learning_rate": 0.0005, "loss": 2.1224, "step": 92760 }, { "epoch": 0.3531055167741297, "grad_norm": 0.13470661640167236, "learning_rate": 0.0005, "loss": 2.1248, "step": 92770 }, { "epoch": 0.35314357924225237, "grad_norm": 0.1193150132894516, "learning_rate": 0.0005, "loss": 2.1288, "step": 92780 }, { "epoch": 0.3531816417103751, "grad_norm": 0.13073119521141052, "learning_rate": 0.0005, "loss": 2.1174, "step": 92790 }, { "epoch": 0.35321970417849774, "grad_norm": 0.13316971063613892, "learning_rate": 0.0005, "loss": 2.1257, "step": 92800 }, { "epoch": 0.35325776664662045, "grad_norm": 0.12979532778263092, "learning_rate": 0.0005, "loss": 2.1237, "step": 92810 }, { "epoch": 0.3532958291147431, "grad_norm": 0.1356252282857895, "learning_rate": 0.0005, "loss": 2.1323, "step": 92820 }, { "epoch": 0.3533338915828658, "grad_norm": 0.12186180055141449, "learning_rate": 0.0005, "loss": 2.1157, "step": 92830 }, { "epoch": 0.3533719540509885, "grad_norm": 0.11846879124641418, "learning_rate": 0.0005, "loss": 2.1308, "step": 92840 }, { "epoch": 0.3534100165191112, "grad_norm": 0.1233963742852211, "learning_rate": 0.0005, "loss": 2.1269, "step": 92850 }, { "epoch": 0.35344807898723385, "grad_norm": 0.1270616352558136, "learning_rate": 0.0005, "loss": 2.1188, "step": 92860 }, { "epoch": 0.3534861414553565, "grad_norm": 0.12139040976762772, "learning_rate": 0.0005, "loss": 2.1194, "step": 92870 }, { "epoch": 0.3535242039234792, "grad_norm": 0.1251436024904251, "learning_rate": 0.0005, "loss": 2.1189, "step": 92880 }, { "epoch": 0.3535622663916019, "grad_norm": 0.12637275457382202, "learning_rate": 0.0005, "loss": 2.1111, "step": 92890 }, { "epoch": 0.3536003288597246, "grad_norm": 0.1288340538740158, "learning_rate": 0.0005, "loss": 2.1157, "step": 92900 }, { "epoch": 0.35363839132784725, "grad_norm": 0.13565371930599213, "learning_rate": 0.0005, "loss": 2.1308, "step": 92910 }, { "epoch": 0.35367645379596996, "grad_norm": 0.1296985149383545, "learning_rate": 0.0005, "loss": 2.1136, "step": 92920 }, { "epoch": 0.3537145162640926, "grad_norm": 0.1276034265756607, "learning_rate": 0.0005, "loss": 2.1146, "step": 92930 }, { "epoch": 0.35375257873221533, "grad_norm": 0.13032260537147522, "learning_rate": 0.0005, "loss": 2.1148, "step": 92940 }, { "epoch": 0.353790641200338, "grad_norm": 0.11970576643943787, "learning_rate": 0.0005, "loss": 2.1269, "step": 92950 }, { "epoch": 0.3538287036684607, "grad_norm": 0.11574655026197433, "learning_rate": 0.0005, "loss": 2.122, "step": 92960 }, { "epoch": 0.35386676613658335, "grad_norm": 0.12114018946886063, "learning_rate": 0.0005, "loss": 2.1075, "step": 92970 }, { "epoch": 0.35390482860470607, "grad_norm": 0.12086793035268784, "learning_rate": 0.0005, "loss": 2.129, "step": 92980 }, { "epoch": 0.3539428910728287, "grad_norm": 0.12161669135093689, "learning_rate": 0.0005, "loss": 2.14, "step": 92990 }, { "epoch": 0.3539809535409514, "grad_norm": 0.13499213755130768, "learning_rate": 0.0005, "loss": 2.1255, "step": 93000 }, { "epoch": 0.3540190160090741, "grad_norm": 0.12085911631584167, "learning_rate": 0.0005, "loss": 2.1271, "step": 93010 }, { "epoch": 0.35405707847719675, "grad_norm": 0.11759299039840698, "learning_rate": 0.0005, "loss": 2.1243, "step": 93020 }, { "epoch": 0.35409514094531946, "grad_norm": 0.12203952670097351, "learning_rate": 0.0005, "loss": 2.1162, "step": 93030 }, { "epoch": 0.3541332034134421, "grad_norm": 0.12227179110050201, "learning_rate": 0.0005, "loss": 2.1157, "step": 93040 }, { "epoch": 0.35417126588156483, "grad_norm": 0.12413538992404938, "learning_rate": 0.0005, "loss": 2.124, "step": 93050 }, { "epoch": 0.3542093283496875, "grad_norm": 0.13979873061180115, "learning_rate": 0.0005, "loss": 2.1202, "step": 93060 }, { "epoch": 0.3542473908178102, "grad_norm": 0.11567272990942001, "learning_rate": 0.0005, "loss": 2.1219, "step": 93070 }, { "epoch": 0.35428545328593286, "grad_norm": 0.11851345747709274, "learning_rate": 0.0005, "loss": 2.1397, "step": 93080 }, { "epoch": 0.3543235157540556, "grad_norm": 0.13208027184009552, "learning_rate": 0.0005, "loss": 2.1125, "step": 93090 }, { "epoch": 0.35436157822217823, "grad_norm": 0.13782580196857452, "learning_rate": 0.0005, "loss": 2.1134, "step": 93100 }, { "epoch": 0.35439964069030094, "grad_norm": 0.15525998175144196, "learning_rate": 0.0005, "loss": 2.1332, "step": 93110 }, { "epoch": 0.3544377031584236, "grad_norm": 0.12536172568798065, "learning_rate": 0.0005, "loss": 2.1263, "step": 93120 }, { "epoch": 0.3544757656265463, "grad_norm": 0.13931910693645477, "learning_rate": 0.0005, "loss": 2.1437, "step": 93130 }, { "epoch": 0.35451382809466897, "grad_norm": 0.12072119861841202, "learning_rate": 0.0005, "loss": 2.1149, "step": 93140 }, { "epoch": 0.3545518905627916, "grad_norm": 0.13006591796875, "learning_rate": 0.0005, "loss": 2.1302, "step": 93150 }, { "epoch": 0.35458995303091434, "grad_norm": 0.12391817569732666, "learning_rate": 0.0005, "loss": 2.1187, "step": 93160 }, { "epoch": 0.354628015499037, "grad_norm": 0.12410353124141693, "learning_rate": 0.0005, "loss": 2.1313, "step": 93170 }, { "epoch": 0.3546660779671597, "grad_norm": 0.13124974071979523, "learning_rate": 0.0005, "loss": 2.1166, "step": 93180 }, { "epoch": 0.35470414043528237, "grad_norm": 0.13084763288497925, "learning_rate": 0.0005, "loss": 2.1146, "step": 93190 }, { "epoch": 0.3547422029034051, "grad_norm": 0.1214420273900032, "learning_rate": 0.0005, "loss": 2.1157, "step": 93200 }, { "epoch": 0.35478026537152774, "grad_norm": 0.13202619552612305, "learning_rate": 0.0005, "loss": 2.1217, "step": 93210 }, { "epoch": 0.35481832783965045, "grad_norm": 0.13319921493530273, "learning_rate": 0.0005, "loss": 2.1167, "step": 93220 }, { "epoch": 0.3548563903077731, "grad_norm": 0.13513195514678955, "learning_rate": 0.0005, "loss": 2.136, "step": 93230 }, { "epoch": 0.3548944527758958, "grad_norm": 0.13121256232261658, "learning_rate": 0.0005, "loss": 2.1214, "step": 93240 }, { "epoch": 0.3549325152440185, "grad_norm": 0.11843890696763992, "learning_rate": 0.0005, "loss": 2.1093, "step": 93250 }, { "epoch": 0.3549705777121412, "grad_norm": 0.13226059079170227, "learning_rate": 0.0005, "loss": 2.1334, "step": 93260 }, { "epoch": 0.35500864018026385, "grad_norm": 0.12598729133605957, "learning_rate": 0.0005, "loss": 2.1136, "step": 93270 }, { "epoch": 0.35504670264838656, "grad_norm": 0.13033129274845123, "learning_rate": 0.0005, "loss": 2.1164, "step": 93280 }, { "epoch": 0.3550847651165092, "grad_norm": 0.12485304474830627, "learning_rate": 0.0005, "loss": 2.1214, "step": 93290 }, { "epoch": 0.3551228275846319, "grad_norm": 0.11563688516616821, "learning_rate": 0.0005, "loss": 2.127, "step": 93300 }, { "epoch": 0.3551608900527546, "grad_norm": 0.1208873763680458, "learning_rate": 0.0005, "loss": 2.1191, "step": 93310 }, { "epoch": 0.35519895252087724, "grad_norm": 0.11897026002407074, "learning_rate": 0.0005, "loss": 2.1173, "step": 93320 }, { "epoch": 0.35523701498899996, "grad_norm": 0.13029348850250244, "learning_rate": 0.0005, "loss": 2.1204, "step": 93330 }, { "epoch": 0.3552750774571226, "grad_norm": 0.1318322718143463, "learning_rate": 0.0005, "loss": 2.1289, "step": 93340 }, { "epoch": 0.3553131399252453, "grad_norm": 0.12738265097141266, "learning_rate": 0.0005, "loss": 2.1299, "step": 93350 }, { "epoch": 0.355351202393368, "grad_norm": 0.11214686185121536, "learning_rate": 0.0005, "loss": 2.1244, "step": 93360 }, { "epoch": 0.3553892648614907, "grad_norm": 0.12007127702236176, "learning_rate": 0.0005, "loss": 2.1282, "step": 93370 }, { "epoch": 0.35542732732961335, "grad_norm": 0.15367311239242554, "learning_rate": 0.0005, "loss": 2.1075, "step": 93380 }, { "epoch": 0.35546538979773606, "grad_norm": 0.13861685991287231, "learning_rate": 0.0005, "loss": 2.1297, "step": 93390 }, { "epoch": 0.3555034522658587, "grad_norm": 0.13355308771133423, "learning_rate": 0.0005, "loss": 2.1212, "step": 93400 }, { "epoch": 0.35554151473398143, "grad_norm": 0.11894602328538895, "learning_rate": 0.0005, "loss": 2.1132, "step": 93410 }, { "epoch": 0.3555795772021041, "grad_norm": 0.11944910138845444, "learning_rate": 0.0005, "loss": 2.1212, "step": 93420 }, { "epoch": 0.3556176396702268, "grad_norm": 0.11500494927167892, "learning_rate": 0.0005, "loss": 2.1414, "step": 93430 }, { "epoch": 0.35565570213834946, "grad_norm": 0.13844679296016693, "learning_rate": 0.0005, "loss": 2.0977, "step": 93440 }, { "epoch": 0.3556937646064721, "grad_norm": 0.14892104268074036, "learning_rate": 0.0005, "loss": 2.129, "step": 93450 }, { "epoch": 0.35573182707459483, "grad_norm": 0.12289411574602127, "learning_rate": 0.0005, "loss": 2.123, "step": 93460 }, { "epoch": 0.3557698895427175, "grad_norm": 0.12943007051944733, "learning_rate": 0.0005, "loss": 2.1217, "step": 93470 }, { "epoch": 0.3558079520108402, "grad_norm": 0.13010786473751068, "learning_rate": 0.0005, "loss": 2.1454, "step": 93480 }, { "epoch": 0.35584601447896286, "grad_norm": 0.12736776471138, "learning_rate": 0.0005, "loss": 2.1204, "step": 93490 }, { "epoch": 0.35588407694708557, "grad_norm": 0.15535278618335724, "learning_rate": 0.0005, "loss": 2.1219, "step": 93500 }, { "epoch": 0.3559221394152082, "grad_norm": 0.12439072877168655, "learning_rate": 0.0005, "loss": 2.1329, "step": 93510 }, { "epoch": 0.35596020188333094, "grad_norm": 0.11703763902187347, "learning_rate": 0.0005, "loss": 2.1102, "step": 93520 }, { "epoch": 0.3559982643514536, "grad_norm": 0.12202954292297363, "learning_rate": 0.0005, "loss": 2.124, "step": 93530 }, { "epoch": 0.3560363268195763, "grad_norm": 0.12256588786840439, "learning_rate": 0.0005, "loss": 2.1165, "step": 93540 }, { "epoch": 0.35607438928769897, "grad_norm": 0.14434875547885895, "learning_rate": 0.0005, "loss": 2.1223, "step": 93550 }, { "epoch": 0.3561124517558217, "grad_norm": 0.11983123421669006, "learning_rate": 0.0005, "loss": 2.1147, "step": 93560 }, { "epoch": 0.35615051422394434, "grad_norm": 0.11797554790973663, "learning_rate": 0.0005, "loss": 2.1144, "step": 93570 }, { "epoch": 0.356188576692067, "grad_norm": 0.1287076622247696, "learning_rate": 0.0005, "loss": 2.1093, "step": 93580 }, { "epoch": 0.3562266391601897, "grad_norm": 0.13552536070346832, "learning_rate": 0.0005, "loss": 2.1002, "step": 93590 }, { "epoch": 0.35626470162831236, "grad_norm": 0.12075095623731613, "learning_rate": 0.0005, "loss": 2.1267, "step": 93600 }, { "epoch": 0.3563027640964351, "grad_norm": 0.12171369045972824, "learning_rate": 0.0005, "loss": 2.1231, "step": 93610 }, { "epoch": 0.35634082656455773, "grad_norm": 0.12191025167703629, "learning_rate": 0.0005, "loss": 2.125, "step": 93620 }, { "epoch": 0.35637888903268045, "grad_norm": 0.11536803096532822, "learning_rate": 0.0005, "loss": 2.132, "step": 93630 }, { "epoch": 0.3564169515008031, "grad_norm": 0.12162362784147263, "learning_rate": 0.0005, "loss": 2.1313, "step": 93640 }, { "epoch": 0.3564550139689258, "grad_norm": 0.12384018301963806, "learning_rate": 0.0005, "loss": 2.1189, "step": 93650 }, { "epoch": 0.3564930764370485, "grad_norm": 0.11889617145061493, "learning_rate": 0.0005, "loss": 2.1184, "step": 93660 }, { "epoch": 0.3565311389051712, "grad_norm": 0.140035942196846, "learning_rate": 0.0005, "loss": 2.1327, "step": 93670 }, { "epoch": 0.35656920137329384, "grad_norm": 0.14636938273906708, "learning_rate": 0.0005, "loss": 2.1276, "step": 93680 }, { "epoch": 0.35660726384141656, "grad_norm": 0.12248191982507706, "learning_rate": 0.0005, "loss": 2.1163, "step": 93690 }, { "epoch": 0.3566453263095392, "grad_norm": 0.12582148611545563, "learning_rate": 0.0005, "loss": 2.122, "step": 93700 }, { "epoch": 0.3566833887776619, "grad_norm": 0.11646874994039536, "learning_rate": 0.0005, "loss": 2.1271, "step": 93710 }, { "epoch": 0.3567214512457846, "grad_norm": 0.12089702486991882, "learning_rate": 0.0005, "loss": 2.1155, "step": 93720 }, { "epoch": 0.35675951371390724, "grad_norm": 0.12172345072031021, "learning_rate": 0.0005, "loss": 2.1116, "step": 93730 }, { "epoch": 0.35679757618202995, "grad_norm": 0.1264006495475769, "learning_rate": 0.0005, "loss": 2.142, "step": 93740 }, { "epoch": 0.3568356386501526, "grad_norm": 0.12129916995763779, "learning_rate": 0.0005, "loss": 2.1374, "step": 93750 }, { "epoch": 0.3568737011182753, "grad_norm": 0.12129603326320648, "learning_rate": 0.0005, "loss": 2.1236, "step": 93760 }, { "epoch": 0.356911763586398, "grad_norm": 0.12828706204891205, "learning_rate": 0.0005, "loss": 2.1119, "step": 93770 }, { "epoch": 0.3569498260545207, "grad_norm": 0.11421766877174377, "learning_rate": 0.0005, "loss": 2.115, "step": 93780 }, { "epoch": 0.35698788852264335, "grad_norm": 0.12098051607608795, "learning_rate": 0.0005, "loss": 2.1403, "step": 93790 }, { "epoch": 0.35702595099076606, "grad_norm": 0.1217089295387268, "learning_rate": 0.0005, "loss": 2.1282, "step": 93800 }, { "epoch": 0.3570640134588887, "grad_norm": 0.11992437392473221, "learning_rate": 0.0005, "loss": 2.1214, "step": 93810 }, { "epoch": 0.35710207592701143, "grad_norm": 0.12545369565486908, "learning_rate": 0.0005, "loss": 2.1113, "step": 93820 }, { "epoch": 0.3571401383951341, "grad_norm": 0.1325383186340332, "learning_rate": 0.0005, "loss": 2.1281, "step": 93830 }, { "epoch": 0.3571782008632568, "grad_norm": 0.12268385291099548, "learning_rate": 0.0005, "loss": 2.1179, "step": 93840 }, { "epoch": 0.35721626333137946, "grad_norm": 0.12395138293504715, "learning_rate": 0.0005, "loss": 2.1215, "step": 93850 }, { "epoch": 0.35725432579950217, "grad_norm": 0.13802285492420197, "learning_rate": 0.0005, "loss": 2.1175, "step": 93860 }, { "epoch": 0.3572923882676248, "grad_norm": 0.13328036665916443, "learning_rate": 0.0005, "loss": 2.1272, "step": 93870 }, { "epoch": 0.3573304507357475, "grad_norm": 0.1284911185503006, "learning_rate": 0.0005, "loss": 2.1131, "step": 93880 }, { "epoch": 0.3573685132038702, "grad_norm": 0.12316421419382095, "learning_rate": 0.0005, "loss": 2.1158, "step": 93890 }, { "epoch": 0.35740657567199285, "grad_norm": 0.13757121562957764, "learning_rate": 0.0005, "loss": 2.1361, "step": 93900 }, { "epoch": 0.35744463814011557, "grad_norm": 0.12331431359052658, "learning_rate": 0.0005, "loss": 2.1191, "step": 93910 }, { "epoch": 0.3574827006082382, "grad_norm": 0.13232360780239105, "learning_rate": 0.0005, "loss": 2.1225, "step": 93920 }, { "epoch": 0.35752076307636094, "grad_norm": 0.12024432420730591, "learning_rate": 0.0005, "loss": 2.1197, "step": 93930 }, { "epoch": 0.3575588255444836, "grad_norm": 0.11617932468652725, "learning_rate": 0.0005, "loss": 2.1149, "step": 93940 }, { "epoch": 0.3575968880126063, "grad_norm": 0.12654536962509155, "learning_rate": 0.0005, "loss": 2.1101, "step": 93950 }, { "epoch": 0.35763495048072896, "grad_norm": 0.12844809889793396, "learning_rate": 0.0005, "loss": 2.1228, "step": 93960 }, { "epoch": 0.3576730129488517, "grad_norm": 0.12008248269557953, "learning_rate": 0.0005, "loss": 2.1309, "step": 93970 }, { "epoch": 0.35771107541697433, "grad_norm": 0.1400655061006546, "learning_rate": 0.0005, "loss": 2.1159, "step": 93980 }, { "epoch": 0.35774913788509705, "grad_norm": 0.11512552946805954, "learning_rate": 0.0005, "loss": 2.1067, "step": 93990 }, { "epoch": 0.3577872003532197, "grad_norm": 0.12251134216785431, "learning_rate": 0.0005, "loss": 2.1316, "step": 94000 }, { "epoch": 0.35782526282134236, "grad_norm": 0.13219940662384033, "learning_rate": 0.0005, "loss": 2.1144, "step": 94010 }, { "epoch": 0.3578633252894651, "grad_norm": 0.11719072610139847, "learning_rate": 0.0005, "loss": 2.14, "step": 94020 }, { "epoch": 0.35790138775758773, "grad_norm": 0.127188578248024, "learning_rate": 0.0005, "loss": 2.1248, "step": 94030 }, { "epoch": 0.35793945022571044, "grad_norm": 0.12552744150161743, "learning_rate": 0.0005, "loss": 2.1265, "step": 94040 }, { "epoch": 0.3579775126938331, "grad_norm": 0.12586472928524017, "learning_rate": 0.0005, "loss": 2.1201, "step": 94050 }, { "epoch": 0.3580155751619558, "grad_norm": 0.12713229656219482, "learning_rate": 0.0005, "loss": 2.1314, "step": 94060 }, { "epoch": 0.35805363763007847, "grad_norm": 0.12572598457336426, "learning_rate": 0.0005, "loss": 2.107, "step": 94070 }, { "epoch": 0.3580917000982012, "grad_norm": 0.12956538796424866, "learning_rate": 0.0005, "loss": 2.1232, "step": 94080 }, { "epoch": 0.35812976256632384, "grad_norm": 0.12850606441497803, "learning_rate": 0.0005, "loss": 2.1039, "step": 94090 }, { "epoch": 0.35816782503444655, "grad_norm": 0.12590013444423676, "learning_rate": 0.0005, "loss": 2.1271, "step": 94100 }, { "epoch": 0.3582058875025692, "grad_norm": 0.12473782151937485, "learning_rate": 0.0005, "loss": 2.1279, "step": 94110 }, { "epoch": 0.3582439499706919, "grad_norm": 0.11266183108091354, "learning_rate": 0.0005, "loss": 2.1318, "step": 94120 }, { "epoch": 0.3582820124388146, "grad_norm": 0.15109241008758545, "learning_rate": 0.0005, "loss": 2.1173, "step": 94130 }, { "epoch": 0.3583200749069373, "grad_norm": 0.11439945548772812, "learning_rate": 0.0005, "loss": 2.1249, "step": 94140 }, { "epoch": 0.35835813737505995, "grad_norm": 0.13953430950641632, "learning_rate": 0.0005, "loss": 2.1277, "step": 94150 }, { "epoch": 0.3583961998431826, "grad_norm": 0.13271038234233856, "learning_rate": 0.0005, "loss": 2.11, "step": 94160 }, { "epoch": 0.3584342623113053, "grad_norm": 0.13171426951885223, "learning_rate": 0.0005, "loss": 2.127, "step": 94170 }, { "epoch": 0.358472324779428, "grad_norm": 0.12254917621612549, "learning_rate": 0.0005, "loss": 2.1235, "step": 94180 }, { "epoch": 0.3585103872475507, "grad_norm": 0.11646232008934021, "learning_rate": 0.0005, "loss": 2.108, "step": 94190 }, { "epoch": 0.35854844971567335, "grad_norm": 0.14167553186416626, "learning_rate": 0.0005, "loss": 2.1456, "step": 94200 }, { "epoch": 0.35858651218379606, "grad_norm": 0.12849067151546478, "learning_rate": 0.0005, "loss": 2.122, "step": 94210 }, { "epoch": 0.3586245746519187, "grad_norm": 0.13245843350887299, "learning_rate": 0.0005, "loss": 2.1376, "step": 94220 }, { "epoch": 0.3586626371200414, "grad_norm": 0.13472813367843628, "learning_rate": 0.0005, "loss": 2.1272, "step": 94230 }, { "epoch": 0.3587006995881641, "grad_norm": 0.11261474341154099, "learning_rate": 0.0005, "loss": 2.1274, "step": 94240 }, { "epoch": 0.3587387620562868, "grad_norm": 0.13049879670143127, "learning_rate": 0.0005, "loss": 2.1262, "step": 94250 }, { "epoch": 0.35877682452440945, "grad_norm": 0.12245085835456848, "learning_rate": 0.0005, "loss": 2.1188, "step": 94260 }, { "epoch": 0.35881488699253217, "grad_norm": 0.13632707297801971, "learning_rate": 0.0005, "loss": 2.1366, "step": 94270 }, { "epoch": 0.3588529494606548, "grad_norm": 0.12433923035860062, "learning_rate": 0.0005, "loss": 2.1308, "step": 94280 }, { "epoch": 0.35889101192877754, "grad_norm": 0.12938573956489563, "learning_rate": 0.0005, "loss": 2.1289, "step": 94290 }, { "epoch": 0.3589290743969002, "grad_norm": 0.1309065967798233, "learning_rate": 0.0005, "loss": 2.1243, "step": 94300 }, { "epoch": 0.35896713686502285, "grad_norm": 0.12268606573343277, "learning_rate": 0.0005, "loss": 2.1344, "step": 94310 }, { "epoch": 0.35900519933314556, "grad_norm": 0.1260475069284439, "learning_rate": 0.0005, "loss": 2.1067, "step": 94320 }, { "epoch": 0.3590432618012682, "grad_norm": 0.128811776638031, "learning_rate": 0.0005, "loss": 2.1139, "step": 94330 }, { "epoch": 0.35908132426939093, "grad_norm": 0.12514546513557434, "learning_rate": 0.0005, "loss": 2.1164, "step": 94340 }, { "epoch": 0.3591193867375136, "grad_norm": 0.12161403149366379, "learning_rate": 0.0005, "loss": 2.1251, "step": 94350 }, { "epoch": 0.3591574492056363, "grad_norm": 0.1290997415781021, "learning_rate": 0.0005, "loss": 2.1202, "step": 94360 }, { "epoch": 0.35919551167375896, "grad_norm": 0.12186875194311142, "learning_rate": 0.0005, "loss": 2.1232, "step": 94370 }, { "epoch": 0.3592335741418817, "grad_norm": 0.13205765187740326, "learning_rate": 0.0005, "loss": 2.1173, "step": 94380 }, { "epoch": 0.35927163661000433, "grad_norm": 0.12835603952407837, "learning_rate": 0.0005, "loss": 2.1121, "step": 94390 }, { "epoch": 0.35930969907812704, "grad_norm": 0.1322995126247406, "learning_rate": 0.0005, "loss": 2.129, "step": 94400 }, { "epoch": 0.3593477615462497, "grad_norm": 0.12406829744577408, "learning_rate": 0.0005, "loss": 2.1268, "step": 94410 }, { "epoch": 0.3593858240143724, "grad_norm": 0.12375843524932861, "learning_rate": 0.0005, "loss": 2.1132, "step": 94420 }, { "epoch": 0.35942388648249507, "grad_norm": 0.1409797966480255, "learning_rate": 0.0005, "loss": 2.113, "step": 94430 }, { "epoch": 0.3594619489506177, "grad_norm": 0.12111986428499222, "learning_rate": 0.0005, "loss": 2.119, "step": 94440 }, { "epoch": 0.35950001141874044, "grad_norm": 0.11834825575351715, "learning_rate": 0.0005, "loss": 2.1285, "step": 94450 }, { "epoch": 0.3595380738868631, "grad_norm": 0.12570960819721222, "learning_rate": 0.0005, "loss": 2.1382, "step": 94460 }, { "epoch": 0.3595761363549858, "grad_norm": 0.12362303584814072, "learning_rate": 0.0005, "loss": 2.1253, "step": 94470 }, { "epoch": 0.35961419882310847, "grad_norm": 0.1270592361688614, "learning_rate": 0.0005, "loss": 2.1176, "step": 94480 }, { "epoch": 0.3596522612912312, "grad_norm": 0.12973882257938385, "learning_rate": 0.0005, "loss": 2.1143, "step": 94490 }, { "epoch": 0.35969032375935384, "grad_norm": 0.12367293983697891, "learning_rate": 0.0005, "loss": 2.1143, "step": 94500 }, { "epoch": 0.35972838622747655, "grad_norm": 0.1229577362537384, "learning_rate": 0.0005, "loss": 2.119, "step": 94510 }, { "epoch": 0.3597664486955992, "grad_norm": 0.1185227707028389, "learning_rate": 0.0005, "loss": 2.1273, "step": 94520 }, { "epoch": 0.3598045111637219, "grad_norm": 0.11892779171466827, "learning_rate": 0.0005, "loss": 2.1108, "step": 94530 }, { "epoch": 0.3598425736318446, "grad_norm": 0.12867958843708038, "learning_rate": 0.0005, "loss": 2.1328, "step": 94540 }, { "epoch": 0.3598806360999673, "grad_norm": 0.14746211469173431, "learning_rate": 0.0005, "loss": 2.116, "step": 94550 }, { "epoch": 0.35991869856808995, "grad_norm": 0.12404578179121017, "learning_rate": 0.0005, "loss": 2.1235, "step": 94560 }, { "epoch": 0.35995676103621266, "grad_norm": 0.12142115831375122, "learning_rate": 0.0005, "loss": 2.1138, "step": 94570 }, { "epoch": 0.3599948235043353, "grad_norm": 0.12541688978672028, "learning_rate": 0.0005, "loss": 2.1235, "step": 94580 }, { "epoch": 0.36003288597245797, "grad_norm": 0.1460968405008316, "learning_rate": 0.0005, "loss": 2.1159, "step": 94590 }, { "epoch": 0.3600709484405807, "grad_norm": 0.12225353717803955, "learning_rate": 0.0005, "loss": 2.1264, "step": 94600 }, { "epoch": 0.36010901090870334, "grad_norm": 0.12600895762443542, "learning_rate": 0.0005, "loss": 2.1159, "step": 94610 }, { "epoch": 0.36014707337682605, "grad_norm": 0.13123752176761627, "learning_rate": 0.0005, "loss": 2.1297, "step": 94620 }, { "epoch": 0.3601851358449487, "grad_norm": 0.12744736671447754, "learning_rate": 0.0005, "loss": 2.1263, "step": 94630 }, { "epoch": 0.3602231983130714, "grad_norm": 0.11714782565832138, "learning_rate": 0.0005, "loss": 2.1213, "step": 94640 }, { "epoch": 0.3602612607811941, "grad_norm": 0.11957629770040512, "learning_rate": 0.0005, "loss": 2.1196, "step": 94650 }, { "epoch": 0.3602993232493168, "grad_norm": 0.12496066093444824, "learning_rate": 0.0005, "loss": 2.1226, "step": 94660 }, { "epoch": 0.36033738571743945, "grad_norm": 0.12312795221805573, "learning_rate": 0.0005, "loss": 2.1202, "step": 94670 }, { "epoch": 0.36037544818556216, "grad_norm": 0.1335715502500534, "learning_rate": 0.0005, "loss": 2.1213, "step": 94680 }, { "epoch": 0.3604135106536848, "grad_norm": 0.12358417361974716, "learning_rate": 0.0005, "loss": 2.1196, "step": 94690 }, { "epoch": 0.36045157312180753, "grad_norm": 0.11705588549375534, "learning_rate": 0.0005, "loss": 2.1193, "step": 94700 }, { "epoch": 0.3604896355899302, "grad_norm": 0.12685005366802216, "learning_rate": 0.0005, "loss": 2.1445, "step": 94710 }, { "epoch": 0.3605276980580529, "grad_norm": 0.13659855723381042, "learning_rate": 0.0005, "loss": 2.1213, "step": 94720 }, { "epoch": 0.36056576052617556, "grad_norm": 0.11961235851049423, "learning_rate": 0.0005, "loss": 2.118, "step": 94730 }, { "epoch": 0.3606038229942982, "grad_norm": 0.12005238980054855, "learning_rate": 0.0005, "loss": 2.1347, "step": 94740 }, { "epoch": 0.36064188546242093, "grad_norm": 0.13385742902755737, "learning_rate": 0.0005, "loss": 2.126, "step": 94750 }, { "epoch": 0.3606799479305436, "grad_norm": 0.1370827555656433, "learning_rate": 0.0005, "loss": 2.1245, "step": 94760 }, { "epoch": 0.3607180103986663, "grad_norm": 0.12504178285598755, "learning_rate": 0.0005, "loss": 2.1244, "step": 94770 }, { "epoch": 0.36075607286678896, "grad_norm": 0.127493217587471, "learning_rate": 0.0005, "loss": 2.1197, "step": 94780 }, { "epoch": 0.36079413533491167, "grad_norm": 0.11956959217786789, "learning_rate": 0.0005, "loss": 2.112, "step": 94790 }, { "epoch": 0.3608321978030343, "grad_norm": 0.1345692276954651, "learning_rate": 0.0005, "loss": 2.1233, "step": 94800 }, { "epoch": 0.36087026027115704, "grad_norm": 0.12534931302070618, "learning_rate": 0.0005, "loss": 2.113, "step": 94810 }, { "epoch": 0.3609083227392797, "grad_norm": 0.12161685526371002, "learning_rate": 0.0005, "loss": 2.1042, "step": 94820 }, { "epoch": 0.3609463852074024, "grad_norm": 0.13285884261131287, "learning_rate": 0.0005, "loss": 2.1081, "step": 94830 }, { "epoch": 0.36098444767552507, "grad_norm": 0.12510840594768524, "learning_rate": 0.0005, "loss": 2.1347, "step": 94840 }, { "epoch": 0.3610225101436478, "grad_norm": 0.11829493939876556, "learning_rate": 0.0005, "loss": 2.1248, "step": 94850 }, { "epoch": 0.36106057261177044, "grad_norm": 0.1258384734392166, "learning_rate": 0.0005, "loss": 2.1044, "step": 94860 }, { "epoch": 0.3610986350798931, "grad_norm": 0.1173219308257103, "learning_rate": 0.0005, "loss": 2.1217, "step": 94870 }, { "epoch": 0.3611366975480158, "grad_norm": 0.1262979507446289, "learning_rate": 0.0005, "loss": 2.1299, "step": 94880 }, { "epoch": 0.36117476001613846, "grad_norm": 0.12923787534236908, "learning_rate": 0.0005, "loss": 2.1233, "step": 94890 }, { "epoch": 0.3612128224842612, "grad_norm": 0.12655524909496307, "learning_rate": 0.0005, "loss": 2.1209, "step": 94900 }, { "epoch": 0.36125088495238383, "grad_norm": 0.12080805003643036, "learning_rate": 0.0005, "loss": 2.1218, "step": 94910 }, { "epoch": 0.36128894742050655, "grad_norm": 0.11724785715341568, "learning_rate": 0.0005, "loss": 2.1187, "step": 94920 }, { "epoch": 0.3613270098886292, "grad_norm": 0.12395143508911133, "learning_rate": 0.0005, "loss": 2.1268, "step": 94930 }, { "epoch": 0.3613650723567519, "grad_norm": 0.12734508514404297, "learning_rate": 0.0005, "loss": 2.1339, "step": 94940 }, { "epoch": 0.3614031348248746, "grad_norm": 0.11923015862703323, "learning_rate": 0.0005, "loss": 2.1024, "step": 94950 }, { "epoch": 0.3614411972929973, "grad_norm": 0.11938813328742981, "learning_rate": 0.0005, "loss": 2.1299, "step": 94960 }, { "epoch": 0.36147925976111994, "grad_norm": 0.12107255309820175, "learning_rate": 0.0005, "loss": 2.1269, "step": 94970 }, { "epoch": 0.36151732222924265, "grad_norm": 0.11876919120550156, "learning_rate": 0.0005, "loss": 2.1133, "step": 94980 }, { "epoch": 0.3615553846973653, "grad_norm": 0.1253984272480011, "learning_rate": 0.0005, "loss": 2.1261, "step": 94990 }, { "epoch": 0.361593447165488, "grad_norm": 0.15340100228786469, "learning_rate": 0.0005, "loss": 2.1195, "step": 95000 }, { "epoch": 0.3616315096336107, "grad_norm": 0.12683463096618652, "learning_rate": 0.0005, "loss": 2.1206, "step": 95010 }, { "epoch": 0.36166957210173334, "grad_norm": 0.13147446513175964, "learning_rate": 0.0005, "loss": 2.1161, "step": 95020 }, { "epoch": 0.36170763456985605, "grad_norm": 0.11206177622079849, "learning_rate": 0.0005, "loss": 2.1241, "step": 95030 }, { "epoch": 0.3617456970379787, "grad_norm": 0.1205473318696022, "learning_rate": 0.0005, "loss": 2.1181, "step": 95040 }, { "epoch": 0.3617837595061014, "grad_norm": 0.12132357805967331, "learning_rate": 0.0005, "loss": 2.1174, "step": 95050 }, { "epoch": 0.3618218219742241, "grad_norm": 0.1197810173034668, "learning_rate": 0.0005, "loss": 2.115, "step": 95060 }, { "epoch": 0.3618598844423468, "grad_norm": 0.12061937898397446, "learning_rate": 0.0005, "loss": 2.1147, "step": 95070 }, { "epoch": 0.36189794691046945, "grad_norm": 0.12413901835680008, "learning_rate": 0.0005, "loss": 2.1237, "step": 95080 }, { "epoch": 0.36193600937859216, "grad_norm": 0.13919556140899658, "learning_rate": 0.0005, "loss": 2.1057, "step": 95090 }, { "epoch": 0.3619740718467148, "grad_norm": 0.12080375850200653, "learning_rate": 0.0005, "loss": 2.1188, "step": 95100 }, { "epoch": 0.36201213431483753, "grad_norm": 0.12811049818992615, "learning_rate": 0.0005, "loss": 2.1377, "step": 95110 }, { "epoch": 0.3620501967829602, "grad_norm": 0.1322176456451416, "learning_rate": 0.0005, "loss": 2.1331, "step": 95120 }, { "epoch": 0.3620882592510829, "grad_norm": 0.13195084035396576, "learning_rate": 0.0005, "loss": 2.1093, "step": 95130 }, { "epoch": 0.36212632171920556, "grad_norm": 0.1257820427417755, "learning_rate": 0.0005, "loss": 2.1168, "step": 95140 }, { "epoch": 0.36216438418732827, "grad_norm": 0.12405338138341904, "learning_rate": 0.0005, "loss": 2.1227, "step": 95150 }, { "epoch": 0.3622024466554509, "grad_norm": 0.12733334302902222, "learning_rate": 0.0005, "loss": 2.1383, "step": 95160 }, { "epoch": 0.3622405091235736, "grad_norm": 0.14176435768604279, "learning_rate": 0.0005, "loss": 2.1314, "step": 95170 }, { "epoch": 0.3622785715916963, "grad_norm": 0.11715825647115707, "learning_rate": 0.0005, "loss": 2.11, "step": 95180 }, { "epoch": 0.36231663405981895, "grad_norm": 0.12377908080816269, "learning_rate": 0.0005, "loss": 2.1319, "step": 95190 }, { "epoch": 0.36235469652794167, "grad_norm": 0.1338021606206894, "learning_rate": 0.0005, "loss": 2.1166, "step": 95200 }, { "epoch": 0.3623927589960643, "grad_norm": 0.1310984194278717, "learning_rate": 0.0005, "loss": 2.1212, "step": 95210 }, { "epoch": 0.36243082146418704, "grad_norm": 0.13077163696289062, "learning_rate": 0.0005, "loss": 2.1179, "step": 95220 }, { "epoch": 0.3624688839323097, "grad_norm": 0.1179090291261673, "learning_rate": 0.0005, "loss": 2.1314, "step": 95230 }, { "epoch": 0.3625069464004324, "grad_norm": 0.12353909015655518, "learning_rate": 0.0005, "loss": 2.1112, "step": 95240 }, { "epoch": 0.36254500886855506, "grad_norm": 0.12887458503246307, "learning_rate": 0.0005, "loss": 2.1239, "step": 95250 }, { "epoch": 0.3625830713366778, "grad_norm": 0.12522532045841217, "learning_rate": 0.0005, "loss": 2.1203, "step": 95260 }, { "epoch": 0.36262113380480043, "grad_norm": 0.12584923207759857, "learning_rate": 0.0005, "loss": 2.1199, "step": 95270 }, { "epoch": 0.36265919627292315, "grad_norm": 0.12872099876403809, "learning_rate": 0.0005, "loss": 2.1298, "step": 95280 }, { "epoch": 0.3626972587410458, "grad_norm": 0.12893973290920258, "learning_rate": 0.0005, "loss": 2.1304, "step": 95290 }, { "epoch": 0.36273532120916846, "grad_norm": 0.12009762972593307, "learning_rate": 0.0005, "loss": 2.1171, "step": 95300 }, { "epoch": 0.3627733836772912, "grad_norm": 0.13374431431293488, "learning_rate": 0.0005, "loss": 2.1214, "step": 95310 }, { "epoch": 0.36281144614541383, "grad_norm": 0.11058095097541809, "learning_rate": 0.0005, "loss": 2.123, "step": 95320 }, { "epoch": 0.36284950861353654, "grad_norm": 0.13277429342269897, "learning_rate": 0.0005, "loss": 2.1278, "step": 95330 }, { "epoch": 0.3628875710816592, "grad_norm": 0.11551333218812943, "learning_rate": 0.0005, "loss": 2.117, "step": 95340 }, { "epoch": 0.3629256335497819, "grad_norm": 0.14505021274089813, "learning_rate": 0.0005, "loss": 2.1291, "step": 95350 }, { "epoch": 0.36296369601790457, "grad_norm": 0.12307033687829971, "learning_rate": 0.0005, "loss": 2.1222, "step": 95360 }, { "epoch": 0.3630017584860273, "grad_norm": 0.14109376072883606, "learning_rate": 0.0005, "loss": 2.1286, "step": 95370 }, { "epoch": 0.36303982095414994, "grad_norm": 0.12665635347366333, "learning_rate": 0.0005, "loss": 2.1054, "step": 95380 }, { "epoch": 0.36307788342227265, "grad_norm": 0.1169692873954773, "learning_rate": 0.0005, "loss": 2.1327, "step": 95390 }, { "epoch": 0.3631159458903953, "grad_norm": 0.1254958063364029, "learning_rate": 0.0005, "loss": 2.1191, "step": 95400 }, { "epoch": 0.363154008358518, "grad_norm": 0.13526247441768646, "learning_rate": 0.0005, "loss": 2.1304, "step": 95410 }, { "epoch": 0.3631920708266407, "grad_norm": 0.12990276515483856, "learning_rate": 0.0005, "loss": 2.1385, "step": 95420 }, { "epoch": 0.3632301332947634, "grad_norm": 0.1282341182231903, "learning_rate": 0.0005, "loss": 2.1064, "step": 95430 }, { "epoch": 0.36326819576288605, "grad_norm": 0.12656110525131226, "learning_rate": 0.0005, "loss": 2.1267, "step": 95440 }, { "epoch": 0.3633062582310087, "grad_norm": 0.11851034313440323, "learning_rate": 0.0005, "loss": 2.1237, "step": 95450 }, { "epoch": 0.3633443206991314, "grad_norm": 0.12452059984207153, "learning_rate": 0.0005, "loss": 2.1296, "step": 95460 }, { "epoch": 0.3633823831672541, "grad_norm": 0.1234283372759819, "learning_rate": 0.0005, "loss": 2.132, "step": 95470 }, { "epoch": 0.3634204456353768, "grad_norm": 0.126150444149971, "learning_rate": 0.0005, "loss": 2.1236, "step": 95480 }, { "epoch": 0.36345850810349944, "grad_norm": 0.12968218326568604, "learning_rate": 0.0005, "loss": 2.1124, "step": 95490 }, { "epoch": 0.36349657057162216, "grad_norm": 0.14867307245731354, "learning_rate": 0.0005, "loss": 2.1172, "step": 95500 }, { "epoch": 0.3635346330397448, "grad_norm": 0.12558214366436005, "learning_rate": 0.0005, "loss": 2.1156, "step": 95510 }, { "epoch": 0.3635726955078675, "grad_norm": 0.12529504299163818, "learning_rate": 0.0005, "loss": 2.1138, "step": 95520 }, { "epoch": 0.3636107579759902, "grad_norm": 0.11289634555578232, "learning_rate": 0.0005, "loss": 2.1198, "step": 95530 }, { "epoch": 0.3636488204441129, "grad_norm": 0.12560494244098663, "learning_rate": 0.0005, "loss": 2.1047, "step": 95540 }, { "epoch": 0.36368688291223555, "grad_norm": 0.13954435288906097, "learning_rate": 0.0005, "loss": 2.1409, "step": 95550 }, { "epoch": 0.36372494538035827, "grad_norm": 0.12962469458580017, "learning_rate": 0.0005, "loss": 2.1297, "step": 95560 }, { "epoch": 0.3637630078484809, "grad_norm": 0.12025895714759827, "learning_rate": 0.0005, "loss": 2.1124, "step": 95570 }, { "epoch": 0.36380107031660364, "grad_norm": 0.1236664354801178, "learning_rate": 0.0005, "loss": 2.1256, "step": 95580 }, { "epoch": 0.3638391327847263, "grad_norm": 0.12834928929805756, "learning_rate": 0.0005, "loss": 2.1114, "step": 95590 }, { "epoch": 0.36387719525284895, "grad_norm": 0.12065238505601883, "learning_rate": 0.0005, "loss": 2.1198, "step": 95600 }, { "epoch": 0.36391525772097166, "grad_norm": 0.12980596721172333, "learning_rate": 0.0005, "loss": 2.117, "step": 95610 }, { "epoch": 0.3639533201890943, "grad_norm": 0.12471610307693481, "learning_rate": 0.0005, "loss": 2.1258, "step": 95620 }, { "epoch": 0.36399138265721703, "grad_norm": 0.1360977739095688, "learning_rate": 0.0005, "loss": 2.1231, "step": 95630 }, { "epoch": 0.3640294451253397, "grad_norm": 0.12732630968093872, "learning_rate": 0.0005, "loss": 2.1266, "step": 95640 }, { "epoch": 0.3640675075934624, "grad_norm": 0.12732800841331482, "learning_rate": 0.0005, "loss": 2.1238, "step": 95650 }, { "epoch": 0.36410557006158506, "grad_norm": 0.12266694754362106, "learning_rate": 0.0005, "loss": 2.1269, "step": 95660 }, { "epoch": 0.3641436325297078, "grad_norm": 0.14251597225666046, "learning_rate": 0.0005, "loss": 2.1121, "step": 95670 }, { "epoch": 0.36418169499783043, "grad_norm": 0.1284990906715393, "learning_rate": 0.0005, "loss": 2.1183, "step": 95680 }, { "epoch": 0.36421975746595314, "grad_norm": 0.12726518511772156, "learning_rate": 0.0005, "loss": 2.1103, "step": 95690 }, { "epoch": 0.3642578199340758, "grad_norm": 0.12449892610311508, "learning_rate": 0.0005, "loss": 2.1229, "step": 95700 }, { "epoch": 0.3642958824021985, "grad_norm": 0.11769961565732956, "learning_rate": 0.0005, "loss": 2.1273, "step": 95710 }, { "epoch": 0.36433394487032117, "grad_norm": 0.12047085165977478, "learning_rate": 0.0005, "loss": 2.1239, "step": 95720 }, { "epoch": 0.3643720073384439, "grad_norm": 0.11222784221172333, "learning_rate": 0.0005, "loss": 2.1104, "step": 95730 }, { "epoch": 0.36441006980656654, "grad_norm": 0.12319763749837875, "learning_rate": 0.0005, "loss": 2.1185, "step": 95740 }, { "epoch": 0.3644481322746892, "grad_norm": 0.11679398268461227, "learning_rate": 0.0005, "loss": 2.1165, "step": 95750 }, { "epoch": 0.3644861947428119, "grad_norm": 0.1261565238237381, "learning_rate": 0.0005, "loss": 2.1133, "step": 95760 }, { "epoch": 0.36452425721093457, "grad_norm": 0.12498561292886734, "learning_rate": 0.0005, "loss": 2.1421, "step": 95770 }, { "epoch": 0.3645623196790573, "grad_norm": 0.12172937393188477, "learning_rate": 0.0005, "loss": 2.1087, "step": 95780 }, { "epoch": 0.36460038214717994, "grad_norm": 0.11952987313270569, "learning_rate": 0.0005, "loss": 2.1102, "step": 95790 }, { "epoch": 0.36463844461530265, "grad_norm": 0.12958964705467224, "learning_rate": 0.0005, "loss": 2.1171, "step": 95800 }, { "epoch": 0.3646765070834253, "grad_norm": 0.12317771464586258, "learning_rate": 0.0005, "loss": 2.121, "step": 95810 }, { "epoch": 0.364714569551548, "grad_norm": 0.11658994853496552, "learning_rate": 0.0005, "loss": 2.119, "step": 95820 }, { "epoch": 0.3647526320196707, "grad_norm": 0.11464099586009979, "learning_rate": 0.0005, "loss": 2.1208, "step": 95830 }, { "epoch": 0.3647906944877934, "grad_norm": 0.1333150416612625, "learning_rate": 0.0005, "loss": 2.1213, "step": 95840 }, { "epoch": 0.36482875695591604, "grad_norm": 0.1296979784965515, "learning_rate": 0.0005, "loss": 2.1185, "step": 95850 }, { "epoch": 0.36486681942403876, "grad_norm": 0.12045649439096451, "learning_rate": 0.0005, "loss": 2.1189, "step": 95860 }, { "epoch": 0.3649048818921614, "grad_norm": 0.12735560536384583, "learning_rate": 0.0005, "loss": 2.1229, "step": 95870 }, { "epoch": 0.36494294436028407, "grad_norm": 0.13241159915924072, "learning_rate": 0.0005, "loss": 2.1152, "step": 95880 }, { "epoch": 0.3649810068284068, "grad_norm": 0.1125500351190567, "learning_rate": 0.0005, "loss": 2.1255, "step": 95890 }, { "epoch": 0.36501906929652944, "grad_norm": 0.1374804973602295, "learning_rate": 0.0005, "loss": 2.1292, "step": 95900 }, { "epoch": 0.36505713176465215, "grad_norm": 0.11424729973077774, "learning_rate": 0.0005, "loss": 2.1092, "step": 95910 }, { "epoch": 0.3650951942327748, "grad_norm": 0.12060584872961044, "learning_rate": 0.0005, "loss": 2.1162, "step": 95920 }, { "epoch": 0.3651332567008975, "grad_norm": 0.12491651624441147, "learning_rate": 0.0005, "loss": 2.1124, "step": 95930 }, { "epoch": 0.3651713191690202, "grad_norm": 0.12484659254550934, "learning_rate": 0.0005, "loss": 2.1006, "step": 95940 }, { "epoch": 0.3652093816371429, "grad_norm": 0.11688201874494553, "learning_rate": 0.0005, "loss": 2.1151, "step": 95950 }, { "epoch": 0.36524744410526555, "grad_norm": 0.12248838692903519, "learning_rate": 0.0005, "loss": 2.1292, "step": 95960 }, { "epoch": 0.36528550657338826, "grad_norm": 0.12331437319517136, "learning_rate": 0.0005, "loss": 2.1394, "step": 95970 }, { "epoch": 0.3653235690415109, "grad_norm": 0.11248493939638138, "learning_rate": 0.0005, "loss": 2.1287, "step": 95980 }, { "epoch": 0.36536163150963363, "grad_norm": 0.12089279294013977, "learning_rate": 0.0005, "loss": 2.1194, "step": 95990 }, { "epoch": 0.3653996939777563, "grad_norm": 0.12537023425102234, "learning_rate": 0.0005, "loss": 2.1235, "step": 96000 }, { "epoch": 0.365437756445879, "grad_norm": 0.11896590143442154, "learning_rate": 0.0005, "loss": 2.1262, "step": 96010 }, { "epoch": 0.36547581891400166, "grad_norm": 0.12212135642766953, "learning_rate": 0.0005, "loss": 2.1275, "step": 96020 }, { "epoch": 0.3655138813821243, "grad_norm": 0.1215914934873581, "learning_rate": 0.0005, "loss": 2.124, "step": 96030 }, { "epoch": 0.36555194385024703, "grad_norm": 0.12669996917247772, "learning_rate": 0.0005, "loss": 2.1216, "step": 96040 }, { "epoch": 0.3655900063183697, "grad_norm": 0.11996883153915405, "learning_rate": 0.0005, "loss": 2.134, "step": 96050 }, { "epoch": 0.3656280687864924, "grad_norm": 0.11836867034435272, "learning_rate": 0.0005, "loss": 2.112, "step": 96060 }, { "epoch": 0.36566613125461506, "grad_norm": 0.1243426501750946, "learning_rate": 0.0005, "loss": 2.1147, "step": 96070 }, { "epoch": 0.36570419372273777, "grad_norm": 0.12142115086317062, "learning_rate": 0.0005, "loss": 2.1255, "step": 96080 }, { "epoch": 0.3657422561908604, "grad_norm": 0.12365086376667023, "learning_rate": 0.0005, "loss": 2.1248, "step": 96090 }, { "epoch": 0.36578031865898314, "grad_norm": 0.12358164042234421, "learning_rate": 0.0005, "loss": 2.1361, "step": 96100 }, { "epoch": 0.3658183811271058, "grad_norm": 0.13264146447181702, "learning_rate": 0.0005, "loss": 2.1337, "step": 96110 }, { "epoch": 0.3658564435952285, "grad_norm": 0.1243383064866066, "learning_rate": 0.0005, "loss": 2.108, "step": 96120 }, { "epoch": 0.36589450606335117, "grad_norm": 0.11371131241321564, "learning_rate": 0.0005, "loss": 2.1426, "step": 96130 }, { "epoch": 0.3659325685314739, "grad_norm": 0.10895078629255295, "learning_rate": 0.0005, "loss": 2.1392, "step": 96140 }, { "epoch": 0.36597063099959654, "grad_norm": 0.11829078942537308, "learning_rate": 0.0005, "loss": 2.1141, "step": 96150 }, { "epoch": 0.36600869346771925, "grad_norm": 0.12832999229431152, "learning_rate": 0.0005, "loss": 2.1318, "step": 96160 }, { "epoch": 0.3660467559358419, "grad_norm": 0.11920901387929916, "learning_rate": 0.0005, "loss": 2.1117, "step": 96170 }, { "epoch": 0.36608481840396456, "grad_norm": 0.129286989569664, "learning_rate": 0.0005, "loss": 2.1067, "step": 96180 }, { "epoch": 0.3661228808720873, "grad_norm": 0.12464142590761185, "learning_rate": 0.0005, "loss": 2.1321, "step": 96190 }, { "epoch": 0.36616094334020993, "grad_norm": 0.12098777294158936, "learning_rate": 0.0005, "loss": 2.1196, "step": 96200 }, { "epoch": 0.36619900580833264, "grad_norm": 0.11722879111766815, "learning_rate": 0.0005, "loss": 2.1187, "step": 96210 }, { "epoch": 0.3662370682764553, "grad_norm": 0.13351798057556152, "learning_rate": 0.0005, "loss": 2.1125, "step": 96220 }, { "epoch": 0.366275130744578, "grad_norm": 0.12770894169807434, "learning_rate": 0.0005, "loss": 2.1084, "step": 96230 }, { "epoch": 0.36631319321270067, "grad_norm": 0.11819111555814743, "learning_rate": 0.0005, "loss": 2.1231, "step": 96240 }, { "epoch": 0.3663512556808234, "grad_norm": 0.12946787476539612, "learning_rate": 0.0005, "loss": 2.1201, "step": 96250 }, { "epoch": 0.36638931814894604, "grad_norm": 0.1193518415093422, "learning_rate": 0.0005, "loss": 2.1271, "step": 96260 }, { "epoch": 0.36642738061706875, "grad_norm": 0.1213909387588501, "learning_rate": 0.0005, "loss": 2.1231, "step": 96270 }, { "epoch": 0.3664654430851914, "grad_norm": 0.13134944438934326, "learning_rate": 0.0005, "loss": 2.1224, "step": 96280 }, { "epoch": 0.3665035055533141, "grad_norm": 0.12113825231790543, "learning_rate": 0.0005, "loss": 2.1225, "step": 96290 }, { "epoch": 0.3665415680214368, "grad_norm": 0.11982209980487823, "learning_rate": 0.0005, "loss": 2.1186, "step": 96300 }, { "epoch": 0.36657963048955944, "grad_norm": 0.13760797679424286, "learning_rate": 0.0005, "loss": 2.1125, "step": 96310 }, { "epoch": 0.36661769295768215, "grad_norm": 0.12309988588094711, "learning_rate": 0.0005, "loss": 2.1176, "step": 96320 }, { "epoch": 0.3666557554258048, "grad_norm": 0.12127954512834549, "learning_rate": 0.0005, "loss": 2.1175, "step": 96330 }, { "epoch": 0.3666938178939275, "grad_norm": 0.12880012392997742, "learning_rate": 0.0005, "loss": 2.1141, "step": 96340 }, { "epoch": 0.3667318803620502, "grad_norm": 0.12680794298648834, "learning_rate": 0.0005, "loss": 2.1221, "step": 96350 }, { "epoch": 0.3667699428301729, "grad_norm": 0.12031067907810211, "learning_rate": 0.0005, "loss": 2.1205, "step": 96360 }, { "epoch": 0.36680800529829555, "grad_norm": 0.1160052940249443, "learning_rate": 0.0005, "loss": 2.1229, "step": 96370 }, { "epoch": 0.36684606776641826, "grad_norm": 0.12784983217716217, "learning_rate": 0.0005, "loss": 2.1197, "step": 96380 }, { "epoch": 0.3668841302345409, "grad_norm": 0.12080098688602448, "learning_rate": 0.0005, "loss": 2.1376, "step": 96390 }, { "epoch": 0.36692219270266363, "grad_norm": 0.14079447090625763, "learning_rate": 0.0005, "loss": 2.1113, "step": 96400 }, { "epoch": 0.3669602551707863, "grad_norm": 0.12535206973552704, "learning_rate": 0.0005, "loss": 2.1254, "step": 96410 }, { "epoch": 0.366998317638909, "grad_norm": 0.13023337721824646, "learning_rate": 0.0005, "loss": 2.1184, "step": 96420 }, { "epoch": 0.36703638010703166, "grad_norm": 0.11447879672050476, "learning_rate": 0.0005, "loss": 2.1272, "step": 96430 }, { "epoch": 0.36707444257515437, "grad_norm": 0.12368776649236679, "learning_rate": 0.0005, "loss": 2.1247, "step": 96440 }, { "epoch": 0.367112505043277, "grad_norm": 0.1184663251042366, "learning_rate": 0.0005, "loss": 2.1195, "step": 96450 }, { "epoch": 0.3671505675113997, "grad_norm": 0.1336742341518402, "learning_rate": 0.0005, "loss": 2.1182, "step": 96460 }, { "epoch": 0.3671886299795224, "grad_norm": 0.12953265011310577, "learning_rate": 0.0005, "loss": 2.1318, "step": 96470 }, { "epoch": 0.36722669244764505, "grad_norm": 0.12758080661296844, "learning_rate": 0.0005, "loss": 2.1112, "step": 96480 }, { "epoch": 0.36726475491576777, "grad_norm": 0.12322807312011719, "learning_rate": 0.0005, "loss": 2.1202, "step": 96490 }, { "epoch": 0.3673028173838904, "grad_norm": 0.12046000361442566, "learning_rate": 0.0005, "loss": 2.1092, "step": 96500 }, { "epoch": 0.36734087985201314, "grad_norm": 0.12544408440589905, "learning_rate": 0.0005, "loss": 2.1161, "step": 96510 }, { "epoch": 0.3673789423201358, "grad_norm": 0.1448279321193695, "learning_rate": 0.0005, "loss": 2.0988, "step": 96520 }, { "epoch": 0.3674170047882585, "grad_norm": 0.13275164365768433, "learning_rate": 0.0005, "loss": 2.1271, "step": 96530 }, { "epoch": 0.36745506725638116, "grad_norm": 0.11812781542539597, "learning_rate": 0.0005, "loss": 2.1129, "step": 96540 }, { "epoch": 0.3674931297245039, "grad_norm": 0.11883515864610672, "learning_rate": 0.0005, "loss": 2.1239, "step": 96550 }, { "epoch": 0.36753119219262653, "grad_norm": 0.12725114822387695, "learning_rate": 0.0005, "loss": 2.1185, "step": 96560 }, { "epoch": 0.36756925466074925, "grad_norm": 0.23462848365306854, "learning_rate": 0.0005, "loss": 2.1122, "step": 96570 }, { "epoch": 0.3676073171288719, "grad_norm": 0.12591758370399475, "learning_rate": 0.0005, "loss": 2.1214, "step": 96580 }, { "epoch": 0.3676453795969946, "grad_norm": 0.1261547952890396, "learning_rate": 0.0005, "loss": 2.1164, "step": 96590 }, { "epoch": 0.36768344206511727, "grad_norm": 0.11906035989522934, "learning_rate": 0.0005, "loss": 2.1084, "step": 96600 }, { "epoch": 0.36772150453323993, "grad_norm": 0.12901455163955688, "learning_rate": 0.0005, "loss": 2.1194, "step": 96610 }, { "epoch": 0.36775956700136264, "grad_norm": 0.12110006809234619, "learning_rate": 0.0005, "loss": 2.1181, "step": 96620 }, { "epoch": 0.3677976294694853, "grad_norm": 0.13759082555770874, "learning_rate": 0.0005, "loss": 2.1246, "step": 96630 }, { "epoch": 0.367835691937608, "grad_norm": 0.1277286857366562, "learning_rate": 0.0005, "loss": 2.1171, "step": 96640 }, { "epoch": 0.36787375440573067, "grad_norm": 0.12250223755836487, "learning_rate": 0.0005, "loss": 2.1263, "step": 96650 }, { "epoch": 0.3679118168738534, "grad_norm": 0.1282496452331543, "learning_rate": 0.0005, "loss": 2.1427, "step": 96660 }, { "epoch": 0.36794987934197604, "grad_norm": 0.12418284267187119, "learning_rate": 0.0005, "loss": 2.12, "step": 96670 }, { "epoch": 0.36798794181009875, "grad_norm": 0.12446416169404984, "learning_rate": 0.0005, "loss": 2.1139, "step": 96680 }, { "epoch": 0.3680260042782214, "grad_norm": 0.12037511169910431, "learning_rate": 0.0005, "loss": 2.1192, "step": 96690 }, { "epoch": 0.3680640667463441, "grad_norm": 0.12514127790927887, "learning_rate": 0.0005, "loss": 2.1237, "step": 96700 }, { "epoch": 0.3681021292144668, "grad_norm": 0.12305998057126999, "learning_rate": 0.0005, "loss": 2.1299, "step": 96710 }, { "epoch": 0.3681401916825895, "grad_norm": 0.12293251603841782, "learning_rate": 0.0005, "loss": 2.1176, "step": 96720 }, { "epoch": 0.36817825415071215, "grad_norm": 0.1332688182592392, "learning_rate": 0.0005, "loss": 2.1313, "step": 96730 }, { "epoch": 0.3682163166188348, "grad_norm": 0.13076834380626678, "learning_rate": 0.0005, "loss": 2.1213, "step": 96740 }, { "epoch": 0.3682543790869575, "grad_norm": 0.11440906673669815, "learning_rate": 0.0005, "loss": 2.1153, "step": 96750 }, { "epoch": 0.3682924415550802, "grad_norm": 0.12589947879314423, "learning_rate": 0.0005, "loss": 2.1165, "step": 96760 }, { "epoch": 0.3683305040232029, "grad_norm": 0.1251448392868042, "learning_rate": 0.0005, "loss": 2.1277, "step": 96770 }, { "epoch": 0.36836856649132554, "grad_norm": 0.12304763495922089, "learning_rate": 0.0005, "loss": 2.1207, "step": 96780 }, { "epoch": 0.36840662895944826, "grad_norm": 0.12357057631015778, "learning_rate": 0.0005, "loss": 2.1178, "step": 96790 }, { "epoch": 0.3684446914275709, "grad_norm": 0.12945859134197235, "learning_rate": 0.0005, "loss": 2.1169, "step": 96800 }, { "epoch": 0.3684827538956936, "grad_norm": 0.13287395238876343, "learning_rate": 0.0005, "loss": 2.1311, "step": 96810 }, { "epoch": 0.3685208163638163, "grad_norm": 0.11265331506729126, "learning_rate": 0.0005, "loss": 2.1171, "step": 96820 }, { "epoch": 0.368558878831939, "grad_norm": 0.12684476375579834, "learning_rate": 0.0005, "loss": 2.1181, "step": 96830 }, { "epoch": 0.36859694130006165, "grad_norm": 0.12589377164840698, "learning_rate": 0.0005, "loss": 2.1128, "step": 96840 }, { "epoch": 0.36863500376818437, "grad_norm": 0.12472228705883026, "learning_rate": 0.0005, "loss": 2.1216, "step": 96850 }, { "epoch": 0.368673066236307, "grad_norm": 0.12273325026035309, "learning_rate": 0.0005, "loss": 2.113, "step": 96860 }, { "epoch": 0.36871112870442974, "grad_norm": 0.11925730854272842, "learning_rate": 0.0005, "loss": 2.1237, "step": 96870 }, { "epoch": 0.3687491911725524, "grad_norm": 0.13875854015350342, "learning_rate": 0.0005, "loss": 2.1186, "step": 96880 }, { "epoch": 0.36878725364067505, "grad_norm": 0.13009196519851685, "learning_rate": 0.0005, "loss": 2.1129, "step": 96890 }, { "epoch": 0.36882531610879776, "grad_norm": 0.11982984095811844, "learning_rate": 0.0005, "loss": 2.1153, "step": 96900 }, { "epoch": 0.3688633785769204, "grad_norm": 0.12012947350740433, "learning_rate": 0.0005, "loss": 2.1242, "step": 96910 }, { "epoch": 0.36890144104504313, "grad_norm": 0.145431786775589, "learning_rate": 0.0005, "loss": 2.1211, "step": 96920 }, { "epoch": 0.3689395035131658, "grad_norm": 0.11987671256065369, "learning_rate": 0.0005, "loss": 2.1185, "step": 96930 }, { "epoch": 0.3689775659812885, "grad_norm": 0.13114087283611298, "learning_rate": 0.0005, "loss": 2.1312, "step": 96940 }, { "epoch": 0.36901562844941116, "grad_norm": 0.12345308065414429, "learning_rate": 0.0005, "loss": 2.1194, "step": 96950 }, { "epoch": 0.36905369091753387, "grad_norm": 0.11484279483556747, "learning_rate": 0.0005, "loss": 2.123, "step": 96960 }, { "epoch": 0.36909175338565653, "grad_norm": 0.12712082266807556, "learning_rate": 0.0005, "loss": 2.1213, "step": 96970 }, { "epoch": 0.36912981585377924, "grad_norm": 0.11770997196435928, "learning_rate": 0.0005, "loss": 2.1222, "step": 96980 }, { "epoch": 0.3691678783219019, "grad_norm": 0.11867061257362366, "learning_rate": 0.0005, "loss": 2.1143, "step": 96990 }, { "epoch": 0.3692059407900246, "grad_norm": 0.11773667484521866, "learning_rate": 0.0005, "loss": 2.1197, "step": 97000 }, { "epoch": 0.36924400325814727, "grad_norm": 0.12527793645858765, "learning_rate": 0.0005, "loss": 2.1389, "step": 97010 }, { "epoch": 0.36928206572627, "grad_norm": 0.12575988471508026, "learning_rate": 0.0005, "loss": 2.1052, "step": 97020 }, { "epoch": 0.36932012819439264, "grad_norm": 0.11121707409620285, "learning_rate": 0.0005, "loss": 2.1007, "step": 97030 }, { "epoch": 0.3693581906625153, "grad_norm": 0.12394191324710846, "learning_rate": 0.0005, "loss": 2.1191, "step": 97040 }, { "epoch": 0.369396253130638, "grad_norm": 0.1266230195760727, "learning_rate": 0.0005, "loss": 2.1147, "step": 97050 }, { "epoch": 0.36943431559876067, "grad_norm": 0.11939870566129684, "learning_rate": 0.0005, "loss": 2.1155, "step": 97060 }, { "epoch": 0.3694723780668834, "grad_norm": 0.11312147229909897, "learning_rate": 0.0005, "loss": 2.1116, "step": 97070 }, { "epoch": 0.36951044053500604, "grad_norm": 0.11626769602298737, "learning_rate": 0.0005, "loss": 2.1176, "step": 97080 }, { "epoch": 0.36954850300312875, "grad_norm": 0.1269378960132599, "learning_rate": 0.0005, "loss": 2.1247, "step": 97090 }, { "epoch": 0.3695865654712514, "grad_norm": 0.12174524366855621, "learning_rate": 0.0005, "loss": 2.1187, "step": 97100 }, { "epoch": 0.3696246279393741, "grad_norm": 0.12147214263677597, "learning_rate": 0.0005, "loss": 2.1313, "step": 97110 }, { "epoch": 0.3696626904074968, "grad_norm": 0.11966443061828613, "learning_rate": 0.0005, "loss": 2.1143, "step": 97120 }, { "epoch": 0.3697007528756195, "grad_norm": 0.12414590269327164, "learning_rate": 0.0005, "loss": 2.1123, "step": 97130 }, { "epoch": 0.36973881534374214, "grad_norm": 0.1292896568775177, "learning_rate": 0.0005, "loss": 2.1339, "step": 97140 }, { "epoch": 0.36977687781186486, "grad_norm": 0.1209682747721672, "learning_rate": 0.0005, "loss": 2.1185, "step": 97150 }, { "epoch": 0.3698149402799875, "grad_norm": 0.1336567997932434, "learning_rate": 0.0005, "loss": 2.1272, "step": 97160 }, { "epoch": 0.36985300274811017, "grad_norm": 0.11845371127128601, "learning_rate": 0.0005, "loss": 2.1169, "step": 97170 }, { "epoch": 0.3698910652162329, "grad_norm": 0.12234880775213242, "learning_rate": 0.0005, "loss": 2.1115, "step": 97180 }, { "epoch": 0.36992912768435554, "grad_norm": 0.13724114000797272, "learning_rate": 0.0005, "loss": 2.1176, "step": 97190 }, { "epoch": 0.36996719015247825, "grad_norm": 0.13921892642974854, "learning_rate": 0.0005, "loss": 2.1357, "step": 97200 }, { "epoch": 0.3700052526206009, "grad_norm": 0.12601223587989807, "learning_rate": 0.0005, "loss": 2.1149, "step": 97210 }, { "epoch": 0.3700433150887236, "grad_norm": 0.13368086516857147, "learning_rate": 0.0005, "loss": 2.1335, "step": 97220 }, { "epoch": 0.3700813775568463, "grad_norm": 0.11279631406068802, "learning_rate": 0.0005, "loss": 2.1335, "step": 97230 }, { "epoch": 0.370119440024969, "grad_norm": 0.11834455281496048, "learning_rate": 0.0005, "loss": 2.1279, "step": 97240 }, { "epoch": 0.37015750249309165, "grad_norm": 0.12333115190267563, "learning_rate": 0.0005, "loss": 2.1349, "step": 97250 }, { "epoch": 0.37019556496121436, "grad_norm": 0.12108000367879868, "learning_rate": 0.0005, "loss": 2.1194, "step": 97260 }, { "epoch": 0.370233627429337, "grad_norm": 0.1163671612739563, "learning_rate": 0.0005, "loss": 2.1055, "step": 97270 }, { "epoch": 0.37027168989745973, "grad_norm": 0.15014775097370148, "learning_rate": 0.0005, "loss": 2.1208, "step": 97280 }, { "epoch": 0.3703097523655824, "grad_norm": 0.14235511422157288, "learning_rate": 0.0005, "loss": 2.1153, "step": 97290 }, { "epoch": 0.3703478148337051, "grad_norm": 0.126591756939888, "learning_rate": 0.0005, "loss": 2.1201, "step": 97300 }, { "epoch": 0.37038587730182776, "grad_norm": 0.12709274888038635, "learning_rate": 0.0005, "loss": 2.1293, "step": 97310 }, { "epoch": 0.3704239397699504, "grad_norm": 0.1244237869977951, "learning_rate": 0.0005, "loss": 2.1259, "step": 97320 }, { "epoch": 0.37046200223807313, "grad_norm": 0.11576223373413086, "learning_rate": 0.0005, "loss": 2.1176, "step": 97330 }, { "epoch": 0.3705000647061958, "grad_norm": 0.12289880961179733, "learning_rate": 0.0005, "loss": 2.1042, "step": 97340 }, { "epoch": 0.3705381271743185, "grad_norm": 0.12008960545063019, "learning_rate": 0.0005, "loss": 2.1278, "step": 97350 }, { "epoch": 0.37057618964244116, "grad_norm": 0.1161564290523529, "learning_rate": 0.0005, "loss": 2.1178, "step": 97360 }, { "epoch": 0.37061425211056387, "grad_norm": 0.1414661556482315, "learning_rate": 0.0005, "loss": 2.1306, "step": 97370 }, { "epoch": 0.3706523145786865, "grad_norm": 0.13043402135372162, "learning_rate": 0.0005, "loss": 2.1191, "step": 97380 }, { "epoch": 0.37069037704680924, "grad_norm": 0.12933848798274994, "learning_rate": 0.0005, "loss": 2.1267, "step": 97390 }, { "epoch": 0.3707284395149319, "grad_norm": 0.13210655748844147, "learning_rate": 0.0005, "loss": 2.1139, "step": 97400 }, { "epoch": 0.3707665019830546, "grad_norm": 0.14086242020130157, "learning_rate": 0.0005, "loss": 2.1165, "step": 97410 }, { "epoch": 0.37080456445117727, "grad_norm": 0.14640408754348755, "learning_rate": 0.0005, "loss": 2.1412, "step": 97420 }, { "epoch": 0.3708426269193, "grad_norm": 0.1357678920030594, "learning_rate": 0.0005, "loss": 2.1266, "step": 97430 }, { "epoch": 0.37088068938742264, "grad_norm": 0.12262952327728271, "learning_rate": 0.0005, "loss": 2.1096, "step": 97440 }, { "epoch": 0.37091875185554535, "grad_norm": 0.1472138911485672, "learning_rate": 0.0005, "loss": 2.1321, "step": 97450 }, { "epoch": 0.370956814323668, "grad_norm": 0.11382775753736496, "learning_rate": 0.0005, "loss": 2.1145, "step": 97460 }, { "epoch": 0.37099487679179066, "grad_norm": 0.13012194633483887, "learning_rate": 0.0005, "loss": 2.1275, "step": 97470 }, { "epoch": 0.3710329392599134, "grad_norm": 0.12605558335781097, "learning_rate": 0.0005, "loss": 2.1278, "step": 97480 }, { "epoch": 0.37107100172803603, "grad_norm": 0.11173474043607712, "learning_rate": 0.0005, "loss": 2.1363, "step": 97490 }, { "epoch": 0.37110906419615874, "grad_norm": 0.14552360773086548, "learning_rate": 0.0005, "loss": 2.114, "step": 97500 }, { "epoch": 0.3711471266642814, "grad_norm": 0.11996559053659439, "learning_rate": 0.0005, "loss": 2.1243, "step": 97510 }, { "epoch": 0.3711851891324041, "grad_norm": 0.12549810111522675, "learning_rate": 0.0005, "loss": 2.1182, "step": 97520 }, { "epoch": 0.37122325160052677, "grad_norm": 0.13002847135066986, "learning_rate": 0.0005, "loss": 2.1403, "step": 97530 }, { "epoch": 0.3712613140686495, "grad_norm": 0.11643203347921371, "learning_rate": 0.0005, "loss": 2.1106, "step": 97540 }, { "epoch": 0.37129937653677214, "grad_norm": 0.1239471510052681, "learning_rate": 0.0005, "loss": 2.1127, "step": 97550 }, { "epoch": 0.37133743900489485, "grad_norm": 0.11400319635868073, "learning_rate": 0.0005, "loss": 2.1124, "step": 97560 }, { "epoch": 0.3713755014730175, "grad_norm": 0.11754616349935532, "learning_rate": 0.0005, "loss": 2.1156, "step": 97570 }, { "epoch": 0.3714135639411402, "grad_norm": 0.12323588132858276, "learning_rate": 0.0005, "loss": 2.1386, "step": 97580 }, { "epoch": 0.3714516264092629, "grad_norm": 0.12451520562171936, "learning_rate": 0.0005, "loss": 2.1205, "step": 97590 }, { "epoch": 0.37148968887738554, "grad_norm": 0.11871838569641113, "learning_rate": 0.0005, "loss": 2.1274, "step": 97600 }, { "epoch": 0.37152775134550825, "grad_norm": 0.1297159045934677, "learning_rate": 0.0005, "loss": 2.1187, "step": 97610 }, { "epoch": 0.3715658138136309, "grad_norm": 0.1391223818063736, "learning_rate": 0.0005, "loss": 2.1129, "step": 97620 }, { "epoch": 0.3716038762817536, "grad_norm": 0.1261225789785385, "learning_rate": 0.0005, "loss": 2.1241, "step": 97630 }, { "epoch": 0.3716419387498763, "grad_norm": 0.12182370573282242, "learning_rate": 0.0005, "loss": 2.1296, "step": 97640 }, { "epoch": 0.371680001217999, "grad_norm": 0.12699827551841736, "learning_rate": 0.0005, "loss": 2.1229, "step": 97650 }, { "epoch": 0.37171806368612165, "grad_norm": 0.11652454733848572, "learning_rate": 0.0005, "loss": 2.1133, "step": 97660 }, { "epoch": 0.37175612615424436, "grad_norm": 0.1208571121096611, "learning_rate": 0.0005, "loss": 2.124, "step": 97670 }, { "epoch": 0.371794188622367, "grad_norm": 0.13136690855026245, "learning_rate": 0.0005, "loss": 2.1204, "step": 97680 }, { "epoch": 0.37183225109048973, "grad_norm": 0.11939571052789688, "learning_rate": 0.0005, "loss": 2.1109, "step": 97690 }, { "epoch": 0.3718703135586124, "grad_norm": 0.12099155783653259, "learning_rate": 0.0005, "loss": 2.137, "step": 97700 }, { "epoch": 0.3719083760267351, "grad_norm": 0.12223554402589798, "learning_rate": 0.0005, "loss": 2.1141, "step": 97710 }, { "epoch": 0.37194643849485776, "grad_norm": 0.12005815654993057, "learning_rate": 0.0005, "loss": 2.1134, "step": 97720 }, { "epoch": 0.37198450096298047, "grad_norm": 0.12749920785427094, "learning_rate": 0.0005, "loss": 2.1384, "step": 97730 }, { "epoch": 0.3720225634311031, "grad_norm": 0.12131239473819733, "learning_rate": 0.0005, "loss": 2.1234, "step": 97740 }, { "epoch": 0.3720606258992258, "grad_norm": 0.12197811156511307, "learning_rate": 0.0005, "loss": 2.1171, "step": 97750 }, { "epoch": 0.3720986883673485, "grad_norm": 0.13216941058635712, "learning_rate": 0.0005, "loss": 2.1065, "step": 97760 }, { "epoch": 0.37213675083547115, "grad_norm": 0.11797917634248734, "learning_rate": 0.0005, "loss": 2.1153, "step": 97770 }, { "epoch": 0.37217481330359387, "grad_norm": 0.1313115358352661, "learning_rate": 0.0005, "loss": 2.1194, "step": 97780 }, { "epoch": 0.3722128757717165, "grad_norm": 0.11875154823064804, "learning_rate": 0.0005, "loss": 2.1215, "step": 97790 }, { "epoch": 0.37225093823983924, "grad_norm": 0.12649646401405334, "learning_rate": 0.0005, "loss": 2.1369, "step": 97800 }, { "epoch": 0.3722890007079619, "grad_norm": 0.128423273563385, "learning_rate": 0.0005, "loss": 2.1297, "step": 97810 }, { "epoch": 0.3723270631760846, "grad_norm": 0.1393405646085739, "learning_rate": 0.0005, "loss": 2.1255, "step": 97820 }, { "epoch": 0.37236512564420726, "grad_norm": 0.13227048516273499, "learning_rate": 0.0005, "loss": 2.1441, "step": 97830 }, { "epoch": 0.37240318811233, "grad_norm": 0.13055221736431122, "learning_rate": 0.0005, "loss": 2.1241, "step": 97840 }, { "epoch": 0.37244125058045263, "grad_norm": 0.11716745048761368, "learning_rate": 0.0005, "loss": 2.1138, "step": 97850 }, { "epoch": 0.37247931304857534, "grad_norm": 0.12659123539924622, "learning_rate": 0.0005, "loss": 2.12, "step": 97860 }, { "epoch": 0.372517375516698, "grad_norm": 0.13839322328567505, "learning_rate": 0.0005, "loss": 2.1265, "step": 97870 }, { "epoch": 0.3725554379848207, "grad_norm": 0.12499116361141205, "learning_rate": 0.0005, "loss": 2.1307, "step": 97880 }, { "epoch": 0.37259350045294337, "grad_norm": 0.12236038595438004, "learning_rate": 0.0005, "loss": 2.1171, "step": 97890 }, { "epoch": 0.37263156292106603, "grad_norm": 0.1392621546983719, "learning_rate": 0.0005, "loss": 2.1226, "step": 97900 }, { "epoch": 0.37266962538918874, "grad_norm": 0.1261049211025238, "learning_rate": 0.0005, "loss": 2.1294, "step": 97910 }, { "epoch": 0.3727076878573114, "grad_norm": 0.15030497312545776, "learning_rate": 0.0005, "loss": 2.1226, "step": 97920 }, { "epoch": 0.3727457503254341, "grad_norm": 0.1456809937953949, "learning_rate": 0.0005, "loss": 2.1116, "step": 97930 }, { "epoch": 0.37278381279355677, "grad_norm": 0.14672748744487762, "learning_rate": 0.0005, "loss": 2.1037, "step": 97940 }, { "epoch": 0.3728218752616795, "grad_norm": 0.12252899259328842, "learning_rate": 0.0005, "loss": 2.1171, "step": 97950 }, { "epoch": 0.37285993772980214, "grad_norm": 0.14606109261512756, "learning_rate": 0.0005, "loss": 2.1265, "step": 97960 }, { "epoch": 0.37289800019792485, "grad_norm": 0.12018848210573196, "learning_rate": 0.0005, "loss": 2.1129, "step": 97970 }, { "epoch": 0.3729360626660475, "grad_norm": 0.1229386255145073, "learning_rate": 0.0005, "loss": 2.1232, "step": 97980 }, { "epoch": 0.3729741251341702, "grad_norm": 0.11852700263261795, "learning_rate": 0.0005, "loss": 2.1267, "step": 97990 }, { "epoch": 0.3730121876022929, "grad_norm": 0.13460673391819, "learning_rate": 0.0005, "loss": 2.1268, "step": 98000 }, { "epoch": 0.3730502500704156, "grad_norm": 0.140608549118042, "learning_rate": 0.0005, "loss": 2.1146, "step": 98010 }, { "epoch": 0.37308831253853825, "grad_norm": 0.11561145633459091, "learning_rate": 0.0005, "loss": 2.1141, "step": 98020 }, { "epoch": 0.3731263750066609, "grad_norm": 0.1210857629776001, "learning_rate": 0.0005, "loss": 2.1195, "step": 98030 }, { "epoch": 0.3731644374747836, "grad_norm": 0.1265321522951126, "learning_rate": 0.0005, "loss": 2.1265, "step": 98040 }, { "epoch": 0.3732024999429063, "grad_norm": 0.12456094473600388, "learning_rate": 0.0005, "loss": 2.1172, "step": 98050 }, { "epoch": 0.373240562411029, "grad_norm": 0.1190691590309143, "learning_rate": 0.0005, "loss": 2.115, "step": 98060 }, { "epoch": 0.37327862487915164, "grad_norm": 0.11460399627685547, "learning_rate": 0.0005, "loss": 2.1222, "step": 98070 }, { "epoch": 0.37331668734727436, "grad_norm": 0.1223832294344902, "learning_rate": 0.0005, "loss": 2.1291, "step": 98080 }, { "epoch": 0.373354749815397, "grad_norm": 0.12511587142944336, "learning_rate": 0.0005, "loss": 2.1215, "step": 98090 }, { "epoch": 0.3733928122835197, "grad_norm": 0.13690148293972015, "learning_rate": 0.0005, "loss": 2.133, "step": 98100 }, { "epoch": 0.3734308747516424, "grad_norm": 0.12537544965744019, "learning_rate": 0.0005, "loss": 2.125, "step": 98110 }, { "epoch": 0.3734689372197651, "grad_norm": 0.13309504091739655, "learning_rate": 0.0005, "loss": 2.1234, "step": 98120 }, { "epoch": 0.37350699968788775, "grad_norm": 0.12549757957458496, "learning_rate": 0.0005, "loss": 2.124, "step": 98130 }, { "epoch": 0.37354506215601047, "grad_norm": 0.11821205914020538, "learning_rate": 0.0005, "loss": 2.1174, "step": 98140 }, { "epoch": 0.3735831246241331, "grad_norm": 0.12350712716579437, "learning_rate": 0.0005, "loss": 2.1152, "step": 98150 }, { "epoch": 0.37362118709225584, "grad_norm": 0.12245646119117737, "learning_rate": 0.0005, "loss": 2.1219, "step": 98160 }, { "epoch": 0.3736592495603785, "grad_norm": 0.11302000284194946, "learning_rate": 0.0005, "loss": 2.1188, "step": 98170 }, { "epoch": 0.37369731202850115, "grad_norm": 0.12072186172008514, "learning_rate": 0.0005, "loss": 2.1371, "step": 98180 }, { "epoch": 0.37373537449662386, "grad_norm": 0.12735703587532043, "learning_rate": 0.0005, "loss": 2.1142, "step": 98190 }, { "epoch": 0.3737734369647465, "grad_norm": 0.12989743053913116, "learning_rate": 0.0005, "loss": 2.111, "step": 98200 }, { "epoch": 0.37381149943286923, "grad_norm": 0.12953096628189087, "learning_rate": 0.0005, "loss": 2.124, "step": 98210 }, { "epoch": 0.3738495619009919, "grad_norm": 0.12121468782424927, "learning_rate": 0.0005, "loss": 2.1241, "step": 98220 }, { "epoch": 0.3738876243691146, "grad_norm": 0.11379808187484741, "learning_rate": 0.0005, "loss": 2.1246, "step": 98230 }, { "epoch": 0.37392568683723726, "grad_norm": 0.12749461829662323, "learning_rate": 0.0005, "loss": 2.125, "step": 98240 }, { "epoch": 0.37396374930535997, "grad_norm": 0.133274644613266, "learning_rate": 0.0005, "loss": 2.1154, "step": 98250 }, { "epoch": 0.37400181177348263, "grad_norm": 0.14356647431850433, "learning_rate": 0.0005, "loss": 2.1206, "step": 98260 }, { "epoch": 0.37403987424160534, "grad_norm": 0.1227138563990593, "learning_rate": 0.0005, "loss": 2.1209, "step": 98270 }, { "epoch": 0.374077936709728, "grad_norm": 0.13839580118656158, "learning_rate": 0.0005, "loss": 2.1302, "step": 98280 }, { "epoch": 0.3741159991778507, "grad_norm": 0.15715768933296204, "learning_rate": 0.0005, "loss": 2.127, "step": 98290 }, { "epoch": 0.37415406164597337, "grad_norm": 0.13170473277568817, "learning_rate": 0.0005, "loss": 2.1324, "step": 98300 }, { "epoch": 0.3741921241140961, "grad_norm": 0.13743096590042114, "learning_rate": 0.0005, "loss": 2.1277, "step": 98310 }, { "epoch": 0.37423018658221874, "grad_norm": 1.8420484066009521, "learning_rate": 0.0005, "loss": 2.126, "step": 98320 }, { "epoch": 0.3742682490503414, "grad_norm": 0.1318242996931076, "learning_rate": 0.0005, "loss": 2.1178, "step": 98330 }, { "epoch": 0.3743063115184641, "grad_norm": 0.11519080400466919, "learning_rate": 0.0005, "loss": 2.1157, "step": 98340 }, { "epoch": 0.37434437398658676, "grad_norm": 0.13783110678195953, "learning_rate": 0.0005, "loss": 2.1226, "step": 98350 }, { "epoch": 0.3743824364547095, "grad_norm": 0.18156218528747559, "learning_rate": 0.0005, "loss": 2.1185, "step": 98360 }, { "epoch": 0.37442049892283213, "grad_norm": 0.13677464425563812, "learning_rate": 0.0005, "loss": 2.1213, "step": 98370 }, { "epoch": 0.37445856139095485, "grad_norm": 0.12956520915031433, "learning_rate": 0.0005, "loss": 2.1194, "step": 98380 }, { "epoch": 0.3744966238590775, "grad_norm": 0.1192094013094902, "learning_rate": 0.0005, "loss": 2.14, "step": 98390 }, { "epoch": 0.3745346863272002, "grad_norm": 0.1302703469991684, "learning_rate": 0.0005, "loss": 2.1219, "step": 98400 }, { "epoch": 0.3745727487953229, "grad_norm": 0.11764563620090485, "learning_rate": 0.0005, "loss": 2.1178, "step": 98410 }, { "epoch": 0.3746108112634456, "grad_norm": 0.12831349670886993, "learning_rate": 0.0005, "loss": 2.1259, "step": 98420 }, { "epoch": 0.37464887373156824, "grad_norm": 0.1260351687669754, "learning_rate": 0.0005, "loss": 2.1223, "step": 98430 }, { "epoch": 0.37468693619969096, "grad_norm": 0.12819142639636993, "learning_rate": 0.0005, "loss": 2.1276, "step": 98440 }, { "epoch": 0.3747249986678136, "grad_norm": 0.14196482300758362, "learning_rate": 0.0005, "loss": 2.1191, "step": 98450 }, { "epoch": 0.3747630611359363, "grad_norm": 0.12788952887058258, "learning_rate": 0.0005, "loss": 2.1144, "step": 98460 }, { "epoch": 0.374801123604059, "grad_norm": 0.13228143751621246, "learning_rate": 0.0005, "loss": 2.1262, "step": 98470 }, { "epoch": 0.37483918607218164, "grad_norm": 0.1315939873456955, "learning_rate": 0.0005, "loss": 2.1178, "step": 98480 }, { "epoch": 0.37487724854030435, "grad_norm": 0.12740527093410492, "learning_rate": 0.0005, "loss": 2.1146, "step": 98490 }, { "epoch": 0.374915311008427, "grad_norm": 0.12354233860969543, "learning_rate": 0.0005, "loss": 2.1311, "step": 98500 }, { "epoch": 0.3749533734765497, "grad_norm": 0.1223762109875679, "learning_rate": 0.0005, "loss": 2.1052, "step": 98510 }, { "epoch": 0.3749914359446724, "grad_norm": 0.13310754299163818, "learning_rate": 0.0005, "loss": 2.1174, "step": 98520 }, { "epoch": 0.3750294984127951, "grad_norm": 0.12507161498069763, "learning_rate": 0.0005, "loss": 2.1187, "step": 98530 }, { "epoch": 0.37506756088091775, "grad_norm": 0.1294754594564438, "learning_rate": 0.0005, "loss": 2.1236, "step": 98540 }, { "epoch": 0.37510562334904046, "grad_norm": 0.13023291528224945, "learning_rate": 0.0005, "loss": 2.1255, "step": 98550 }, { "epoch": 0.3751436858171631, "grad_norm": 0.14100757241249084, "learning_rate": 0.0005, "loss": 2.1176, "step": 98560 }, { "epoch": 0.37518174828528583, "grad_norm": 0.12759803235530853, "learning_rate": 0.0005, "loss": 2.1263, "step": 98570 }, { "epoch": 0.3752198107534085, "grad_norm": 0.12670016288757324, "learning_rate": 0.0005, "loss": 2.1131, "step": 98580 }, { "epoch": 0.3752578732215312, "grad_norm": 0.11383096128702164, "learning_rate": 0.0005, "loss": 2.1201, "step": 98590 }, { "epoch": 0.37529593568965386, "grad_norm": 0.11552945524454117, "learning_rate": 0.0005, "loss": 2.119, "step": 98600 }, { "epoch": 0.3753339981577765, "grad_norm": 0.10721899569034576, "learning_rate": 0.0005, "loss": 2.1311, "step": 98610 }, { "epoch": 0.37537206062589923, "grad_norm": 0.13379792869091034, "learning_rate": 0.0005, "loss": 2.1112, "step": 98620 }, { "epoch": 0.3754101230940219, "grad_norm": 0.11973940581083298, "learning_rate": 0.0005, "loss": 2.0989, "step": 98630 }, { "epoch": 0.3754481855621446, "grad_norm": 0.12449324131011963, "learning_rate": 0.0005, "loss": 2.1237, "step": 98640 }, { "epoch": 0.37548624803026726, "grad_norm": 0.11978937685489655, "learning_rate": 0.0005, "loss": 2.1186, "step": 98650 }, { "epoch": 0.37552431049838997, "grad_norm": 0.128646120429039, "learning_rate": 0.0005, "loss": 2.1277, "step": 98660 }, { "epoch": 0.3755623729665126, "grad_norm": 0.13415206968784332, "learning_rate": 0.0005, "loss": 2.1469, "step": 98670 }, { "epoch": 0.37560043543463534, "grad_norm": 0.12093635648488998, "learning_rate": 0.0005, "loss": 2.1276, "step": 98680 }, { "epoch": 0.375638497902758, "grad_norm": 0.11120793223381042, "learning_rate": 0.0005, "loss": 2.1135, "step": 98690 }, { "epoch": 0.3756765603708807, "grad_norm": 0.11673329770565033, "learning_rate": 0.0005, "loss": 2.1293, "step": 98700 }, { "epoch": 0.37571462283900336, "grad_norm": 0.1250150054693222, "learning_rate": 0.0005, "loss": 2.1256, "step": 98710 }, { "epoch": 0.3757526853071261, "grad_norm": 0.13988950848579407, "learning_rate": 0.0005, "loss": 2.1123, "step": 98720 }, { "epoch": 0.37579074777524873, "grad_norm": 0.13316217064857483, "learning_rate": 0.0005, "loss": 2.111, "step": 98730 }, { "epoch": 0.37582881024337145, "grad_norm": 0.13398806750774384, "learning_rate": 0.0005, "loss": 2.1094, "step": 98740 }, { "epoch": 0.3758668727114941, "grad_norm": 0.1263556182384491, "learning_rate": 0.0005, "loss": 2.1114, "step": 98750 }, { "epoch": 0.37590493517961676, "grad_norm": 0.12224937975406647, "learning_rate": 0.0005, "loss": 2.1165, "step": 98760 }, { "epoch": 0.3759429976477395, "grad_norm": 0.12698322534561157, "learning_rate": 0.0005, "loss": 2.1313, "step": 98770 }, { "epoch": 0.37598106011586213, "grad_norm": 0.12896743416786194, "learning_rate": 0.0005, "loss": 2.1295, "step": 98780 }, { "epoch": 0.37601912258398484, "grad_norm": 0.12525705993175507, "learning_rate": 0.0005, "loss": 2.1302, "step": 98790 }, { "epoch": 0.3760571850521075, "grad_norm": 0.12810900807380676, "learning_rate": 0.0005, "loss": 2.1176, "step": 98800 }, { "epoch": 0.3760952475202302, "grad_norm": 0.1269560009241104, "learning_rate": 0.0005, "loss": 2.1309, "step": 98810 }, { "epoch": 0.37613330998835287, "grad_norm": 0.12469108402729034, "learning_rate": 0.0005, "loss": 2.111, "step": 98820 }, { "epoch": 0.3761713724564756, "grad_norm": 0.1346503645181656, "learning_rate": 0.0005, "loss": 2.1188, "step": 98830 }, { "epoch": 0.37620943492459824, "grad_norm": 0.11811628192663193, "learning_rate": 0.0005, "loss": 2.1413, "step": 98840 }, { "epoch": 0.37624749739272095, "grad_norm": 0.13591505587100983, "learning_rate": 0.0005, "loss": 2.1126, "step": 98850 }, { "epoch": 0.3762855598608436, "grad_norm": 0.1304771453142166, "learning_rate": 0.0005, "loss": 2.1268, "step": 98860 }, { "epoch": 0.3763236223289663, "grad_norm": 0.15867163240909576, "learning_rate": 0.0005, "loss": 2.1159, "step": 98870 }, { "epoch": 0.376361684797089, "grad_norm": 0.14701250195503235, "learning_rate": 0.0005, "loss": 2.1251, "step": 98880 }, { "epoch": 0.3763997472652117, "grad_norm": 0.12889814376831055, "learning_rate": 0.0005, "loss": 2.1311, "step": 98890 }, { "epoch": 0.37643780973333435, "grad_norm": 0.1383143812417984, "learning_rate": 0.0005, "loss": 2.1125, "step": 98900 }, { "epoch": 0.376475872201457, "grad_norm": 0.1334122270345688, "learning_rate": 0.0005, "loss": 2.138, "step": 98910 }, { "epoch": 0.3765139346695797, "grad_norm": 0.12685805559158325, "learning_rate": 0.0005, "loss": 2.1264, "step": 98920 }, { "epoch": 0.3765519971377024, "grad_norm": 0.11598322540521622, "learning_rate": 0.0005, "loss": 2.1205, "step": 98930 }, { "epoch": 0.3765900596058251, "grad_norm": 0.12357661873102188, "learning_rate": 0.0005, "loss": 2.1197, "step": 98940 }, { "epoch": 0.37662812207394775, "grad_norm": 0.11699126660823822, "learning_rate": 0.0005, "loss": 2.0966, "step": 98950 }, { "epoch": 0.37666618454207046, "grad_norm": 0.13793028891086578, "learning_rate": 0.0005, "loss": 2.131, "step": 98960 }, { "epoch": 0.3767042470101931, "grad_norm": 0.12064649164676666, "learning_rate": 0.0005, "loss": 2.1071, "step": 98970 }, { "epoch": 0.37674230947831583, "grad_norm": 0.12668059766292572, "learning_rate": 0.0005, "loss": 2.1158, "step": 98980 }, { "epoch": 0.3767803719464385, "grad_norm": 0.13221696019172668, "learning_rate": 0.0005, "loss": 2.1384, "step": 98990 }, { "epoch": 0.3768184344145612, "grad_norm": 0.12401723116636276, "learning_rate": 0.0005, "loss": 2.1151, "step": 99000 }, { "epoch": 0.37685649688268386, "grad_norm": 0.1281975507736206, "learning_rate": 0.0005, "loss": 2.1282, "step": 99010 }, { "epoch": 0.37689455935080657, "grad_norm": 0.12351026386022568, "learning_rate": 0.0005, "loss": 2.1222, "step": 99020 }, { "epoch": 0.3769326218189292, "grad_norm": 0.1147201880812645, "learning_rate": 0.0005, "loss": 2.1227, "step": 99030 }, { "epoch": 0.3769706842870519, "grad_norm": 0.11766034364700317, "learning_rate": 0.0005, "loss": 2.1171, "step": 99040 }, { "epoch": 0.3770087467551746, "grad_norm": 0.12974408268928528, "learning_rate": 0.0005, "loss": 2.1222, "step": 99050 }, { "epoch": 0.37704680922329725, "grad_norm": 0.1237754076719284, "learning_rate": 0.0005, "loss": 2.1134, "step": 99060 }, { "epoch": 0.37708487169141996, "grad_norm": 0.13523773849010468, "learning_rate": 0.0005, "loss": 2.1082, "step": 99070 }, { "epoch": 0.3771229341595426, "grad_norm": 0.13876040279865265, "learning_rate": 0.0005, "loss": 2.1121, "step": 99080 }, { "epoch": 0.37716099662766533, "grad_norm": 0.13175301253795624, "learning_rate": 0.0005, "loss": 2.1238, "step": 99090 }, { "epoch": 0.377199059095788, "grad_norm": 0.1185319572687149, "learning_rate": 0.0005, "loss": 2.1273, "step": 99100 }, { "epoch": 0.3772371215639107, "grad_norm": 0.12355204671621323, "learning_rate": 0.0005, "loss": 2.12, "step": 99110 }, { "epoch": 0.37727518403203336, "grad_norm": 0.12566806375980377, "learning_rate": 0.0005, "loss": 2.1248, "step": 99120 }, { "epoch": 0.3773132465001561, "grad_norm": 0.12418641149997711, "learning_rate": 0.0005, "loss": 2.1167, "step": 99130 }, { "epoch": 0.37735130896827873, "grad_norm": 0.14258967339992523, "learning_rate": 0.0005, "loss": 2.1226, "step": 99140 }, { "epoch": 0.37738937143640144, "grad_norm": 0.13339242339134216, "learning_rate": 0.0005, "loss": 2.1152, "step": 99150 }, { "epoch": 0.3774274339045241, "grad_norm": 0.12326198816299438, "learning_rate": 0.0005, "loss": 2.1313, "step": 99160 }, { "epoch": 0.3774654963726468, "grad_norm": 0.13680458068847656, "learning_rate": 0.0005, "loss": 2.1084, "step": 99170 }, { "epoch": 0.37750355884076947, "grad_norm": 0.1330464482307434, "learning_rate": 0.0005, "loss": 2.1037, "step": 99180 }, { "epoch": 0.37754162130889213, "grad_norm": 0.13588985800743103, "learning_rate": 0.0005, "loss": 2.134, "step": 99190 }, { "epoch": 0.37757968377701484, "grad_norm": 0.1276908963918686, "learning_rate": 0.0005, "loss": 2.1266, "step": 99200 }, { "epoch": 0.3776177462451375, "grad_norm": 0.11521900445222855, "learning_rate": 0.0005, "loss": 2.1165, "step": 99210 }, { "epoch": 0.3776558087132602, "grad_norm": 0.12597927451133728, "learning_rate": 0.0005, "loss": 2.123, "step": 99220 }, { "epoch": 0.37769387118138287, "grad_norm": 0.1277197152376175, "learning_rate": 0.0005, "loss": 2.13, "step": 99230 }, { "epoch": 0.3777319336495056, "grad_norm": 0.11337390542030334, "learning_rate": 0.0005, "loss": 2.1199, "step": 99240 }, { "epoch": 0.37776999611762824, "grad_norm": 0.13153591752052307, "learning_rate": 0.0005, "loss": 2.1221, "step": 99250 }, { "epoch": 0.37780805858575095, "grad_norm": 0.12262382358312607, "learning_rate": 0.0005, "loss": 2.1265, "step": 99260 }, { "epoch": 0.3778461210538736, "grad_norm": 0.1202477365732193, "learning_rate": 0.0005, "loss": 2.1365, "step": 99270 }, { "epoch": 0.3778841835219963, "grad_norm": 0.1355462670326233, "learning_rate": 0.0005, "loss": 2.1272, "step": 99280 }, { "epoch": 0.377922245990119, "grad_norm": 0.12229014188051224, "learning_rate": 0.0005, "loss": 2.1226, "step": 99290 }, { "epoch": 0.3779603084582417, "grad_norm": 0.1229465901851654, "learning_rate": 0.0005, "loss": 2.1321, "step": 99300 }, { "epoch": 0.37799837092636435, "grad_norm": 0.11344870924949646, "learning_rate": 0.0005, "loss": 2.1213, "step": 99310 }, { "epoch": 0.37803643339448706, "grad_norm": 0.12172535806894302, "learning_rate": 0.0005, "loss": 2.112, "step": 99320 }, { "epoch": 0.3780744958626097, "grad_norm": 0.1417960375547409, "learning_rate": 0.0005, "loss": 2.1366, "step": 99330 }, { "epoch": 0.3781125583307324, "grad_norm": 0.12265501916408539, "learning_rate": 0.0005, "loss": 2.1299, "step": 99340 }, { "epoch": 0.3781506207988551, "grad_norm": 0.1277245581150055, "learning_rate": 0.0005, "loss": 2.1184, "step": 99350 }, { "epoch": 0.37818868326697774, "grad_norm": 0.12461922317743301, "learning_rate": 0.0005, "loss": 2.1282, "step": 99360 }, { "epoch": 0.37822674573510046, "grad_norm": 0.11946941912174225, "learning_rate": 0.0005, "loss": 2.1297, "step": 99370 }, { "epoch": 0.3782648082032231, "grad_norm": 0.1216985285282135, "learning_rate": 0.0005, "loss": 2.1025, "step": 99380 }, { "epoch": 0.3783028706713458, "grad_norm": 0.14715443551540375, "learning_rate": 0.0005, "loss": 2.1161, "step": 99390 }, { "epoch": 0.3783409331394685, "grad_norm": 0.12096799165010452, "learning_rate": 0.0005, "loss": 2.1244, "step": 99400 }, { "epoch": 0.3783789956075912, "grad_norm": 0.11835294961929321, "learning_rate": 0.0005, "loss": 2.1305, "step": 99410 }, { "epoch": 0.37841705807571385, "grad_norm": 0.13805124163627625, "learning_rate": 0.0005, "loss": 2.1275, "step": 99420 }, { "epoch": 0.37845512054383657, "grad_norm": 0.12678532302379608, "learning_rate": 0.0005, "loss": 2.1135, "step": 99430 }, { "epoch": 0.3784931830119592, "grad_norm": 0.11587246507406235, "learning_rate": 0.0005, "loss": 2.1349, "step": 99440 }, { "epoch": 0.37853124548008193, "grad_norm": 0.11982610076665878, "learning_rate": 0.0005, "loss": 2.1275, "step": 99450 }, { "epoch": 0.3785693079482046, "grad_norm": 0.1394578516483307, "learning_rate": 0.0005, "loss": 2.1305, "step": 99460 }, { "epoch": 0.37860737041632725, "grad_norm": 0.12834946811199188, "learning_rate": 0.0005, "loss": 2.1074, "step": 99470 }, { "epoch": 0.37864543288444996, "grad_norm": 0.12075355648994446, "learning_rate": 0.0005, "loss": 2.1148, "step": 99480 }, { "epoch": 0.3786834953525726, "grad_norm": 0.11674944311380386, "learning_rate": 0.0005, "loss": 2.1259, "step": 99490 }, { "epoch": 0.37872155782069533, "grad_norm": 0.11884240806102753, "learning_rate": 0.0005, "loss": 2.122, "step": 99500 }, { "epoch": 0.378759620288818, "grad_norm": 0.12764117121696472, "learning_rate": 0.0005, "loss": 2.1368, "step": 99510 }, { "epoch": 0.3787976827569407, "grad_norm": 0.12957796454429626, "learning_rate": 0.0005, "loss": 2.1123, "step": 99520 }, { "epoch": 0.37883574522506336, "grad_norm": 0.11994970589876175, "learning_rate": 0.0005, "loss": 2.1211, "step": 99530 }, { "epoch": 0.37887380769318607, "grad_norm": 0.1316346526145935, "learning_rate": 0.0005, "loss": 2.1185, "step": 99540 }, { "epoch": 0.37891187016130873, "grad_norm": 0.1294875144958496, "learning_rate": 0.0005, "loss": 2.1222, "step": 99550 }, { "epoch": 0.37894993262943144, "grad_norm": 0.1248297244310379, "learning_rate": 0.0005, "loss": 2.1226, "step": 99560 }, { "epoch": 0.3789879950975541, "grad_norm": 0.11559437960386276, "learning_rate": 0.0005, "loss": 2.1273, "step": 99570 }, { "epoch": 0.3790260575656768, "grad_norm": 0.12500129640102386, "learning_rate": 0.0005, "loss": 2.1239, "step": 99580 }, { "epoch": 0.37906412003379947, "grad_norm": 0.1193610355257988, "learning_rate": 0.0005, "loss": 2.1237, "step": 99590 }, { "epoch": 0.3791021825019222, "grad_norm": 0.13608145713806152, "learning_rate": 0.0005, "loss": 2.1207, "step": 99600 }, { "epoch": 0.37914024497004484, "grad_norm": 0.12819543480873108, "learning_rate": 0.0005, "loss": 2.1321, "step": 99610 }, { "epoch": 0.3791783074381675, "grad_norm": 0.13131417334079742, "learning_rate": 0.0005, "loss": 2.1401, "step": 99620 }, { "epoch": 0.3792163699062902, "grad_norm": 0.14582496881484985, "learning_rate": 0.0005, "loss": 2.1319, "step": 99630 }, { "epoch": 0.37925443237441286, "grad_norm": 0.12820561230182648, "learning_rate": 0.0005, "loss": 2.1203, "step": 99640 }, { "epoch": 0.3792924948425356, "grad_norm": 0.1195472702383995, "learning_rate": 0.0005, "loss": 2.1339, "step": 99650 }, { "epoch": 0.37933055731065823, "grad_norm": 0.13277654349803925, "learning_rate": 0.0005, "loss": 2.1143, "step": 99660 }, { "epoch": 0.37936861977878095, "grad_norm": 0.10699354112148285, "learning_rate": 0.0005, "loss": 2.1159, "step": 99670 }, { "epoch": 0.3794066822469036, "grad_norm": 0.13318446278572083, "learning_rate": 0.0005, "loss": 2.1216, "step": 99680 }, { "epoch": 0.3794447447150263, "grad_norm": 0.1301574856042862, "learning_rate": 0.0005, "loss": 2.1186, "step": 99690 }, { "epoch": 0.379482807183149, "grad_norm": 0.13273996114730835, "learning_rate": 0.0005, "loss": 2.1093, "step": 99700 }, { "epoch": 0.3795208696512717, "grad_norm": 0.119916170835495, "learning_rate": 0.0005, "loss": 2.1155, "step": 99710 }, { "epoch": 0.37955893211939434, "grad_norm": 0.12175925821065903, "learning_rate": 0.0005, "loss": 2.1211, "step": 99720 }, { "epoch": 0.37959699458751706, "grad_norm": 0.11958014965057373, "learning_rate": 0.0005, "loss": 2.1179, "step": 99730 }, { "epoch": 0.3796350570556397, "grad_norm": 0.12307074666023254, "learning_rate": 0.0005, "loss": 2.1244, "step": 99740 }, { "epoch": 0.3796731195237624, "grad_norm": 0.1192697286605835, "learning_rate": 0.0005, "loss": 2.1139, "step": 99750 }, { "epoch": 0.3797111819918851, "grad_norm": 0.11530807614326477, "learning_rate": 0.0005, "loss": 2.1134, "step": 99760 }, { "epoch": 0.37974924446000774, "grad_norm": 0.12537164986133575, "learning_rate": 0.0005, "loss": 2.1164, "step": 99770 }, { "epoch": 0.37978730692813045, "grad_norm": 0.14167091250419617, "learning_rate": 0.0005, "loss": 2.122, "step": 99780 }, { "epoch": 0.3798253693962531, "grad_norm": 0.13286343216896057, "learning_rate": 0.0005, "loss": 2.1166, "step": 99790 }, { "epoch": 0.3798634318643758, "grad_norm": 0.13088318705558777, "learning_rate": 0.0005, "loss": 2.1169, "step": 99800 }, { "epoch": 0.3799014943324985, "grad_norm": 0.1335020512342453, "learning_rate": 0.0005, "loss": 2.1352, "step": 99810 }, { "epoch": 0.3799395568006212, "grad_norm": 0.12035942822694778, "learning_rate": 0.0005, "loss": 2.121, "step": 99820 }, { "epoch": 0.37997761926874385, "grad_norm": 0.13277862966060638, "learning_rate": 0.0005, "loss": 2.1181, "step": 99830 }, { "epoch": 0.38001568173686656, "grad_norm": 0.12266730517148972, "learning_rate": 0.0005, "loss": 2.1224, "step": 99840 }, { "epoch": 0.3800537442049892, "grad_norm": 0.12493303418159485, "learning_rate": 0.0005, "loss": 2.1432, "step": 99850 }, { "epoch": 0.38009180667311193, "grad_norm": 0.1336122304201126, "learning_rate": 0.0005, "loss": 2.1226, "step": 99860 }, { "epoch": 0.3801298691412346, "grad_norm": 0.12226969748735428, "learning_rate": 0.0005, "loss": 2.1284, "step": 99870 }, { "epoch": 0.3801679316093573, "grad_norm": 0.1311367005109787, "learning_rate": 0.0005, "loss": 2.1167, "step": 99880 }, { "epoch": 0.38020599407747996, "grad_norm": 0.12251698970794678, "learning_rate": 0.0005, "loss": 2.1153, "step": 99890 }, { "epoch": 0.3802440565456026, "grad_norm": 0.1320013850927353, "learning_rate": 0.0005, "loss": 2.1243, "step": 99900 }, { "epoch": 0.38028211901372533, "grad_norm": 0.12274371087551117, "learning_rate": 0.0005, "loss": 2.1251, "step": 99910 }, { "epoch": 0.380320181481848, "grad_norm": 0.12575620412826538, "learning_rate": 0.0005, "loss": 2.1375, "step": 99920 }, { "epoch": 0.3803582439499707, "grad_norm": 0.11021941900253296, "learning_rate": 0.0005, "loss": 2.105, "step": 99930 }, { "epoch": 0.38039630641809336, "grad_norm": 0.1258212924003601, "learning_rate": 0.0005, "loss": 2.1113, "step": 99940 }, { "epoch": 0.38043436888621607, "grad_norm": 0.14333589375019073, "learning_rate": 0.0005, "loss": 2.1167, "step": 99950 }, { "epoch": 0.3804724313543387, "grad_norm": 0.12342032790184021, "learning_rate": 0.0005, "loss": 2.1189, "step": 99960 }, { "epoch": 0.38051049382246144, "grad_norm": 0.12532587349414825, "learning_rate": 0.0005, "loss": 2.1083, "step": 99970 }, { "epoch": 0.3805485562905841, "grad_norm": 0.12661361694335938, "learning_rate": 0.0005, "loss": 2.123, "step": 99980 }, { "epoch": 0.3805866187587068, "grad_norm": 0.13249489665031433, "learning_rate": 0.0005, "loss": 2.1142, "step": 99990 }, { "epoch": 0.38062468122682946, "grad_norm": 0.11751711368560791, "learning_rate": 0.0005, "loss": 2.1032, "step": 100000 }, { "epoch": 0.3806627436949522, "grad_norm": 0.1317417472600937, "learning_rate": 0.0005, "loss": 2.1179, "step": 100010 }, { "epoch": 0.38070080616307483, "grad_norm": 0.1290077269077301, "learning_rate": 0.0005, "loss": 2.1317, "step": 100020 }, { "epoch": 0.38073886863119755, "grad_norm": 0.1366860419511795, "learning_rate": 0.0005, "loss": 2.1314, "step": 100030 }, { "epoch": 0.3807769310993202, "grad_norm": 0.11924261599779129, "learning_rate": 0.0005, "loss": 2.1079, "step": 100040 }, { "epoch": 0.38081499356744286, "grad_norm": 0.11624366044998169, "learning_rate": 0.0005, "loss": 2.1142, "step": 100050 }, { "epoch": 0.3808530560355656, "grad_norm": 0.12848447263240814, "learning_rate": 0.0005, "loss": 2.13, "step": 100060 }, { "epoch": 0.38089111850368823, "grad_norm": 0.13916176557540894, "learning_rate": 0.0005, "loss": 2.1262, "step": 100070 }, { "epoch": 0.38092918097181094, "grad_norm": 0.13948209583759308, "learning_rate": 0.0005, "loss": 2.1292, "step": 100080 }, { "epoch": 0.3809672434399336, "grad_norm": 0.12091422080993652, "learning_rate": 0.0005, "loss": 2.125, "step": 100090 }, { "epoch": 0.3810053059080563, "grad_norm": 0.12870047986507416, "learning_rate": 0.0005, "loss": 2.1174, "step": 100100 }, { "epoch": 0.38104336837617897, "grad_norm": 0.11702585965394974, "learning_rate": 0.0005, "loss": 2.1232, "step": 100110 }, { "epoch": 0.3810814308443017, "grad_norm": 0.1217094287276268, "learning_rate": 0.0005, "loss": 2.1177, "step": 100120 }, { "epoch": 0.38111949331242434, "grad_norm": 0.13027448952198029, "learning_rate": 0.0005, "loss": 2.1168, "step": 100130 }, { "epoch": 0.38115755578054705, "grad_norm": 0.12258381396532059, "learning_rate": 0.0005, "loss": 2.1348, "step": 100140 }, { "epoch": 0.3811956182486697, "grad_norm": 0.12027046829462051, "learning_rate": 0.0005, "loss": 2.128, "step": 100150 }, { "epoch": 0.3812336807167924, "grad_norm": 0.13201120495796204, "learning_rate": 0.0005, "loss": 2.1395, "step": 100160 }, { "epoch": 0.3812717431849151, "grad_norm": 0.12681923806667328, "learning_rate": 0.0005, "loss": 2.1182, "step": 100170 }, { "epoch": 0.3813098056530378, "grad_norm": 0.12848328053951263, "learning_rate": 0.0005, "loss": 2.111, "step": 100180 }, { "epoch": 0.38134786812116045, "grad_norm": 0.12477723509073257, "learning_rate": 0.0005, "loss": 2.1083, "step": 100190 }, { "epoch": 0.3813859305892831, "grad_norm": 0.13087469339370728, "learning_rate": 0.0005, "loss": 2.1137, "step": 100200 }, { "epoch": 0.3814239930574058, "grad_norm": 0.12919747829437256, "learning_rate": 0.0005, "loss": 2.1209, "step": 100210 }, { "epoch": 0.3814620555255285, "grad_norm": 0.12140429764986038, "learning_rate": 0.0005, "loss": 2.1301, "step": 100220 }, { "epoch": 0.3815001179936512, "grad_norm": 0.11911433190107346, "learning_rate": 0.0005, "loss": 2.133, "step": 100230 }, { "epoch": 0.38153818046177385, "grad_norm": 0.12428278475999832, "learning_rate": 0.0005, "loss": 2.1255, "step": 100240 }, { "epoch": 0.38157624292989656, "grad_norm": 0.12575286626815796, "learning_rate": 0.0005, "loss": 2.1173, "step": 100250 }, { "epoch": 0.3816143053980192, "grad_norm": 0.14253956079483032, "learning_rate": 0.0005, "loss": 2.1291, "step": 100260 }, { "epoch": 0.38165236786614193, "grad_norm": 0.11677713692188263, "learning_rate": 0.0005, "loss": 2.135, "step": 100270 }, { "epoch": 0.3816904303342646, "grad_norm": 0.11532783508300781, "learning_rate": 0.0005, "loss": 2.1224, "step": 100280 }, { "epoch": 0.3817284928023873, "grad_norm": 0.12663224339485168, "learning_rate": 0.0005, "loss": 2.1172, "step": 100290 }, { "epoch": 0.38176655527050996, "grad_norm": 0.1268301010131836, "learning_rate": 0.0005, "loss": 2.1242, "step": 100300 }, { "epoch": 0.38180461773863267, "grad_norm": 0.14021086692810059, "learning_rate": 0.0005, "loss": 2.1135, "step": 100310 }, { "epoch": 0.3818426802067553, "grad_norm": 0.1259777545928955, "learning_rate": 0.0005, "loss": 2.1001, "step": 100320 }, { "epoch": 0.381880742674878, "grad_norm": 0.11848200857639313, "learning_rate": 0.0005, "loss": 2.1137, "step": 100330 }, { "epoch": 0.3819188051430007, "grad_norm": 0.12533099949359894, "learning_rate": 0.0005, "loss": 2.1249, "step": 100340 }, { "epoch": 0.38195686761112335, "grad_norm": 0.12691478431224823, "learning_rate": 0.0005, "loss": 2.1191, "step": 100350 }, { "epoch": 0.38199493007924606, "grad_norm": 0.13623826205730438, "learning_rate": 0.0005, "loss": 2.1169, "step": 100360 }, { "epoch": 0.3820329925473687, "grad_norm": 0.11483057588338852, "learning_rate": 0.0005, "loss": 2.1156, "step": 100370 }, { "epoch": 0.38207105501549143, "grad_norm": 0.12684819102287292, "learning_rate": 0.0005, "loss": 2.1215, "step": 100380 }, { "epoch": 0.3821091174836141, "grad_norm": 0.11294738203287125, "learning_rate": 0.0005, "loss": 2.1094, "step": 100390 }, { "epoch": 0.3821471799517368, "grad_norm": 0.12915776669979095, "learning_rate": 0.0005, "loss": 2.1258, "step": 100400 }, { "epoch": 0.38218524241985946, "grad_norm": 0.11914768069982529, "learning_rate": 0.0005, "loss": 2.1351, "step": 100410 }, { "epoch": 0.3822233048879822, "grad_norm": 0.11203866451978683, "learning_rate": 0.0005, "loss": 2.1135, "step": 100420 }, { "epoch": 0.38226136735610483, "grad_norm": 0.1280486285686493, "learning_rate": 0.0005, "loss": 2.1214, "step": 100430 }, { "epoch": 0.38229942982422754, "grad_norm": 0.12039019912481308, "learning_rate": 0.0005, "loss": 2.1349, "step": 100440 }, { "epoch": 0.3823374922923502, "grad_norm": 0.12762552499771118, "learning_rate": 0.0005, "loss": 2.1267, "step": 100450 }, { "epoch": 0.3823755547604729, "grad_norm": 0.1186232715845108, "learning_rate": 0.0005, "loss": 2.1308, "step": 100460 }, { "epoch": 0.38241361722859557, "grad_norm": 0.12590287625789642, "learning_rate": 0.0005, "loss": 2.1358, "step": 100470 }, { "epoch": 0.3824516796967182, "grad_norm": 0.1216677650809288, "learning_rate": 0.0005, "loss": 2.1066, "step": 100480 }, { "epoch": 0.38248974216484094, "grad_norm": 0.12391503155231476, "learning_rate": 0.0005, "loss": 2.0988, "step": 100490 }, { "epoch": 0.3825278046329636, "grad_norm": 0.12275954335927963, "learning_rate": 0.0005, "loss": 2.1163, "step": 100500 }, { "epoch": 0.3825658671010863, "grad_norm": 0.12779061496257782, "learning_rate": 0.0005, "loss": 2.1206, "step": 100510 }, { "epoch": 0.38260392956920897, "grad_norm": 0.12359108030796051, "learning_rate": 0.0005, "loss": 2.1116, "step": 100520 }, { "epoch": 0.3826419920373317, "grad_norm": 0.13737986981868744, "learning_rate": 0.0005, "loss": 2.1049, "step": 100530 }, { "epoch": 0.38268005450545434, "grad_norm": 0.12026502937078476, "learning_rate": 0.0005, "loss": 2.1104, "step": 100540 }, { "epoch": 0.38271811697357705, "grad_norm": 0.13088242709636688, "learning_rate": 0.0005, "loss": 2.1218, "step": 100550 }, { "epoch": 0.3827561794416997, "grad_norm": 0.1281553953886032, "learning_rate": 0.0005, "loss": 2.1104, "step": 100560 }, { "epoch": 0.3827942419098224, "grad_norm": 0.1370796263217926, "learning_rate": 0.0005, "loss": 2.1127, "step": 100570 }, { "epoch": 0.3828323043779451, "grad_norm": 0.12698887288570404, "learning_rate": 0.0005, "loss": 2.1368, "step": 100580 }, { "epoch": 0.3828703668460678, "grad_norm": 0.13594062626361847, "learning_rate": 0.0005, "loss": 2.1149, "step": 100590 }, { "epoch": 0.38290842931419045, "grad_norm": 0.1371491253376007, "learning_rate": 0.0005, "loss": 2.134, "step": 100600 }, { "epoch": 0.38294649178231316, "grad_norm": 0.11673377454280853, "learning_rate": 0.0005, "loss": 2.1132, "step": 100610 }, { "epoch": 0.3829845542504358, "grad_norm": 0.12888579070568085, "learning_rate": 0.0005, "loss": 2.1265, "step": 100620 }, { "epoch": 0.3830226167185585, "grad_norm": 0.11862296611070633, "learning_rate": 0.0005, "loss": 2.1192, "step": 100630 }, { "epoch": 0.3830606791866812, "grad_norm": 0.1351194977760315, "learning_rate": 0.0005, "loss": 2.1073, "step": 100640 }, { "epoch": 0.38309874165480384, "grad_norm": 0.12605620920658112, "learning_rate": 0.0005, "loss": 2.1146, "step": 100650 }, { "epoch": 0.38313680412292656, "grad_norm": 0.13026510179042816, "learning_rate": 0.0005, "loss": 2.1176, "step": 100660 }, { "epoch": 0.3831748665910492, "grad_norm": 0.13861605525016785, "learning_rate": 0.0005, "loss": 2.1291, "step": 100670 }, { "epoch": 0.3832129290591719, "grad_norm": 0.40944308042526245, "learning_rate": 0.0005, "loss": 2.1235, "step": 100680 }, { "epoch": 0.3832509915272946, "grad_norm": 0.12580206990242004, "learning_rate": 0.0005, "loss": 2.137, "step": 100690 }, { "epoch": 0.3832890539954173, "grad_norm": 0.13019052147865295, "learning_rate": 0.0005, "loss": 2.1131, "step": 100700 }, { "epoch": 0.38332711646353995, "grad_norm": 0.13055548071861267, "learning_rate": 0.0005, "loss": 2.11, "step": 100710 }, { "epoch": 0.38336517893166266, "grad_norm": 0.13237163424491882, "learning_rate": 0.0005, "loss": 2.1199, "step": 100720 }, { "epoch": 0.3834032413997853, "grad_norm": 0.1250726729631424, "learning_rate": 0.0005, "loss": 2.1146, "step": 100730 }, { "epoch": 0.38344130386790803, "grad_norm": 0.13131138682365417, "learning_rate": 0.0005, "loss": 2.1231, "step": 100740 }, { "epoch": 0.3834793663360307, "grad_norm": 0.13097605109214783, "learning_rate": 0.0005, "loss": 2.1198, "step": 100750 }, { "epoch": 0.3835174288041534, "grad_norm": 0.12748023867607117, "learning_rate": 0.0005, "loss": 2.1371, "step": 100760 }, { "epoch": 0.38355549127227606, "grad_norm": 0.13089165091514587, "learning_rate": 0.0005, "loss": 2.1335, "step": 100770 }, { "epoch": 0.3835935537403987, "grad_norm": 0.11884886026382446, "learning_rate": 0.0005, "loss": 2.1186, "step": 100780 }, { "epoch": 0.38363161620852143, "grad_norm": 0.1299116313457489, "learning_rate": 0.0005, "loss": 2.116, "step": 100790 }, { "epoch": 0.3836696786766441, "grad_norm": 0.12401427328586578, "learning_rate": 0.0005, "loss": 2.1131, "step": 100800 }, { "epoch": 0.3837077411447668, "grad_norm": 0.12213096767663956, "learning_rate": 0.0005, "loss": 2.1214, "step": 100810 }, { "epoch": 0.38374580361288946, "grad_norm": 0.13122443854808807, "learning_rate": 0.0005, "loss": 2.1243, "step": 100820 }, { "epoch": 0.38378386608101217, "grad_norm": 0.1398945450782776, "learning_rate": 0.0005, "loss": 2.1111, "step": 100830 }, { "epoch": 0.3838219285491348, "grad_norm": 0.11701014637947083, "learning_rate": 0.0005, "loss": 2.1188, "step": 100840 }, { "epoch": 0.38385999101725754, "grad_norm": 0.12747806310653687, "learning_rate": 0.0005, "loss": 2.1254, "step": 100850 }, { "epoch": 0.3838980534853802, "grad_norm": 0.13189804553985596, "learning_rate": 0.0005, "loss": 2.1294, "step": 100860 }, { "epoch": 0.3839361159535029, "grad_norm": 0.12226402014493942, "learning_rate": 0.0005, "loss": 2.1349, "step": 100870 }, { "epoch": 0.38397417842162557, "grad_norm": 0.11916231364011765, "learning_rate": 0.0005, "loss": 2.1212, "step": 100880 }, { "epoch": 0.3840122408897483, "grad_norm": 0.12730424106121063, "learning_rate": 0.0005, "loss": 2.1088, "step": 100890 }, { "epoch": 0.38405030335787094, "grad_norm": 0.13549363613128662, "learning_rate": 0.0005, "loss": 2.1149, "step": 100900 }, { "epoch": 0.3840883658259936, "grad_norm": 0.13349735736846924, "learning_rate": 0.0005, "loss": 2.1487, "step": 100910 }, { "epoch": 0.3841264282941163, "grad_norm": 0.14545609056949615, "learning_rate": 0.0005, "loss": 2.13, "step": 100920 }, { "epoch": 0.38416449076223896, "grad_norm": 0.12006688117980957, "learning_rate": 0.0005, "loss": 2.1192, "step": 100930 }, { "epoch": 0.3842025532303617, "grad_norm": 0.1416459083557129, "learning_rate": 0.0005, "loss": 2.1142, "step": 100940 }, { "epoch": 0.38424061569848433, "grad_norm": 0.1135077103972435, "learning_rate": 0.0005, "loss": 2.1309, "step": 100950 }, { "epoch": 0.38427867816660705, "grad_norm": 0.12496229261159897, "learning_rate": 0.0005, "loss": 2.1081, "step": 100960 }, { "epoch": 0.3843167406347297, "grad_norm": 0.11518893390893936, "learning_rate": 0.0005, "loss": 2.1366, "step": 100970 }, { "epoch": 0.3843548031028524, "grad_norm": 0.13658462464809418, "learning_rate": 0.0005, "loss": 2.1235, "step": 100980 }, { "epoch": 0.3843928655709751, "grad_norm": 0.13168896734714508, "learning_rate": 0.0005, "loss": 2.1303, "step": 100990 }, { "epoch": 0.3844309280390978, "grad_norm": 0.11835760623216629, "learning_rate": 0.0005, "loss": 2.1294, "step": 101000 }, { "epoch": 0.38446899050722044, "grad_norm": 0.13231638073921204, "learning_rate": 0.0005, "loss": 2.1188, "step": 101010 }, { "epoch": 0.38450705297534316, "grad_norm": 0.1255374252796173, "learning_rate": 0.0005, "loss": 2.1207, "step": 101020 }, { "epoch": 0.3845451154434658, "grad_norm": 0.1154334619641304, "learning_rate": 0.0005, "loss": 2.1269, "step": 101030 }, { "epoch": 0.3845831779115885, "grad_norm": 0.12606540322303772, "learning_rate": 0.0005, "loss": 2.1256, "step": 101040 }, { "epoch": 0.3846212403797112, "grad_norm": 0.12749677896499634, "learning_rate": 0.0005, "loss": 2.1336, "step": 101050 }, { "epoch": 0.38465930284783384, "grad_norm": 0.12091683596372604, "learning_rate": 0.0005, "loss": 2.1177, "step": 101060 }, { "epoch": 0.38469736531595655, "grad_norm": 0.12428902834653854, "learning_rate": 0.0005, "loss": 2.1271, "step": 101070 }, { "epoch": 0.3847354277840792, "grad_norm": 0.12779396772384644, "learning_rate": 0.0005, "loss": 2.117, "step": 101080 }, { "epoch": 0.3847734902522019, "grad_norm": 0.12357478588819504, "learning_rate": 0.0005, "loss": 2.1058, "step": 101090 }, { "epoch": 0.3848115527203246, "grad_norm": 0.1376105099916458, "learning_rate": 0.0005, "loss": 2.1008, "step": 101100 }, { "epoch": 0.3848496151884473, "grad_norm": 0.11973535269498825, "learning_rate": 0.0005, "loss": 2.1264, "step": 101110 }, { "epoch": 0.38488767765656995, "grad_norm": 0.1322673112154007, "learning_rate": 0.0005, "loss": 2.1166, "step": 101120 }, { "epoch": 0.38492574012469266, "grad_norm": 0.1237492710351944, "learning_rate": 0.0005, "loss": 2.1137, "step": 101130 }, { "epoch": 0.3849638025928153, "grad_norm": 0.12245187908411026, "learning_rate": 0.0005, "loss": 2.1093, "step": 101140 }, { "epoch": 0.38500186506093803, "grad_norm": 0.12443853914737701, "learning_rate": 0.0005, "loss": 2.1188, "step": 101150 }, { "epoch": 0.3850399275290607, "grad_norm": 0.11383303999900818, "learning_rate": 0.0005, "loss": 2.1267, "step": 101160 }, { "epoch": 0.3850779899971834, "grad_norm": 0.12312997877597809, "learning_rate": 0.0005, "loss": 2.1085, "step": 101170 }, { "epoch": 0.38511605246530606, "grad_norm": 0.12528856098651886, "learning_rate": 0.0005, "loss": 2.1202, "step": 101180 }, { "epoch": 0.38515411493342877, "grad_norm": 0.12684626877307892, "learning_rate": 0.0005, "loss": 2.1259, "step": 101190 }, { "epoch": 0.3851921774015514, "grad_norm": 0.12324848026037216, "learning_rate": 0.0005, "loss": 2.1047, "step": 101200 }, { "epoch": 0.3852302398696741, "grad_norm": 0.1210331991314888, "learning_rate": 0.0005, "loss": 2.1167, "step": 101210 }, { "epoch": 0.3852683023377968, "grad_norm": 0.11422744393348694, "learning_rate": 0.0005, "loss": 2.1149, "step": 101220 }, { "epoch": 0.38530636480591945, "grad_norm": 0.13483406603336334, "learning_rate": 0.0005, "loss": 2.1301, "step": 101230 }, { "epoch": 0.38534442727404217, "grad_norm": 0.1397322416305542, "learning_rate": 0.0005, "loss": 2.1157, "step": 101240 }, { "epoch": 0.3853824897421648, "grad_norm": 0.11989077180624008, "learning_rate": 0.0005, "loss": 2.1231, "step": 101250 }, { "epoch": 0.38542055221028754, "grad_norm": 0.13294818997383118, "learning_rate": 0.0005, "loss": 2.1182, "step": 101260 }, { "epoch": 0.3854586146784102, "grad_norm": 0.1253851354122162, "learning_rate": 0.0005, "loss": 2.1158, "step": 101270 }, { "epoch": 0.3854966771465329, "grad_norm": 0.12310951203107834, "learning_rate": 0.0005, "loss": 2.1113, "step": 101280 }, { "epoch": 0.38553473961465556, "grad_norm": 0.13452036678791046, "learning_rate": 0.0005, "loss": 2.1178, "step": 101290 }, { "epoch": 0.3855728020827783, "grad_norm": 0.13898633420467377, "learning_rate": 0.0005, "loss": 2.1132, "step": 101300 }, { "epoch": 0.38561086455090093, "grad_norm": 0.3918830156326294, "learning_rate": 0.0005, "loss": 2.1197, "step": 101310 }, { "epoch": 0.38564892701902365, "grad_norm": 0.11546818912029266, "learning_rate": 0.0005, "loss": 2.1165, "step": 101320 }, { "epoch": 0.3856869894871463, "grad_norm": 0.128716379404068, "learning_rate": 0.0005, "loss": 2.1399, "step": 101330 }, { "epoch": 0.38572505195526896, "grad_norm": 0.12486767768859863, "learning_rate": 0.0005, "loss": 2.1082, "step": 101340 }, { "epoch": 0.3857631144233917, "grad_norm": 0.12809643149375916, "learning_rate": 0.0005, "loss": 2.1237, "step": 101350 }, { "epoch": 0.38580117689151433, "grad_norm": 0.12099676579236984, "learning_rate": 0.0005, "loss": 2.1192, "step": 101360 }, { "epoch": 0.38583923935963704, "grad_norm": 0.13036486506462097, "learning_rate": 0.0005, "loss": 2.1217, "step": 101370 }, { "epoch": 0.3858773018277597, "grad_norm": 0.13107898831367493, "learning_rate": 0.0005, "loss": 2.1111, "step": 101380 }, { "epoch": 0.3859153642958824, "grad_norm": 0.12261070311069489, "learning_rate": 0.0005, "loss": 2.1157, "step": 101390 }, { "epoch": 0.38595342676400507, "grad_norm": 0.12817129492759705, "learning_rate": 0.0005, "loss": 2.1242, "step": 101400 }, { "epoch": 0.3859914892321278, "grad_norm": 0.12391543388366699, "learning_rate": 0.0005, "loss": 2.1271, "step": 101410 }, { "epoch": 0.38602955170025044, "grad_norm": 0.11720357090234756, "learning_rate": 0.0005, "loss": 2.1151, "step": 101420 }, { "epoch": 0.38606761416837315, "grad_norm": 0.12664619088172913, "learning_rate": 0.0005, "loss": 2.1115, "step": 101430 }, { "epoch": 0.3861056766364958, "grad_norm": 0.12041633576154709, "learning_rate": 0.0005, "loss": 2.1131, "step": 101440 }, { "epoch": 0.3861437391046185, "grad_norm": 0.12451664358377457, "learning_rate": 0.0005, "loss": 2.1101, "step": 101450 }, { "epoch": 0.3861818015727412, "grad_norm": 0.11611100286245346, "learning_rate": 0.0005, "loss": 2.1051, "step": 101460 }, { "epoch": 0.3862198640408639, "grad_norm": 0.1349475085735321, "learning_rate": 0.0005, "loss": 2.1124, "step": 101470 }, { "epoch": 0.38625792650898655, "grad_norm": 0.12914234399795532, "learning_rate": 0.0005, "loss": 2.133, "step": 101480 }, { "epoch": 0.3862959889771092, "grad_norm": 0.1278100609779358, "learning_rate": 0.0005, "loss": 2.1318, "step": 101490 }, { "epoch": 0.3863340514452319, "grad_norm": 0.12993694841861725, "learning_rate": 0.0005, "loss": 2.1199, "step": 101500 }, { "epoch": 0.3863721139133546, "grad_norm": 0.1148102730512619, "learning_rate": 0.0005, "loss": 2.1277, "step": 101510 }, { "epoch": 0.3864101763814773, "grad_norm": 0.1425301879644394, "learning_rate": 0.0005, "loss": 2.135, "step": 101520 }, { "epoch": 0.38644823884959995, "grad_norm": 0.12484724074602127, "learning_rate": 0.0005, "loss": 2.1294, "step": 101530 }, { "epoch": 0.38648630131772266, "grad_norm": 0.12289290130138397, "learning_rate": 0.0005, "loss": 2.1305, "step": 101540 }, { "epoch": 0.3865243637858453, "grad_norm": 0.11314371973276138, "learning_rate": 0.0005, "loss": 2.12, "step": 101550 }, { "epoch": 0.38656242625396803, "grad_norm": 0.12221511453390121, "learning_rate": 0.0005, "loss": 2.1265, "step": 101560 }, { "epoch": 0.3866004887220907, "grad_norm": 0.15185439586639404, "learning_rate": 0.0005, "loss": 2.1294, "step": 101570 }, { "epoch": 0.3866385511902134, "grad_norm": 0.16293543577194214, "learning_rate": 0.0005, "loss": 2.1078, "step": 101580 }, { "epoch": 0.38667661365833605, "grad_norm": 0.13466903567314148, "learning_rate": 0.0005, "loss": 2.1246, "step": 101590 }, { "epoch": 0.38671467612645877, "grad_norm": 0.12571501731872559, "learning_rate": 0.0005, "loss": 2.1298, "step": 101600 }, { "epoch": 0.3867527385945814, "grad_norm": 0.11617813259363174, "learning_rate": 0.0005, "loss": 2.1335, "step": 101610 }, { "epoch": 0.38679080106270414, "grad_norm": 0.11796658486127853, "learning_rate": 0.0005, "loss": 2.112, "step": 101620 }, { "epoch": 0.3868288635308268, "grad_norm": 0.1331198364496231, "learning_rate": 0.0005, "loss": 2.129, "step": 101630 }, { "epoch": 0.38686692599894945, "grad_norm": 0.12835271656513214, "learning_rate": 0.0005, "loss": 2.123, "step": 101640 }, { "epoch": 0.38690498846707216, "grad_norm": 0.11672591418027878, "learning_rate": 0.0005, "loss": 2.1096, "step": 101650 }, { "epoch": 0.3869430509351948, "grad_norm": 0.13207651674747467, "learning_rate": 0.0005, "loss": 2.103, "step": 101660 }, { "epoch": 0.38698111340331753, "grad_norm": 0.12128318846225739, "learning_rate": 0.0005, "loss": 2.1344, "step": 101670 }, { "epoch": 0.3870191758714402, "grad_norm": 0.12716425955295563, "learning_rate": 0.0005, "loss": 2.1278, "step": 101680 }, { "epoch": 0.3870572383395629, "grad_norm": 0.12909677624702454, "learning_rate": 0.0005, "loss": 2.1153, "step": 101690 }, { "epoch": 0.38709530080768556, "grad_norm": 0.13584424555301666, "learning_rate": 0.0005, "loss": 2.1307, "step": 101700 }, { "epoch": 0.3871333632758083, "grad_norm": 0.12449723482131958, "learning_rate": 0.0005, "loss": 2.1139, "step": 101710 }, { "epoch": 0.38717142574393093, "grad_norm": 0.13110417127609253, "learning_rate": 0.0005, "loss": 2.1237, "step": 101720 }, { "epoch": 0.38720948821205364, "grad_norm": 0.1298656314611435, "learning_rate": 0.0005, "loss": 2.1143, "step": 101730 }, { "epoch": 0.3872475506801763, "grad_norm": 0.11582604050636292, "learning_rate": 0.0005, "loss": 2.1053, "step": 101740 }, { "epoch": 0.387285613148299, "grad_norm": 0.12446392327547073, "learning_rate": 0.0005, "loss": 2.1234, "step": 101750 }, { "epoch": 0.38732367561642167, "grad_norm": 0.1227712333202362, "learning_rate": 0.0005, "loss": 2.1253, "step": 101760 }, { "epoch": 0.3873617380845443, "grad_norm": 0.14874985814094543, "learning_rate": 0.0005, "loss": 2.1057, "step": 101770 }, { "epoch": 0.38739980055266704, "grad_norm": 0.12331117689609528, "learning_rate": 0.0005, "loss": 2.1118, "step": 101780 }, { "epoch": 0.3874378630207897, "grad_norm": 0.11972736567258835, "learning_rate": 0.0005, "loss": 2.1181, "step": 101790 }, { "epoch": 0.3874759254889124, "grad_norm": 0.11666350811719894, "learning_rate": 0.0005, "loss": 2.1154, "step": 101800 }, { "epoch": 0.38751398795703507, "grad_norm": 0.12108370661735535, "learning_rate": 0.0005, "loss": 2.1177, "step": 101810 }, { "epoch": 0.3875520504251578, "grad_norm": 0.13469970226287842, "learning_rate": 0.0005, "loss": 2.1003, "step": 101820 }, { "epoch": 0.38759011289328044, "grad_norm": 0.11954975873231888, "learning_rate": 0.0005, "loss": 2.1237, "step": 101830 }, { "epoch": 0.38762817536140315, "grad_norm": 0.11528520286083221, "learning_rate": 0.0005, "loss": 2.1326, "step": 101840 }, { "epoch": 0.3876662378295258, "grad_norm": 0.126707524061203, "learning_rate": 0.0005, "loss": 2.1073, "step": 101850 }, { "epoch": 0.3877043002976485, "grad_norm": 0.12829038500785828, "learning_rate": 0.0005, "loss": 2.1264, "step": 101860 }, { "epoch": 0.3877423627657712, "grad_norm": 0.13470347225666046, "learning_rate": 0.0005, "loss": 2.1208, "step": 101870 }, { "epoch": 0.3877804252338939, "grad_norm": 0.13755831122398376, "learning_rate": 0.0005, "loss": 2.1343, "step": 101880 }, { "epoch": 0.38781848770201655, "grad_norm": 0.12866328656673431, "learning_rate": 0.0005, "loss": 2.118, "step": 101890 }, { "epoch": 0.38785655017013926, "grad_norm": 0.12940312922000885, "learning_rate": 0.0005, "loss": 2.1211, "step": 101900 }, { "epoch": 0.3878946126382619, "grad_norm": 0.13062919676303864, "learning_rate": 0.0005, "loss": 2.1118, "step": 101910 }, { "epoch": 0.3879326751063846, "grad_norm": 0.14013129472732544, "learning_rate": 0.0005, "loss": 2.1238, "step": 101920 }, { "epoch": 0.3879707375745073, "grad_norm": 0.13211211562156677, "learning_rate": 0.0005, "loss": 2.1202, "step": 101930 }, { "epoch": 0.38800880004262994, "grad_norm": 0.1241949051618576, "learning_rate": 0.0005, "loss": 2.1165, "step": 101940 }, { "epoch": 0.38804686251075265, "grad_norm": 0.11884935945272446, "learning_rate": 0.0005, "loss": 2.1236, "step": 101950 }, { "epoch": 0.3880849249788753, "grad_norm": 0.12933066487312317, "learning_rate": 0.0005, "loss": 2.1111, "step": 101960 }, { "epoch": 0.388122987446998, "grad_norm": 0.12881408631801605, "learning_rate": 0.0005, "loss": 2.1209, "step": 101970 }, { "epoch": 0.3881610499151207, "grad_norm": 0.12620744109153748, "learning_rate": 0.0005, "loss": 2.1187, "step": 101980 }, { "epoch": 0.3881991123832434, "grad_norm": 0.1269460916519165, "learning_rate": 0.0005, "loss": 2.1175, "step": 101990 }, { "epoch": 0.38823717485136605, "grad_norm": 0.12020882219076157, "learning_rate": 0.0005, "loss": 2.1055, "step": 102000 }, { "epoch": 0.38827523731948876, "grad_norm": 0.11622332036495209, "learning_rate": 0.0005, "loss": 2.1059, "step": 102010 }, { "epoch": 0.3883132997876114, "grad_norm": 0.11607720702886581, "learning_rate": 0.0005, "loss": 2.1164, "step": 102020 }, { "epoch": 0.38835136225573413, "grad_norm": 0.11501598358154297, "learning_rate": 0.0005, "loss": 2.1036, "step": 102030 }, { "epoch": 0.3883894247238568, "grad_norm": 0.11709321290254593, "learning_rate": 0.0005, "loss": 2.1057, "step": 102040 }, { "epoch": 0.3884274871919795, "grad_norm": 0.12677747011184692, "learning_rate": 0.0005, "loss": 2.127, "step": 102050 }, { "epoch": 0.38846554966010216, "grad_norm": 0.12956194579601288, "learning_rate": 0.0005, "loss": 2.1196, "step": 102060 }, { "epoch": 0.3885036121282248, "grad_norm": 0.17184223234653473, "learning_rate": 0.0005, "loss": 2.118, "step": 102070 }, { "epoch": 0.38854167459634753, "grad_norm": 0.11873897910118103, "learning_rate": 0.0005, "loss": 2.1244, "step": 102080 }, { "epoch": 0.3885797370644702, "grad_norm": 0.13604381680488586, "learning_rate": 0.0005, "loss": 2.1108, "step": 102090 }, { "epoch": 0.3886177995325929, "grad_norm": 0.1449870616197586, "learning_rate": 0.0005, "loss": 2.1358, "step": 102100 }, { "epoch": 0.38865586200071556, "grad_norm": 0.13565057516098022, "learning_rate": 0.0005, "loss": 2.1244, "step": 102110 }, { "epoch": 0.38869392446883827, "grad_norm": 0.134196937084198, "learning_rate": 0.0005, "loss": 2.1295, "step": 102120 }, { "epoch": 0.3887319869369609, "grad_norm": 0.13982726633548737, "learning_rate": 0.0005, "loss": 2.1074, "step": 102130 }, { "epoch": 0.38877004940508364, "grad_norm": 0.11478301882743835, "learning_rate": 0.0005, "loss": 2.1147, "step": 102140 }, { "epoch": 0.3888081118732063, "grad_norm": 0.111292764544487, "learning_rate": 0.0005, "loss": 2.1219, "step": 102150 }, { "epoch": 0.388846174341329, "grad_norm": 0.12361454218626022, "learning_rate": 0.0005, "loss": 2.1315, "step": 102160 }, { "epoch": 0.38888423680945167, "grad_norm": 0.11410224437713623, "learning_rate": 0.0005, "loss": 2.1202, "step": 102170 }, { "epoch": 0.3889222992775744, "grad_norm": 0.1234268844127655, "learning_rate": 0.0005, "loss": 2.1338, "step": 102180 }, { "epoch": 0.38896036174569704, "grad_norm": 0.11605652421712875, "learning_rate": 0.0005, "loss": 2.1169, "step": 102190 }, { "epoch": 0.3889984242138197, "grad_norm": 0.13466107845306396, "learning_rate": 0.0005, "loss": 2.1213, "step": 102200 }, { "epoch": 0.3890364866819424, "grad_norm": 0.1219257116317749, "learning_rate": 0.0005, "loss": 2.0981, "step": 102210 }, { "epoch": 0.38907454915006506, "grad_norm": 0.13218198716640472, "learning_rate": 0.0005, "loss": 2.1092, "step": 102220 }, { "epoch": 0.3891126116181878, "grad_norm": 0.13117769360542297, "learning_rate": 0.0005, "loss": 2.1233, "step": 102230 }, { "epoch": 0.38915067408631043, "grad_norm": 0.13620539009571075, "learning_rate": 0.0005, "loss": 2.1231, "step": 102240 }, { "epoch": 0.38918873655443315, "grad_norm": 0.11436023563146591, "learning_rate": 0.0005, "loss": 2.1091, "step": 102250 }, { "epoch": 0.3892267990225558, "grad_norm": 0.12375251203775406, "learning_rate": 0.0005, "loss": 2.1276, "step": 102260 }, { "epoch": 0.3892648614906785, "grad_norm": 0.13119354844093323, "learning_rate": 0.0005, "loss": 2.1377, "step": 102270 }, { "epoch": 0.3893029239588012, "grad_norm": 0.1198691576719284, "learning_rate": 0.0005, "loss": 2.1323, "step": 102280 }, { "epoch": 0.3893409864269239, "grad_norm": 0.12678927183151245, "learning_rate": 0.0005, "loss": 2.1237, "step": 102290 }, { "epoch": 0.38937904889504654, "grad_norm": 0.12140702456235886, "learning_rate": 0.0005, "loss": 2.1212, "step": 102300 }, { "epoch": 0.38941711136316925, "grad_norm": 0.1271730661392212, "learning_rate": 0.0005, "loss": 2.1166, "step": 102310 }, { "epoch": 0.3894551738312919, "grad_norm": 0.1230878159403801, "learning_rate": 0.0005, "loss": 2.1303, "step": 102320 }, { "epoch": 0.3894932362994146, "grad_norm": 0.12364904582500458, "learning_rate": 0.0005, "loss": 2.1088, "step": 102330 }, { "epoch": 0.3895312987675373, "grad_norm": 0.12749449908733368, "learning_rate": 0.0005, "loss": 2.1262, "step": 102340 }, { "epoch": 0.38956936123565994, "grad_norm": 0.1394471675157547, "learning_rate": 0.0005, "loss": 2.113, "step": 102350 }, { "epoch": 0.38960742370378265, "grad_norm": 0.1296152025461197, "learning_rate": 0.0005, "loss": 2.1172, "step": 102360 }, { "epoch": 0.3896454861719053, "grad_norm": 0.11513220518827438, "learning_rate": 0.0005, "loss": 2.1292, "step": 102370 }, { "epoch": 0.389683548640028, "grad_norm": 0.12122727185487747, "learning_rate": 0.0005, "loss": 2.1172, "step": 102380 }, { "epoch": 0.3897216111081507, "grad_norm": 0.11633864790201187, "learning_rate": 0.0005, "loss": 2.1279, "step": 102390 }, { "epoch": 0.3897596735762734, "grad_norm": 0.12338420748710632, "learning_rate": 0.0005, "loss": 2.1185, "step": 102400 }, { "epoch": 0.38979773604439605, "grad_norm": 0.1302909106016159, "learning_rate": 0.0005, "loss": 2.1072, "step": 102410 }, { "epoch": 0.38983579851251876, "grad_norm": 0.12100611627101898, "learning_rate": 0.0005, "loss": 2.1251, "step": 102420 }, { "epoch": 0.3898738609806414, "grad_norm": 0.11967547237873077, "learning_rate": 0.0005, "loss": 2.1235, "step": 102430 }, { "epoch": 0.38991192344876413, "grad_norm": 0.1297510713338852, "learning_rate": 0.0005, "loss": 2.1206, "step": 102440 }, { "epoch": 0.3899499859168868, "grad_norm": 0.13035336136817932, "learning_rate": 0.0005, "loss": 2.115, "step": 102450 }, { "epoch": 0.3899880483850095, "grad_norm": 0.13591642677783966, "learning_rate": 0.0005, "loss": 2.1317, "step": 102460 }, { "epoch": 0.39002611085313216, "grad_norm": 0.12677662074565887, "learning_rate": 0.0005, "loss": 2.1313, "step": 102470 }, { "epoch": 0.39006417332125487, "grad_norm": 0.11698787659406662, "learning_rate": 0.0005, "loss": 2.1133, "step": 102480 }, { "epoch": 0.3901022357893775, "grad_norm": 0.11306315660476685, "learning_rate": 0.0005, "loss": 2.1279, "step": 102490 }, { "epoch": 0.3901402982575002, "grad_norm": 0.11850694566965103, "learning_rate": 0.0005, "loss": 2.1253, "step": 102500 }, { "epoch": 0.3901783607256229, "grad_norm": 0.13537517189979553, "learning_rate": 0.0005, "loss": 2.1217, "step": 102510 }, { "epoch": 0.39021642319374555, "grad_norm": 0.13141389191150665, "learning_rate": 0.0005, "loss": 2.1294, "step": 102520 }, { "epoch": 0.39025448566186827, "grad_norm": 0.1263415813446045, "learning_rate": 0.0005, "loss": 2.1198, "step": 102530 }, { "epoch": 0.3902925481299909, "grad_norm": 0.12739363312721252, "learning_rate": 0.0005, "loss": 2.123, "step": 102540 }, { "epoch": 0.39033061059811364, "grad_norm": 0.12390932440757751, "learning_rate": 0.0005, "loss": 2.1129, "step": 102550 }, { "epoch": 0.3903686730662363, "grad_norm": 0.1357351541519165, "learning_rate": 0.0005, "loss": 2.1161, "step": 102560 }, { "epoch": 0.390406735534359, "grad_norm": 0.13923802971839905, "learning_rate": 0.0005, "loss": 2.1225, "step": 102570 }, { "epoch": 0.39044479800248166, "grad_norm": 0.13165980577468872, "learning_rate": 0.0005, "loss": 2.1218, "step": 102580 }, { "epoch": 0.3904828604706044, "grad_norm": 0.12961310148239136, "learning_rate": 0.0005, "loss": 2.1105, "step": 102590 }, { "epoch": 0.39052092293872703, "grad_norm": 0.1298636794090271, "learning_rate": 0.0005, "loss": 2.1188, "step": 102600 }, { "epoch": 0.39055898540684975, "grad_norm": 0.12369903922080994, "learning_rate": 0.0005, "loss": 2.1341, "step": 102610 }, { "epoch": 0.3905970478749724, "grad_norm": 0.12848657369613647, "learning_rate": 0.0005, "loss": 2.1295, "step": 102620 }, { "epoch": 0.39063511034309506, "grad_norm": 0.14106298983097076, "learning_rate": 0.0005, "loss": 2.1054, "step": 102630 }, { "epoch": 0.3906731728112178, "grad_norm": 0.12941746413707733, "learning_rate": 0.0005, "loss": 2.1249, "step": 102640 }, { "epoch": 0.39071123527934043, "grad_norm": 0.12591303884983063, "learning_rate": 0.0005, "loss": 2.1166, "step": 102650 }, { "epoch": 0.39074929774746314, "grad_norm": 0.1679193377494812, "learning_rate": 0.0005, "loss": 2.1299, "step": 102660 }, { "epoch": 0.3907873602155858, "grad_norm": 0.1251562386751175, "learning_rate": 0.0005, "loss": 2.1265, "step": 102670 }, { "epoch": 0.3908254226837085, "grad_norm": 0.12617377936840057, "learning_rate": 0.0005, "loss": 2.1299, "step": 102680 }, { "epoch": 0.39086348515183117, "grad_norm": 0.12458827346563339, "learning_rate": 0.0005, "loss": 2.118, "step": 102690 }, { "epoch": 0.3909015476199539, "grad_norm": 0.11865270137786865, "learning_rate": 0.0005, "loss": 2.1358, "step": 102700 }, { "epoch": 0.39093961008807654, "grad_norm": 0.12173104286193848, "learning_rate": 0.0005, "loss": 2.1274, "step": 102710 }, { "epoch": 0.39097767255619925, "grad_norm": 0.12546369433403015, "learning_rate": 0.0005, "loss": 2.1135, "step": 102720 }, { "epoch": 0.3910157350243219, "grad_norm": 0.1272427886724472, "learning_rate": 0.0005, "loss": 2.1295, "step": 102730 }, { "epoch": 0.3910537974924446, "grad_norm": 0.11905576288700104, "learning_rate": 0.0005, "loss": 2.1209, "step": 102740 }, { "epoch": 0.3910918599605673, "grad_norm": 0.11975698918104172, "learning_rate": 0.0005, "loss": 2.1264, "step": 102750 }, { "epoch": 0.39112992242869, "grad_norm": 0.12736965715885162, "learning_rate": 0.0005, "loss": 2.1226, "step": 102760 }, { "epoch": 0.39116798489681265, "grad_norm": 0.12499444931745529, "learning_rate": 0.0005, "loss": 2.1184, "step": 102770 }, { "epoch": 0.3912060473649353, "grad_norm": 0.12132129818201065, "learning_rate": 0.0005, "loss": 2.129, "step": 102780 }, { "epoch": 0.391244109833058, "grad_norm": 0.12807084619998932, "learning_rate": 0.0005, "loss": 2.1417, "step": 102790 }, { "epoch": 0.3912821723011807, "grad_norm": 0.12466669827699661, "learning_rate": 0.0005, "loss": 2.1176, "step": 102800 }, { "epoch": 0.3913202347693034, "grad_norm": 0.13407708704471588, "learning_rate": 0.0005, "loss": 2.1212, "step": 102810 }, { "epoch": 0.39135829723742604, "grad_norm": 0.12648305296897888, "learning_rate": 0.0005, "loss": 2.1114, "step": 102820 }, { "epoch": 0.39139635970554876, "grad_norm": 0.13111071288585663, "learning_rate": 0.0005, "loss": 2.1324, "step": 102830 }, { "epoch": 0.3914344221736714, "grad_norm": 0.11999868601560593, "learning_rate": 0.0005, "loss": 2.1121, "step": 102840 }, { "epoch": 0.3914724846417941, "grad_norm": 0.11485709995031357, "learning_rate": 0.0005, "loss": 2.119, "step": 102850 }, { "epoch": 0.3915105471099168, "grad_norm": 0.1268894374370575, "learning_rate": 0.0005, "loss": 2.1262, "step": 102860 }, { "epoch": 0.3915486095780395, "grad_norm": 0.12413586676120758, "learning_rate": 0.0005, "loss": 2.1345, "step": 102870 }, { "epoch": 0.39158667204616215, "grad_norm": 0.12754595279693604, "learning_rate": 0.0005, "loss": 2.1249, "step": 102880 }, { "epoch": 0.39162473451428487, "grad_norm": 0.3754200339317322, "learning_rate": 0.0005, "loss": 2.1109, "step": 102890 }, { "epoch": 0.3916627969824075, "grad_norm": 0.1305660754442215, "learning_rate": 0.0005, "loss": 2.1119, "step": 102900 }, { "epoch": 0.39170085945053024, "grad_norm": 0.11650175601243973, "learning_rate": 0.0005, "loss": 2.1198, "step": 102910 }, { "epoch": 0.3917389219186529, "grad_norm": 0.12068041414022446, "learning_rate": 0.0005, "loss": 2.1233, "step": 102920 }, { "epoch": 0.39177698438677555, "grad_norm": 0.13195456564426422, "learning_rate": 0.0005, "loss": 2.1243, "step": 102930 }, { "epoch": 0.39181504685489826, "grad_norm": 0.13185672461986542, "learning_rate": 0.0005, "loss": 2.1298, "step": 102940 }, { "epoch": 0.3918531093230209, "grad_norm": 0.12099901586771011, "learning_rate": 0.0005, "loss": 2.1095, "step": 102950 }, { "epoch": 0.39189117179114363, "grad_norm": 0.11593295633792877, "learning_rate": 0.0005, "loss": 2.1353, "step": 102960 }, { "epoch": 0.3919292342592663, "grad_norm": 0.1235249787569046, "learning_rate": 0.0005, "loss": 2.1029, "step": 102970 }, { "epoch": 0.391967296727389, "grad_norm": 0.14133043587207794, "learning_rate": 0.0005, "loss": 2.1059, "step": 102980 }, { "epoch": 0.39200535919551166, "grad_norm": 0.13981376588344574, "learning_rate": 0.0005, "loss": 2.1165, "step": 102990 }, { "epoch": 0.3920434216636344, "grad_norm": 0.12288466095924377, "learning_rate": 0.0005, "loss": 2.1298, "step": 103000 }, { "epoch": 0.39208148413175703, "grad_norm": 0.12503664195537567, "learning_rate": 0.0005, "loss": 2.121, "step": 103010 }, { "epoch": 0.39211954659987974, "grad_norm": 0.12194748967885971, "learning_rate": 0.0005, "loss": 2.1129, "step": 103020 }, { "epoch": 0.3921576090680024, "grad_norm": 0.13284580409526825, "learning_rate": 0.0005, "loss": 2.1231, "step": 103030 }, { "epoch": 0.3921956715361251, "grad_norm": 0.12611354887485504, "learning_rate": 0.0005, "loss": 2.1289, "step": 103040 }, { "epoch": 0.39223373400424777, "grad_norm": 0.11393626779317856, "learning_rate": 0.0005, "loss": 2.104, "step": 103050 }, { "epoch": 0.3922717964723705, "grad_norm": 0.11623258143663406, "learning_rate": 0.0005, "loss": 2.1106, "step": 103060 }, { "epoch": 0.39230985894049314, "grad_norm": 0.12657833099365234, "learning_rate": 0.0005, "loss": 2.1375, "step": 103070 }, { "epoch": 0.3923479214086158, "grad_norm": 0.11555872857570648, "learning_rate": 0.0005, "loss": 2.1269, "step": 103080 }, { "epoch": 0.3923859838767385, "grad_norm": 0.12106861919164658, "learning_rate": 0.0005, "loss": 2.1104, "step": 103090 }, { "epoch": 0.39242404634486117, "grad_norm": 0.1341984122991562, "learning_rate": 0.0005, "loss": 2.1349, "step": 103100 }, { "epoch": 0.3924621088129839, "grad_norm": 0.13863204419612885, "learning_rate": 0.0005, "loss": 2.1285, "step": 103110 }, { "epoch": 0.39250017128110654, "grad_norm": 0.1220608800649643, "learning_rate": 0.0005, "loss": 2.1182, "step": 103120 }, { "epoch": 0.39253823374922925, "grad_norm": 0.13805212080478668, "learning_rate": 0.0005, "loss": 2.1226, "step": 103130 }, { "epoch": 0.3925762962173519, "grad_norm": 0.1482156664133072, "learning_rate": 0.0005, "loss": 2.1074, "step": 103140 }, { "epoch": 0.3926143586854746, "grad_norm": 0.1242128238081932, "learning_rate": 0.0005, "loss": 2.1181, "step": 103150 }, { "epoch": 0.3926524211535973, "grad_norm": 0.1172168180346489, "learning_rate": 0.0005, "loss": 2.1267, "step": 103160 }, { "epoch": 0.39269048362172, "grad_norm": 0.11944341659545898, "learning_rate": 0.0005, "loss": 2.1185, "step": 103170 }, { "epoch": 0.39272854608984265, "grad_norm": 0.12237085402011871, "learning_rate": 0.0005, "loss": 2.1274, "step": 103180 }, { "epoch": 0.39276660855796536, "grad_norm": 0.13623057305812836, "learning_rate": 0.0005, "loss": 2.1149, "step": 103190 }, { "epoch": 0.392804671026088, "grad_norm": 0.13276179134845734, "learning_rate": 0.0005, "loss": 2.1253, "step": 103200 }, { "epoch": 0.39284273349421067, "grad_norm": 0.12691223621368408, "learning_rate": 0.0005, "loss": 2.1232, "step": 103210 }, { "epoch": 0.3928807959623334, "grad_norm": 0.1295863837003708, "learning_rate": 0.0005, "loss": 2.1258, "step": 103220 }, { "epoch": 0.39291885843045604, "grad_norm": 0.12674763798713684, "learning_rate": 0.0005, "loss": 2.1134, "step": 103230 }, { "epoch": 0.39295692089857875, "grad_norm": 0.1289980411529541, "learning_rate": 0.0005, "loss": 2.121, "step": 103240 }, { "epoch": 0.3929949833667014, "grad_norm": 0.126417338848114, "learning_rate": 0.0005, "loss": 2.1249, "step": 103250 }, { "epoch": 0.3930330458348241, "grad_norm": 0.12376362085342407, "learning_rate": 0.0005, "loss": 2.1071, "step": 103260 }, { "epoch": 0.3930711083029468, "grad_norm": 0.1145966425538063, "learning_rate": 0.0005, "loss": 2.1142, "step": 103270 }, { "epoch": 0.3931091707710695, "grad_norm": 0.12255162000656128, "learning_rate": 0.0005, "loss": 2.1162, "step": 103280 }, { "epoch": 0.39314723323919215, "grad_norm": 0.12236989289522171, "learning_rate": 0.0005, "loss": 2.1137, "step": 103290 }, { "epoch": 0.39318529570731486, "grad_norm": 0.11930961161851883, "learning_rate": 0.0005, "loss": 2.1127, "step": 103300 }, { "epoch": 0.3932233581754375, "grad_norm": 0.11663472652435303, "learning_rate": 0.0005, "loss": 2.1245, "step": 103310 }, { "epoch": 0.39326142064356023, "grad_norm": 0.10983003675937653, "learning_rate": 0.0005, "loss": 2.1125, "step": 103320 }, { "epoch": 0.3932994831116829, "grad_norm": 0.1318846344947815, "learning_rate": 0.0005, "loss": 2.1196, "step": 103330 }, { "epoch": 0.3933375455798056, "grad_norm": 0.13595378398895264, "learning_rate": 0.0005, "loss": 2.1141, "step": 103340 }, { "epoch": 0.39337560804792826, "grad_norm": 0.1256626397371292, "learning_rate": 0.0005, "loss": 2.1268, "step": 103350 }, { "epoch": 0.3934136705160509, "grad_norm": 0.11132992804050446, "learning_rate": 0.0005, "loss": 2.1174, "step": 103360 }, { "epoch": 0.39345173298417363, "grad_norm": 0.1152862012386322, "learning_rate": 0.0005, "loss": 2.1233, "step": 103370 }, { "epoch": 0.3934897954522963, "grad_norm": 0.1286613941192627, "learning_rate": 0.0005, "loss": 2.1213, "step": 103380 }, { "epoch": 0.393527857920419, "grad_norm": 0.12287045270204544, "learning_rate": 0.0005, "loss": 2.1199, "step": 103390 }, { "epoch": 0.39356592038854166, "grad_norm": 0.1277402639389038, "learning_rate": 0.0005, "loss": 2.1256, "step": 103400 }, { "epoch": 0.39360398285666437, "grad_norm": 0.12594376504421234, "learning_rate": 0.0005, "loss": 2.1178, "step": 103410 }, { "epoch": 0.393642045324787, "grad_norm": 0.12150339782238007, "learning_rate": 0.0005, "loss": 2.1264, "step": 103420 }, { "epoch": 0.39368010779290974, "grad_norm": 0.12241839617490768, "learning_rate": 0.0005, "loss": 2.1163, "step": 103430 }, { "epoch": 0.3937181702610324, "grad_norm": 0.12070048600435257, "learning_rate": 0.0005, "loss": 2.1219, "step": 103440 }, { "epoch": 0.3937562327291551, "grad_norm": 0.13486874103546143, "learning_rate": 0.0005, "loss": 2.102, "step": 103450 }, { "epoch": 0.39379429519727777, "grad_norm": 0.12598316371440887, "learning_rate": 0.0005, "loss": 2.1331, "step": 103460 }, { "epoch": 0.3938323576654005, "grad_norm": 0.11616887152194977, "learning_rate": 0.0005, "loss": 2.1198, "step": 103470 }, { "epoch": 0.39387042013352314, "grad_norm": 0.12013692408800125, "learning_rate": 0.0005, "loss": 2.1152, "step": 103480 }, { "epoch": 0.39390848260164585, "grad_norm": 0.12813392281532288, "learning_rate": 0.0005, "loss": 2.1198, "step": 103490 }, { "epoch": 0.3939465450697685, "grad_norm": 0.1240399107336998, "learning_rate": 0.0005, "loss": 2.1292, "step": 103500 }, { "epoch": 0.39398460753789116, "grad_norm": 0.1368178278207779, "learning_rate": 0.0005, "loss": 2.128, "step": 103510 }, { "epoch": 0.3940226700060139, "grad_norm": 0.11790160089731216, "learning_rate": 0.0005, "loss": 2.1219, "step": 103520 }, { "epoch": 0.39406073247413653, "grad_norm": 0.12600161135196686, "learning_rate": 0.0005, "loss": 2.1264, "step": 103530 }, { "epoch": 0.39409879494225925, "grad_norm": 0.11743983626365662, "learning_rate": 0.0005, "loss": 2.1226, "step": 103540 }, { "epoch": 0.3941368574103819, "grad_norm": 0.11900418996810913, "learning_rate": 0.0005, "loss": 2.1174, "step": 103550 }, { "epoch": 0.3941749198785046, "grad_norm": 0.13106529414653778, "learning_rate": 0.0005, "loss": 2.1092, "step": 103560 }, { "epoch": 0.39421298234662727, "grad_norm": 0.126107320189476, "learning_rate": 0.0005, "loss": 2.127, "step": 103570 }, { "epoch": 0.39425104481475, "grad_norm": 0.13327786326408386, "learning_rate": 0.0005, "loss": 2.1148, "step": 103580 }, { "epoch": 0.39428910728287264, "grad_norm": 0.13225555419921875, "learning_rate": 0.0005, "loss": 2.1155, "step": 103590 }, { "epoch": 0.39432716975099535, "grad_norm": 0.12251465022563934, "learning_rate": 0.0005, "loss": 2.1174, "step": 103600 }, { "epoch": 0.394365232219118, "grad_norm": 0.13184860348701477, "learning_rate": 0.0005, "loss": 2.1144, "step": 103610 }, { "epoch": 0.3944032946872407, "grad_norm": 0.13471008837223053, "learning_rate": 0.0005, "loss": 2.1306, "step": 103620 }, { "epoch": 0.3944413571553634, "grad_norm": 0.1355457454919815, "learning_rate": 0.0005, "loss": 2.1163, "step": 103630 }, { "epoch": 0.39447941962348604, "grad_norm": 0.12295703589916229, "learning_rate": 0.0005, "loss": 2.1256, "step": 103640 }, { "epoch": 0.39451748209160875, "grad_norm": 0.1327628791332245, "learning_rate": 0.0005, "loss": 2.1229, "step": 103650 }, { "epoch": 0.3945555445597314, "grad_norm": 0.13155631721019745, "learning_rate": 0.0005, "loss": 2.1282, "step": 103660 }, { "epoch": 0.3945936070278541, "grad_norm": 0.13830965757369995, "learning_rate": 0.0005, "loss": 2.1348, "step": 103670 }, { "epoch": 0.3946316694959768, "grad_norm": 0.12834765017032623, "learning_rate": 0.0005, "loss": 2.1331, "step": 103680 }, { "epoch": 0.3946697319640995, "grad_norm": 0.13996703922748566, "learning_rate": 0.0005, "loss": 2.1161, "step": 103690 }, { "epoch": 0.39470779443222215, "grad_norm": 0.13787522912025452, "learning_rate": 0.0005, "loss": 2.1201, "step": 103700 }, { "epoch": 0.39474585690034486, "grad_norm": 0.13497807085514069, "learning_rate": 0.0005, "loss": 2.1126, "step": 103710 }, { "epoch": 0.3947839193684675, "grad_norm": 0.1280667930841446, "learning_rate": 0.0005, "loss": 2.114, "step": 103720 }, { "epoch": 0.39482198183659023, "grad_norm": 0.12160675972700119, "learning_rate": 0.0005, "loss": 2.1281, "step": 103730 }, { "epoch": 0.3948600443047129, "grad_norm": 0.11742819100618362, "learning_rate": 0.0005, "loss": 2.1196, "step": 103740 }, { "epoch": 0.3948981067728356, "grad_norm": 0.12526483833789825, "learning_rate": 0.0005, "loss": 2.1075, "step": 103750 }, { "epoch": 0.39493616924095826, "grad_norm": 0.12073063105344772, "learning_rate": 0.0005, "loss": 2.1269, "step": 103760 }, { "epoch": 0.39497423170908097, "grad_norm": 0.11773089319467545, "learning_rate": 0.0005, "loss": 2.1151, "step": 103770 }, { "epoch": 0.3950122941772036, "grad_norm": 0.13035206496715546, "learning_rate": 0.0005, "loss": 2.1136, "step": 103780 }, { "epoch": 0.3950503566453263, "grad_norm": 0.1201128363609314, "learning_rate": 0.0005, "loss": 2.1155, "step": 103790 }, { "epoch": 0.395088419113449, "grad_norm": 0.11840377748012543, "learning_rate": 0.0005, "loss": 2.136, "step": 103800 }, { "epoch": 0.39512648158157165, "grad_norm": 0.1157442033290863, "learning_rate": 0.0005, "loss": 2.1058, "step": 103810 }, { "epoch": 0.39516454404969437, "grad_norm": 0.11903751641511917, "learning_rate": 0.0005, "loss": 2.1214, "step": 103820 }, { "epoch": 0.395202606517817, "grad_norm": 0.12379298359155655, "learning_rate": 0.0005, "loss": 2.1108, "step": 103830 }, { "epoch": 0.39524066898593974, "grad_norm": 0.12372729927301407, "learning_rate": 0.0005, "loss": 2.1351, "step": 103840 }, { "epoch": 0.3952787314540624, "grad_norm": 0.1268322765827179, "learning_rate": 0.0005, "loss": 2.1354, "step": 103850 }, { "epoch": 0.3953167939221851, "grad_norm": 0.13438722491264343, "learning_rate": 0.0005, "loss": 2.1092, "step": 103860 }, { "epoch": 0.39535485639030776, "grad_norm": 0.12297304719686508, "learning_rate": 0.0005, "loss": 2.1172, "step": 103870 }, { "epoch": 0.3953929188584305, "grad_norm": 0.13192100822925568, "learning_rate": 0.0005, "loss": 2.1198, "step": 103880 }, { "epoch": 0.39543098132655313, "grad_norm": 0.1235286295413971, "learning_rate": 0.0005, "loss": 2.1268, "step": 103890 }, { "epoch": 0.39546904379467585, "grad_norm": 0.11537299305200577, "learning_rate": 0.0005, "loss": 2.1091, "step": 103900 }, { "epoch": 0.3955071062627985, "grad_norm": 0.11818663775920868, "learning_rate": 0.0005, "loss": 2.1279, "step": 103910 }, { "epoch": 0.3955451687309212, "grad_norm": 0.1426924765110016, "learning_rate": 0.0005, "loss": 2.1177, "step": 103920 }, { "epoch": 0.39558323119904387, "grad_norm": 0.14220619201660156, "learning_rate": 0.0005, "loss": 2.1141, "step": 103930 }, { "epoch": 0.39562129366716653, "grad_norm": 0.12632210552692413, "learning_rate": 0.0005, "loss": 2.1225, "step": 103940 }, { "epoch": 0.39565935613528924, "grad_norm": 0.12559543550014496, "learning_rate": 0.0005, "loss": 2.1028, "step": 103950 }, { "epoch": 0.3956974186034119, "grad_norm": 0.13188651204109192, "learning_rate": 0.0005, "loss": 2.114, "step": 103960 }, { "epoch": 0.3957354810715346, "grad_norm": 0.12158432602882385, "learning_rate": 0.0005, "loss": 2.1151, "step": 103970 }, { "epoch": 0.39577354353965727, "grad_norm": 0.13120721280574799, "learning_rate": 0.0005, "loss": 2.109, "step": 103980 }, { "epoch": 0.39581160600778, "grad_norm": 0.1457386314868927, "learning_rate": 0.0005, "loss": 2.128, "step": 103990 }, { "epoch": 0.39584966847590264, "grad_norm": 0.12688206136226654, "learning_rate": 0.0005, "loss": 2.116, "step": 104000 }, { "epoch": 0.39588773094402535, "grad_norm": 0.15381436049938202, "learning_rate": 0.0005, "loss": 2.1142, "step": 104010 }, { "epoch": 0.395925793412148, "grad_norm": 0.11215616017580032, "learning_rate": 0.0005, "loss": 2.131, "step": 104020 }, { "epoch": 0.3959638558802707, "grad_norm": 0.12374036014080048, "learning_rate": 0.0005, "loss": 2.1171, "step": 104030 }, { "epoch": 0.3960019183483934, "grad_norm": 0.11824839562177658, "learning_rate": 0.0005, "loss": 2.1018, "step": 104040 }, { "epoch": 0.3960399808165161, "grad_norm": 0.14382266998291016, "learning_rate": 0.0005, "loss": 2.113, "step": 104050 }, { "epoch": 0.39607804328463875, "grad_norm": 0.11462534964084625, "learning_rate": 0.0005, "loss": 2.1038, "step": 104060 }, { "epoch": 0.3961161057527614, "grad_norm": 0.11632947623729706, "learning_rate": 0.0005, "loss": 2.1322, "step": 104070 }, { "epoch": 0.3961541682208841, "grad_norm": 0.13774609565734863, "learning_rate": 0.0005, "loss": 2.1236, "step": 104080 }, { "epoch": 0.3961922306890068, "grad_norm": 0.11715240776538849, "learning_rate": 0.0005, "loss": 2.1185, "step": 104090 }, { "epoch": 0.3962302931571295, "grad_norm": 0.13816210627555847, "learning_rate": 0.0005, "loss": 2.1185, "step": 104100 }, { "epoch": 0.39626835562525214, "grad_norm": 0.11385868489742279, "learning_rate": 0.0005, "loss": 2.12, "step": 104110 }, { "epoch": 0.39630641809337486, "grad_norm": 0.14412954449653625, "learning_rate": 0.0005, "loss": 2.1192, "step": 104120 }, { "epoch": 0.3963444805614975, "grad_norm": 0.12326811254024506, "learning_rate": 0.0005, "loss": 2.1099, "step": 104130 }, { "epoch": 0.3963825430296202, "grad_norm": 0.11583611369132996, "learning_rate": 0.0005, "loss": 2.1168, "step": 104140 }, { "epoch": 0.3964206054977429, "grad_norm": 0.11942502856254578, "learning_rate": 0.0005, "loss": 2.1289, "step": 104150 }, { "epoch": 0.3964586679658656, "grad_norm": 0.1244032084941864, "learning_rate": 0.0005, "loss": 2.1074, "step": 104160 }, { "epoch": 0.39649673043398825, "grad_norm": 0.13132727146148682, "learning_rate": 0.0005, "loss": 2.1164, "step": 104170 }, { "epoch": 0.39653479290211097, "grad_norm": 0.12686878442764282, "learning_rate": 0.0005, "loss": 2.1262, "step": 104180 }, { "epoch": 0.3965728553702336, "grad_norm": 0.12790274620056152, "learning_rate": 0.0005, "loss": 2.1205, "step": 104190 }, { "epoch": 0.39661091783835634, "grad_norm": 0.125844344496727, "learning_rate": 0.0005, "loss": 2.1143, "step": 104200 }, { "epoch": 0.396648980306479, "grad_norm": 0.12397968024015427, "learning_rate": 0.0005, "loss": 2.1194, "step": 104210 }, { "epoch": 0.39668704277460165, "grad_norm": 0.117226742208004, "learning_rate": 0.0005, "loss": 2.1196, "step": 104220 }, { "epoch": 0.39672510524272436, "grad_norm": 0.12511186301708221, "learning_rate": 0.0005, "loss": 2.1186, "step": 104230 }, { "epoch": 0.396763167710847, "grad_norm": 0.12875999510288239, "learning_rate": 0.0005, "loss": 2.1242, "step": 104240 }, { "epoch": 0.39680123017896973, "grad_norm": 0.1346457302570343, "learning_rate": 0.0005, "loss": 2.1273, "step": 104250 }, { "epoch": 0.3968392926470924, "grad_norm": 0.11340010911226273, "learning_rate": 0.0005, "loss": 2.127, "step": 104260 }, { "epoch": 0.3968773551152151, "grad_norm": 0.13111717998981476, "learning_rate": 0.0005, "loss": 2.1232, "step": 104270 }, { "epoch": 0.39691541758333776, "grad_norm": 0.12048640102148056, "learning_rate": 0.0005, "loss": 2.1199, "step": 104280 }, { "epoch": 0.39695348005146047, "grad_norm": 0.14155083894729614, "learning_rate": 0.0005, "loss": 2.1181, "step": 104290 }, { "epoch": 0.39699154251958313, "grad_norm": 0.11819305270910263, "learning_rate": 0.0005, "loss": 2.1061, "step": 104300 }, { "epoch": 0.39702960498770584, "grad_norm": 0.11852467060089111, "learning_rate": 0.0005, "loss": 2.1193, "step": 104310 }, { "epoch": 0.3970676674558285, "grad_norm": 0.14167369902133942, "learning_rate": 0.0005, "loss": 2.1152, "step": 104320 }, { "epoch": 0.3971057299239512, "grad_norm": 0.1200118288397789, "learning_rate": 0.0005, "loss": 2.1059, "step": 104330 }, { "epoch": 0.39714379239207387, "grad_norm": 0.12136401236057281, "learning_rate": 0.0005, "loss": 2.1079, "step": 104340 }, { "epoch": 0.3971818548601966, "grad_norm": 0.12574835121631622, "learning_rate": 0.0005, "loss": 2.112, "step": 104350 }, { "epoch": 0.39721991732831924, "grad_norm": 0.12684336304664612, "learning_rate": 0.0005, "loss": 2.1219, "step": 104360 }, { "epoch": 0.3972579797964419, "grad_norm": 0.12519370019435883, "learning_rate": 0.0005, "loss": 2.1242, "step": 104370 }, { "epoch": 0.3972960422645646, "grad_norm": 0.13009725511074066, "learning_rate": 0.0005, "loss": 2.1118, "step": 104380 }, { "epoch": 0.39733410473268727, "grad_norm": 0.1330563724040985, "learning_rate": 0.0005, "loss": 2.117, "step": 104390 }, { "epoch": 0.39737216720081, "grad_norm": 0.11972329020500183, "learning_rate": 0.0005, "loss": 2.1226, "step": 104400 }, { "epoch": 0.39741022966893264, "grad_norm": 0.1132497563958168, "learning_rate": 0.0005, "loss": 2.1149, "step": 104410 }, { "epoch": 0.39744829213705535, "grad_norm": 0.12347466498613358, "learning_rate": 0.0005, "loss": 2.1081, "step": 104420 }, { "epoch": 0.397486354605178, "grad_norm": 0.12338844686746597, "learning_rate": 0.0005, "loss": 2.118, "step": 104430 }, { "epoch": 0.3975244170733007, "grad_norm": 0.13165982067584991, "learning_rate": 0.0005, "loss": 2.1287, "step": 104440 }, { "epoch": 0.3975624795414234, "grad_norm": 0.1271611452102661, "learning_rate": 0.0005, "loss": 2.1181, "step": 104450 }, { "epoch": 0.3976005420095461, "grad_norm": 0.12106011807918549, "learning_rate": 0.0005, "loss": 2.1315, "step": 104460 }, { "epoch": 0.39763860447766874, "grad_norm": 0.12377516180276871, "learning_rate": 0.0005, "loss": 2.1229, "step": 104470 }, { "epoch": 0.39767666694579146, "grad_norm": 0.12067466974258423, "learning_rate": 0.0005, "loss": 2.1218, "step": 104480 }, { "epoch": 0.3977147294139141, "grad_norm": 0.12885430455207825, "learning_rate": 0.0005, "loss": 2.1199, "step": 104490 }, { "epoch": 0.39775279188203677, "grad_norm": 0.11753305047750473, "learning_rate": 0.0005, "loss": 2.1209, "step": 104500 }, { "epoch": 0.3977908543501595, "grad_norm": 0.1275755614042282, "learning_rate": 0.0005, "loss": 2.1092, "step": 104510 }, { "epoch": 0.39782891681828214, "grad_norm": 0.12653808295726776, "learning_rate": 0.0005, "loss": 2.1252, "step": 104520 }, { "epoch": 0.39786697928640485, "grad_norm": 0.1440698355436325, "learning_rate": 0.0005, "loss": 2.1275, "step": 104530 }, { "epoch": 0.3979050417545275, "grad_norm": 0.11419986188411713, "learning_rate": 0.0005, "loss": 2.114, "step": 104540 }, { "epoch": 0.3979431042226502, "grad_norm": 0.2337425798177719, "learning_rate": 0.0005, "loss": 2.1085, "step": 104550 }, { "epoch": 0.3979811666907729, "grad_norm": 0.1223544031381607, "learning_rate": 0.0005, "loss": 2.1311, "step": 104560 }, { "epoch": 0.3980192291588956, "grad_norm": 0.11273296922445297, "learning_rate": 0.0005, "loss": 2.1249, "step": 104570 }, { "epoch": 0.39805729162701825, "grad_norm": 0.11929282546043396, "learning_rate": 0.0005, "loss": 2.1068, "step": 104580 }, { "epoch": 0.39809535409514096, "grad_norm": 0.13098135590553284, "learning_rate": 0.0005, "loss": 2.1164, "step": 104590 }, { "epoch": 0.3981334165632636, "grad_norm": 0.1531413197517395, "learning_rate": 0.0005, "loss": 2.0996, "step": 104600 }, { "epoch": 0.39817147903138633, "grad_norm": 0.12102365493774414, "learning_rate": 0.0005, "loss": 2.1161, "step": 104610 }, { "epoch": 0.398209541499509, "grad_norm": 0.12185650318861008, "learning_rate": 0.0005, "loss": 2.1206, "step": 104620 }, { "epoch": 0.3982476039676317, "grad_norm": 0.12916898727416992, "learning_rate": 0.0005, "loss": 2.119, "step": 104630 }, { "epoch": 0.39828566643575436, "grad_norm": 0.12003316730260849, "learning_rate": 0.0005, "loss": 2.1099, "step": 104640 }, { "epoch": 0.398323728903877, "grad_norm": 0.11966529488563538, "learning_rate": 0.0005, "loss": 2.1312, "step": 104650 }, { "epoch": 0.39836179137199973, "grad_norm": 0.12870483100414276, "learning_rate": 0.0005, "loss": 2.0988, "step": 104660 }, { "epoch": 0.3983998538401224, "grad_norm": 0.12053332477807999, "learning_rate": 0.0005, "loss": 2.1053, "step": 104670 }, { "epoch": 0.3984379163082451, "grad_norm": 0.11767672747373581, "learning_rate": 0.0005, "loss": 2.1161, "step": 104680 }, { "epoch": 0.39847597877636776, "grad_norm": 0.1143900528550148, "learning_rate": 0.0005, "loss": 2.1321, "step": 104690 }, { "epoch": 0.39851404124449047, "grad_norm": 0.12328346073627472, "learning_rate": 0.0005, "loss": 2.1079, "step": 104700 }, { "epoch": 0.3985521037126131, "grad_norm": 0.1246117502450943, "learning_rate": 0.0005, "loss": 2.1102, "step": 104710 }, { "epoch": 0.39859016618073584, "grad_norm": 0.13052716851234436, "learning_rate": 0.0005, "loss": 2.1005, "step": 104720 }, { "epoch": 0.3986282286488585, "grad_norm": 0.13421329855918884, "learning_rate": 0.0005, "loss": 2.1241, "step": 104730 }, { "epoch": 0.3986662911169812, "grad_norm": 0.1216406598687172, "learning_rate": 0.0005, "loss": 2.1234, "step": 104740 }, { "epoch": 0.39870435358510387, "grad_norm": 0.12023992091417313, "learning_rate": 0.0005, "loss": 2.1142, "step": 104750 }, { "epoch": 0.3987424160532266, "grad_norm": 0.12383361160755157, "learning_rate": 0.0005, "loss": 2.1222, "step": 104760 }, { "epoch": 0.39878047852134924, "grad_norm": 0.12238840013742447, "learning_rate": 0.0005, "loss": 2.1165, "step": 104770 }, { "epoch": 0.39881854098947195, "grad_norm": 0.11502031236886978, "learning_rate": 0.0005, "loss": 2.1105, "step": 104780 }, { "epoch": 0.3988566034575946, "grad_norm": 0.1243932768702507, "learning_rate": 0.0005, "loss": 2.1252, "step": 104790 }, { "epoch": 0.39889466592571726, "grad_norm": 0.12052633613348007, "learning_rate": 0.0005, "loss": 2.1215, "step": 104800 }, { "epoch": 0.39893272839384, "grad_norm": 0.12813816964626312, "learning_rate": 0.0005, "loss": 2.1075, "step": 104810 }, { "epoch": 0.39897079086196263, "grad_norm": 0.12674903869628906, "learning_rate": 0.0005, "loss": 2.1236, "step": 104820 }, { "epoch": 0.39900885333008534, "grad_norm": 0.12259454280138016, "learning_rate": 0.0005, "loss": 2.1134, "step": 104830 }, { "epoch": 0.399046915798208, "grad_norm": 0.12549050152301788, "learning_rate": 0.0005, "loss": 2.1189, "step": 104840 }, { "epoch": 0.3990849782663307, "grad_norm": 0.12888945639133453, "learning_rate": 0.0005, "loss": 2.1149, "step": 104850 }, { "epoch": 0.39912304073445337, "grad_norm": 0.1267143189907074, "learning_rate": 0.0005, "loss": 2.115, "step": 104860 }, { "epoch": 0.3991611032025761, "grad_norm": 0.14655642211437225, "learning_rate": 0.0005, "loss": 2.1247, "step": 104870 }, { "epoch": 0.39919916567069874, "grad_norm": 0.12013908475637436, "learning_rate": 0.0005, "loss": 2.1233, "step": 104880 }, { "epoch": 0.39923722813882145, "grad_norm": 0.14582134783267975, "learning_rate": 0.0005, "loss": 2.1147, "step": 104890 }, { "epoch": 0.3992752906069441, "grad_norm": 0.10703765600919724, "learning_rate": 0.0005, "loss": 2.121, "step": 104900 }, { "epoch": 0.3993133530750668, "grad_norm": 0.13721676170825958, "learning_rate": 0.0005, "loss": 2.1218, "step": 104910 }, { "epoch": 0.3993514155431895, "grad_norm": 0.12658992409706116, "learning_rate": 0.0005, "loss": 2.1225, "step": 104920 }, { "epoch": 0.39938947801131214, "grad_norm": 0.12144620716571808, "learning_rate": 0.0005, "loss": 2.1199, "step": 104930 }, { "epoch": 0.39942754047943485, "grad_norm": 0.1336260586977005, "learning_rate": 0.0005, "loss": 2.112, "step": 104940 }, { "epoch": 0.3994656029475575, "grad_norm": 0.12390537559986115, "learning_rate": 0.0005, "loss": 2.1124, "step": 104950 }, { "epoch": 0.3995036654156802, "grad_norm": 0.1204170510172844, "learning_rate": 0.0005, "loss": 2.1273, "step": 104960 }, { "epoch": 0.3995417278838029, "grad_norm": 0.12203984707593918, "learning_rate": 0.0005, "loss": 2.1174, "step": 104970 }, { "epoch": 0.3995797903519256, "grad_norm": 0.11647836118936539, "learning_rate": 0.0005, "loss": 2.1251, "step": 104980 }, { "epoch": 0.39961785282004825, "grad_norm": 0.12164629250764847, "learning_rate": 0.0005, "loss": 2.1171, "step": 104990 }, { "epoch": 0.39965591528817096, "grad_norm": 0.1342100352048874, "learning_rate": 0.0005, "loss": 2.1192, "step": 105000 }, { "epoch": 0.3996939777562936, "grad_norm": 0.13156567513942719, "learning_rate": 0.0005, "loss": 2.1101, "step": 105010 }, { "epoch": 0.39973204022441633, "grad_norm": 0.13787831366062164, "learning_rate": 0.0005, "loss": 2.1273, "step": 105020 }, { "epoch": 0.399770102692539, "grad_norm": 0.12670817971229553, "learning_rate": 0.0005, "loss": 2.1039, "step": 105030 }, { "epoch": 0.3998081651606617, "grad_norm": 0.12434558570384979, "learning_rate": 0.0005, "loss": 2.1104, "step": 105040 }, { "epoch": 0.39984622762878436, "grad_norm": 0.13706181943416595, "learning_rate": 0.0005, "loss": 2.1174, "step": 105050 }, { "epoch": 0.39988429009690707, "grad_norm": 0.13545085489749908, "learning_rate": 0.0005, "loss": 2.102, "step": 105060 }, { "epoch": 0.3999223525650297, "grad_norm": 0.12967762351036072, "learning_rate": 0.0005, "loss": 2.1175, "step": 105070 }, { "epoch": 0.3999604150331524, "grad_norm": 0.12019047141075134, "learning_rate": 0.0005, "loss": 2.1273, "step": 105080 }, { "epoch": 0.3999984775012751, "grad_norm": 0.12411542236804962, "learning_rate": 0.0005, "loss": 2.1191, "step": 105090 }, { "epoch": 0.40003653996939775, "grad_norm": 0.1309502273797989, "learning_rate": 0.0005, "loss": 2.1136, "step": 105100 }, { "epoch": 0.40007460243752047, "grad_norm": 0.11400396376848221, "learning_rate": 0.0005, "loss": 2.1181, "step": 105110 }, { "epoch": 0.4001126649056431, "grad_norm": 0.13012060523033142, "learning_rate": 0.0005, "loss": 2.1027, "step": 105120 }, { "epoch": 0.40015072737376584, "grad_norm": 0.12380818277597427, "learning_rate": 0.0005, "loss": 2.0941, "step": 105130 }, { "epoch": 0.4001887898418885, "grad_norm": 0.11546797305345535, "learning_rate": 0.0005, "loss": 2.1123, "step": 105140 }, { "epoch": 0.4002268523100112, "grad_norm": 0.12413118034601212, "learning_rate": 0.0005, "loss": 2.1184, "step": 105150 }, { "epoch": 0.40026491477813386, "grad_norm": 0.12526023387908936, "learning_rate": 0.0005, "loss": 2.1448, "step": 105160 }, { "epoch": 0.4003029772462566, "grad_norm": 0.12476836144924164, "learning_rate": 0.0005, "loss": 2.1259, "step": 105170 }, { "epoch": 0.40034103971437923, "grad_norm": 0.12636984884738922, "learning_rate": 0.0005, "loss": 2.1129, "step": 105180 }, { "epoch": 0.40037910218250194, "grad_norm": 0.12862300872802734, "learning_rate": 0.0005, "loss": 2.1113, "step": 105190 }, { "epoch": 0.4004171646506246, "grad_norm": 0.13172148168087006, "learning_rate": 0.0005, "loss": 2.1292, "step": 105200 }, { "epoch": 0.4004552271187473, "grad_norm": 0.1303453892469406, "learning_rate": 0.0005, "loss": 2.1034, "step": 105210 }, { "epoch": 0.40049328958686997, "grad_norm": 0.12228729575872421, "learning_rate": 0.0005, "loss": 2.1185, "step": 105220 }, { "epoch": 0.40053135205499263, "grad_norm": 0.138387992978096, "learning_rate": 0.0005, "loss": 2.128, "step": 105230 }, { "epoch": 0.40056941452311534, "grad_norm": 0.11740188300609589, "learning_rate": 0.0005, "loss": 2.1067, "step": 105240 }, { "epoch": 0.400607476991238, "grad_norm": 0.12075120210647583, "learning_rate": 0.0005, "loss": 2.121, "step": 105250 }, { "epoch": 0.4006455394593607, "grad_norm": 0.12485245615243912, "learning_rate": 0.0005, "loss": 2.1184, "step": 105260 }, { "epoch": 0.40068360192748337, "grad_norm": 0.12556962668895721, "learning_rate": 0.0005, "loss": 2.1239, "step": 105270 }, { "epoch": 0.4007216643956061, "grad_norm": 0.14791473746299744, "learning_rate": 0.0005, "loss": 2.1061, "step": 105280 }, { "epoch": 0.40075972686372874, "grad_norm": 0.1255379170179367, "learning_rate": 0.0005, "loss": 2.122, "step": 105290 }, { "epoch": 0.40079778933185145, "grad_norm": 0.12866927683353424, "learning_rate": 0.0005, "loss": 2.1076, "step": 105300 }, { "epoch": 0.4008358517999741, "grad_norm": 0.126442089676857, "learning_rate": 0.0005, "loss": 2.1125, "step": 105310 }, { "epoch": 0.4008739142680968, "grad_norm": 0.11720888316631317, "learning_rate": 0.0005, "loss": 2.1243, "step": 105320 }, { "epoch": 0.4009119767362195, "grad_norm": 0.12004213035106659, "learning_rate": 0.0005, "loss": 2.1168, "step": 105330 }, { "epoch": 0.4009500392043422, "grad_norm": 0.12049522995948792, "learning_rate": 0.0005, "loss": 2.1112, "step": 105340 }, { "epoch": 0.40098810167246485, "grad_norm": 0.15276920795440674, "learning_rate": 0.0005, "loss": 2.1293, "step": 105350 }, { "epoch": 0.40102616414058756, "grad_norm": 0.12708880007266998, "learning_rate": 0.0005, "loss": 2.1204, "step": 105360 }, { "epoch": 0.4010642266087102, "grad_norm": 0.13746021687984467, "learning_rate": 0.0005, "loss": 2.1185, "step": 105370 }, { "epoch": 0.4011022890768329, "grad_norm": 0.11826682835817337, "learning_rate": 0.0005, "loss": 2.124, "step": 105380 }, { "epoch": 0.4011403515449556, "grad_norm": 0.12515002489089966, "learning_rate": 0.0005, "loss": 2.1249, "step": 105390 }, { "epoch": 0.40117841401307824, "grad_norm": 0.13323919475078583, "learning_rate": 0.0005, "loss": 2.1199, "step": 105400 }, { "epoch": 0.40121647648120096, "grad_norm": 0.13742923736572266, "learning_rate": 0.0005, "loss": 2.1061, "step": 105410 }, { "epoch": 0.4012545389493236, "grad_norm": 0.11911813914775848, "learning_rate": 0.0005, "loss": 2.1269, "step": 105420 }, { "epoch": 0.4012926014174463, "grad_norm": 0.11971248686313629, "learning_rate": 0.0005, "loss": 2.1121, "step": 105430 }, { "epoch": 0.401330663885569, "grad_norm": 0.1214604452252388, "learning_rate": 0.0005, "loss": 2.1256, "step": 105440 }, { "epoch": 0.4013687263536917, "grad_norm": 0.12025048583745956, "learning_rate": 0.0005, "loss": 2.1071, "step": 105450 }, { "epoch": 0.40140678882181435, "grad_norm": 0.12886670231819153, "learning_rate": 0.0005, "loss": 2.124, "step": 105460 }, { "epoch": 0.40144485128993707, "grad_norm": 0.11783948540687561, "learning_rate": 0.0005, "loss": 2.1062, "step": 105470 }, { "epoch": 0.4014829137580597, "grad_norm": 0.13330931961536407, "learning_rate": 0.0005, "loss": 2.1229, "step": 105480 }, { "epoch": 0.40152097622618244, "grad_norm": 0.12965691089630127, "learning_rate": 0.0005, "loss": 2.1143, "step": 105490 }, { "epoch": 0.4015590386943051, "grad_norm": 0.13883811235427856, "learning_rate": 0.0005, "loss": 2.0993, "step": 105500 }, { "epoch": 0.40159710116242775, "grad_norm": 0.12219704687595367, "learning_rate": 0.0005, "loss": 2.0945, "step": 105510 }, { "epoch": 0.40163516363055046, "grad_norm": 0.13543589413166046, "learning_rate": 0.0005, "loss": 2.1275, "step": 105520 }, { "epoch": 0.4016732260986731, "grad_norm": 0.12430472671985626, "learning_rate": 0.0005, "loss": 2.1253, "step": 105530 }, { "epoch": 0.40171128856679583, "grad_norm": 0.12211424857378006, "learning_rate": 0.0005, "loss": 2.1263, "step": 105540 }, { "epoch": 0.4017493510349185, "grad_norm": 0.12356506288051605, "learning_rate": 0.0005, "loss": 2.1177, "step": 105550 }, { "epoch": 0.4017874135030412, "grad_norm": 0.12859176099300385, "learning_rate": 0.0005, "loss": 2.136, "step": 105560 }, { "epoch": 0.40182547597116386, "grad_norm": 0.12038209289312363, "learning_rate": 0.0005, "loss": 2.13, "step": 105570 }, { "epoch": 0.40186353843928657, "grad_norm": 0.12111588567495346, "learning_rate": 0.0005, "loss": 2.1274, "step": 105580 }, { "epoch": 0.40190160090740923, "grad_norm": 0.13333773612976074, "learning_rate": 0.0005, "loss": 2.1173, "step": 105590 }, { "epoch": 0.40193966337553194, "grad_norm": 0.13389089703559875, "learning_rate": 0.0005, "loss": 2.1134, "step": 105600 }, { "epoch": 0.4019777258436546, "grad_norm": 0.1274549514055252, "learning_rate": 0.0005, "loss": 2.1205, "step": 105610 }, { "epoch": 0.4020157883117773, "grad_norm": 0.13398416340351105, "learning_rate": 0.0005, "loss": 2.1197, "step": 105620 }, { "epoch": 0.40205385077989997, "grad_norm": 0.12145400792360306, "learning_rate": 0.0005, "loss": 2.1251, "step": 105630 }, { "epoch": 0.4020919132480227, "grad_norm": 0.13358928263187408, "learning_rate": 0.0005, "loss": 2.121, "step": 105640 }, { "epoch": 0.40212997571614534, "grad_norm": 0.11757256835699081, "learning_rate": 0.0005, "loss": 2.1306, "step": 105650 }, { "epoch": 0.402168038184268, "grad_norm": 0.11538097262382507, "learning_rate": 0.0005, "loss": 2.1297, "step": 105660 }, { "epoch": 0.4022061006523907, "grad_norm": 0.13190820813179016, "learning_rate": 0.0005, "loss": 2.1142, "step": 105670 }, { "epoch": 0.40224416312051336, "grad_norm": 0.14979924261569977, "learning_rate": 0.0005, "loss": 2.1328, "step": 105680 }, { "epoch": 0.4022822255886361, "grad_norm": 0.1268017739057541, "learning_rate": 0.0005, "loss": 2.1273, "step": 105690 }, { "epoch": 0.40232028805675873, "grad_norm": 0.12742775678634644, "learning_rate": 0.0005, "loss": 2.1197, "step": 105700 }, { "epoch": 0.40235835052488145, "grad_norm": 0.12942014634609222, "learning_rate": 0.0005, "loss": 2.1178, "step": 105710 }, { "epoch": 0.4023964129930041, "grad_norm": 0.11720619350671768, "learning_rate": 0.0005, "loss": 2.1276, "step": 105720 }, { "epoch": 0.4024344754611268, "grad_norm": 0.13581234216690063, "learning_rate": 0.0005, "loss": 2.1274, "step": 105730 }, { "epoch": 0.4024725379292495, "grad_norm": 0.12838581204414368, "learning_rate": 0.0005, "loss": 2.1045, "step": 105740 }, { "epoch": 0.4025106003973722, "grad_norm": 0.12061332911252975, "learning_rate": 0.0005, "loss": 2.1145, "step": 105750 }, { "epoch": 0.40254866286549484, "grad_norm": 0.12706062197685242, "learning_rate": 0.0005, "loss": 2.1085, "step": 105760 }, { "epoch": 0.40258672533361756, "grad_norm": 0.12208613753318787, "learning_rate": 0.0005, "loss": 2.1057, "step": 105770 }, { "epoch": 0.4026247878017402, "grad_norm": 0.12473037838935852, "learning_rate": 0.0005, "loss": 2.1126, "step": 105780 }, { "epoch": 0.4026628502698629, "grad_norm": 0.11989544332027435, "learning_rate": 0.0005, "loss": 2.1243, "step": 105790 }, { "epoch": 0.4027009127379856, "grad_norm": 0.18878960609436035, "learning_rate": 0.0005, "loss": 2.1283, "step": 105800 }, { "epoch": 0.40273897520610824, "grad_norm": 0.12641625106334686, "learning_rate": 0.0005, "loss": 2.1194, "step": 105810 }, { "epoch": 0.40277703767423095, "grad_norm": 0.12721383571624756, "learning_rate": 0.0005, "loss": 2.1314, "step": 105820 }, { "epoch": 0.4028151001423536, "grad_norm": 0.13350263237953186, "learning_rate": 0.0005, "loss": 2.1261, "step": 105830 }, { "epoch": 0.4028531626104763, "grad_norm": 0.13324423134326935, "learning_rate": 0.0005, "loss": 2.1275, "step": 105840 }, { "epoch": 0.402891225078599, "grad_norm": 0.1388176828622818, "learning_rate": 0.0005, "loss": 2.1211, "step": 105850 }, { "epoch": 0.4029292875467217, "grad_norm": 0.1237429603934288, "learning_rate": 0.0005, "loss": 2.1088, "step": 105860 }, { "epoch": 0.40296735001484435, "grad_norm": 0.12776918709278107, "learning_rate": 0.0005, "loss": 2.1229, "step": 105870 }, { "epoch": 0.40300541248296706, "grad_norm": 0.12410617619752884, "learning_rate": 0.0005, "loss": 2.1086, "step": 105880 }, { "epoch": 0.4030434749510897, "grad_norm": 0.12657538056373596, "learning_rate": 0.0005, "loss": 2.1045, "step": 105890 }, { "epoch": 0.40308153741921243, "grad_norm": 0.12220700085163116, "learning_rate": 0.0005, "loss": 2.1252, "step": 105900 }, { "epoch": 0.4031195998873351, "grad_norm": 0.13133662939071655, "learning_rate": 0.0005, "loss": 2.1341, "step": 105910 }, { "epoch": 0.4031576623554578, "grad_norm": 0.13154169917106628, "learning_rate": 0.0005, "loss": 2.1192, "step": 105920 }, { "epoch": 0.40319572482358046, "grad_norm": 0.1365981101989746, "learning_rate": 0.0005, "loss": 2.1142, "step": 105930 }, { "epoch": 0.4032337872917031, "grad_norm": 0.14633728563785553, "learning_rate": 0.0005, "loss": 2.1175, "step": 105940 }, { "epoch": 0.40327184975982583, "grad_norm": 0.13925231993198395, "learning_rate": 0.0005, "loss": 2.122, "step": 105950 }, { "epoch": 0.4033099122279485, "grad_norm": 0.12116432934999466, "learning_rate": 0.0005, "loss": 2.1252, "step": 105960 }, { "epoch": 0.4033479746960712, "grad_norm": 0.12359362095594406, "learning_rate": 0.0005, "loss": 2.1304, "step": 105970 }, { "epoch": 0.40338603716419386, "grad_norm": 0.12147456407546997, "learning_rate": 0.0005, "loss": 2.132, "step": 105980 }, { "epoch": 0.40342409963231657, "grad_norm": 0.11512767523527145, "learning_rate": 0.0005, "loss": 2.1227, "step": 105990 }, { "epoch": 0.4034621621004392, "grad_norm": 0.12523676455020905, "learning_rate": 0.0005, "loss": 2.1257, "step": 106000 }, { "epoch": 0.40350022456856194, "grad_norm": 0.1256232112646103, "learning_rate": 0.0005, "loss": 2.1112, "step": 106010 }, { "epoch": 0.4035382870366846, "grad_norm": 0.15836471319198608, "learning_rate": 0.0005, "loss": 2.0999, "step": 106020 }, { "epoch": 0.4035763495048073, "grad_norm": 0.13333560526371002, "learning_rate": 0.0005, "loss": 2.1258, "step": 106030 }, { "epoch": 0.40361441197292997, "grad_norm": 0.12412805110216141, "learning_rate": 0.0005, "loss": 2.1108, "step": 106040 }, { "epoch": 0.4036524744410527, "grad_norm": 0.12889280915260315, "learning_rate": 0.0005, "loss": 2.1075, "step": 106050 }, { "epoch": 0.40369053690917533, "grad_norm": 0.12141398340463638, "learning_rate": 0.0005, "loss": 2.1112, "step": 106060 }, { "epoch": 0.40372859937729805, "grad_norm": 0.12618468701839447, "learning_rate": 0.0005, "loss": 2.12, "step": 106070 }, { "epoch": 0.4037666618454207, "grad_norm": 0.13387101888656616, "learning_rate": 0.0005, "loss": 2.1252, "step": 106080 }, { "epoch": 0.40380472431354336, "grad_norm": 0.1238047257065773, "learning_rate": 0.0005, "loss": 2.1105, "step": 106090 }, { "epoch": 0.4038427867816661, "grad_norm": 0.11805254220962524, "learning_rate": 0.0005, "loss": 2.1328, "step": 106100 }, { "epoch": 0.40388084924978873, "grad_norm": 0.12471137195825577, "learning_rate": 0.0005, "loss": 2.11, "step": 106110 }, { "epoch": 0.40391891171791144, "grad_norm": 0.12302107363939285, "learning_rate": 0.0005, "loss": 2.1226, "step": 106120 }, { "epoch": 0.4039569741860341, "grad_norm": 0.1345338076353073, "learning_rate": 0.0005, "loss": 2.1155, "step": 106130 }, { "epoch": 0.4039950366541568, "grad_norm": 0.12187851220369339, "learning_rate": 0.0005, "loss": 2.119, "step": 106140 }, { "epoch": 0.40403309912227947, "grad_norm": 0.12177269905805588, "learning_rate": 0.0005, "loss": 2.1328, "step": 106150 }, { "epoch": 0.4040711615904022, "grad_norm": 0.11549049615859985, "learning_rate": 0.0005, "loss": 2.1262, "step": 106160 }, { "epoch": 0.40410922405852484, "grad_norm": 0.15617483854293823, "learning_rate": 0.0005, "loss": 2.1183, "step": 106170 }, { "epoch": 0.40414728652664755, "grad_norm": 0.11969661712646484, "learning_rate": 0.0005, "loss": 2.1296, "step": 106180 }, { "epoch": 0.4041853489947702, "grad_norm": 0.11274225264787674, "learning_rate": 0.0005, "loss": 2.1035, "step": 106190 }, { "epoch": 0.4042234114628929, "grad_norm": 0.13579653203487396, "learning_rate": 0.0005, "loss": 2.1039, "step": 106200 }, { "epoch": 0.4042614739310156, "grad_norm": 0.13461516797542572, "learning_rate": 0.0005, "loss": 2.1176, "step": 106210 }, { "epoch": 0.4042995363991383, "grad_norm": 0.13219502568244934, "learning_rate": 0.0005, "loss": 2.1226, "step": 106220 }, { "epoch": 0.40433759886726095, "grad_norm": 0.11906775832176208, "learning_rate": 0.0005, "loss": 2.1075, "step": 106230 }, { "epoch": 0.4043756613353836, "grad_norm": 0.13577035069465637, "learning_rate": 0.0005, "loss": 2.1181, "step": 106240 }, { "epoch": 0.4044137238035063, "grad_norm": 0.11380638927221298, "learning_rate": 0.0005, "loss": 2.1181, "step": 106250 }, { "epoch": 0.404451786271629, "grad_norm": 0.11306143552064896, "learning_rate": 0.0005, "loss": 2.1238, "step": 106260 }, { "epoch": 0.4044898487397517, "grad_norm": 0.12404890358448029, "learning_rate": 0.0005, "loss": 2.1118, "step": 106270 }, { "epoch": 0.40452791120787435, "grad_norm": 0.12584885954856873, "learning_rate": 0.0005, "loss": 2.1242, "step": 106280 }, { "epoch": 0.40456597367599706, "grad_norm": 0.13881009817123413, "learning_rate": 0.0005, "loss": 2.1164, "step": 106290 }, { "epoch": 0.4046040361441197, "grad_norm": 0.14016923308372498, "learning_rate": 0.0005, "loss": 2.1142, "step": 106300 }, { "epoch": 0.40464209861224243, "grad_norm": 0.14057432115077972, "learning_rate": 0.0005, "loss": 2.1261, "step": 106310 }, { "epoch": 0.4046801610803651, "grad_norm": 0.13729490339756012, "learning_rate": 0.0005, "loss": 2.1204, "step": 106320 }, { "epoch": 0.4047182235484878, "grad_norm": 0.14463871717453003, "learning_rate": 0.0005, "loss": 2.1025, "step": 106330 }, { "epoch": 0.40475628601661046, "grad_norm": 0.14529265463352203, "learning_rate": 0.0005, "loss": 2.1276, "step": 106340 }, { "epoch": 0.40479434848473317, "grad_norm": 0.1335321068763733, "learning_rate": 0.0005, "loss": 2.1127, "step": 106350 }, { "epoch": 0.4048324109528558, "grad_norm": 0.1342439204454422, "learning_rate": 0.0005, "loss": 2.1129, "step": 106360 }, { "epoch": 0.4048704734209785, "grad_norm": 0.12198767811059952, "learning_rate": 0.0005, "loss": 2.1225, "step": 106370 }, { "epoch": 0.4049085358891012, "grad_norm": 0.13221341371536255, "learning_rate": 0.0005, "loss": 2.1001, "step": 106380 }, { "epoch": 0.40494659835722385, "grad_norm": 0.14494569599628448, "learning_rate": 0.0005, "loss": 2.1245, "step": 106390 }, { "epoch": 0.40498466082534657, "grad_norm": 0.12156090885400772, "learning_rate": 0.0005, "loss": 2.1297, "step": 106400 }, { "epoch": 0.4050227232934692, "grad_norm": 0.13109037280082703, "learning_rate": 0.0005, "loss": 2.1223, "step": 106410 }, { "epoch": 0.40506078576159193, "grad_norm": 0.13591161370277405, "learning_rate": 0.0005, "loss": 2.1191, "step": 106420 }, { "epoch": 0.4050988482297146, "grad_norm": 0.13301120698451996, "learning_rate": 0.0005, "loss": 2.1334, "step": 106430 }, { "epoch": 0.4051369106978373, "grad_norm": 0.12221255898475647, "learning_rate": 0.0005, "loss": 2.1254, "step": 106440 }, { "epoch": 0.40517497316595996, "grad_norm": 0.12087249010801315, "learning_rate": 0.0005, "loss": 2.1342, "step": 106450 }, { "epoch": 0.4052130356340827, "grad_norm": 0.1255703717470169, "learning_rate": 0.0005, "loss": 2.1126, "step": 106460 }, { "epoch": 0.40525109810220533, "grad_norm": 0.12779287993907928, "learning_rate": 0.0005, "loss": 2.1342, "step": 106470 }, { "epoch": 0.40528916057032804, "grad_norm": 0.12255334854125977, "learning_rate": 0.0005, "loss": 2.1177, "step": 106480 }, { "epoch": 0.4053272230384507, "grad_norm": 0.1284879446029663, "learning_rate": 0.0005, "loss": 2.1411, "step": 106490 }, { "epoch": 0.4053652855065734, "grad_norm": 0.1315813958644867, "learning_rate": 0.0005, "loss": 2.123, "step": 106500 }, { "epoch": 0.40540334797469607, "grad_norm": 0.13096173107624054, "learning_rate": 0.0005, "loss": 2.1299, "step": 106510 }, { "epoch": 0.40544141044281873, "grad_norm": 0.1382998824119568, "learning_rate": 0.0005, "loss": 2.1055, "step": 106520 }, { "epoch": 0.40547947291094144, "grad_norm": 0.12591317296028137, "learning_rate": 0.0005, "loss": 2.1191, "step": 106530 }, { "epoch": 0.4055175353790641, "grad_norm": 0.13930954039096832, "learning_rate": 0.0005, "loss": 2.1164, "step": 106540 }, { "epoch": 0.4055555978471868, "grad_norm": 0.11556414514780045, "learning_rate": 0.0005, "loss": 2.1239, "step": 106550 }, { "epoch": 0.40559366031530947, "grad_norm": 0.12952682375907898, "learning_rate": 0.0005, "loss": 2.1074, "step": 106560 }, { "epoch": 0.4056317227834322, "grad_norm": 0.14504283666610718, "learning_rate": 0.0005, "loss": 2.1231, "step": 106570 }, { "epoch": 0.40566978525155484, "grad_norm": 0.13777132332324982, "learning_rate": 0.0005, "loss": 2.123, "step": 106580 }, { "epoch": 0.40570784771967755, "grad_norm": 0.12666688859462738, "learning_rate": 0.0005, "loss": 2.1254, "step": 106590 }, { "epoch": 0.4057459101878002, "grad_norm": 0.12245593219995499, "learning_rate": 0.0005, "loss": 2.1006, "step": 106600 }, { "epoch": 0.4057839726559229, "grad_norm": 0.14578312635421753, "learning_rate": 0.0005, "loss": 2.1167, "step": 106610 }, { "epoch": 0.4058220351240456, "grad_norm": 0.1222921684384346, "learning_rate": 0.0005, "loss": 2.117, "step": 106620 }, { "epoch": 0.4058600975921683, "grad_norm": 0.12834157049655914, "learning_rate": 0.0005, "loss": 2.1261, "step": 106630 }, { "epoch": 0.40589816006029095, "grad_norm": 0.12874889373779297, "learning_rate": 0.0005, "loss": 2.1028, "step": 106640 }, { "epoch": 0.40593622252841366, "grad_norm": 0.12798567116260529, "learning_rate": 0.0005, "loss": 2.1346, "step": 106650 }, { "epoch": 0.4059742849965363, "grad_norm": 0.1158149316906929, "learning_rate": 0.0005, "loss": 2.1158, "step": 106660 }, { "epoch": 0.406012347464659, "grad_norm": 0.13018986582756042, "learning_rate": 0.0005, "loss": 2.1149, "step": 106670 }, { "epoch": 0.4060504099327817, "grad_norm": 0.11808565258979797, "learning_rate": 0.0005, "loss": 2.1219, "step": 106680 }, { "epoch": 0.40608847240090434, "grad_norm": 0.12245102971792221, "learning_rate": 0.0005, "loss": 2.1257, "step": 106690 }, { "epoch": 0.40612653486902706, "grad_norm": 0.11302248388528824, "learning_rate": 0.0005, "loss": 2.1122, "step": 106700 }, { "epoch": 0.4061645973371497, "grad_norm": 0.13028843700885773, "learning_rate": 0.0005, "loss": 2.1329, "step": 106710 }, { "epoch": 0.4062026598052724, "grad_norm": 0.1179802417755127, "learning_rate": 0.0005, "loss": 2.1222, "step": 106720 }, { "epoch": 0.4062407222733951, "grad_norm": 0.13540194928646088, "learning_rate": 0.0005, "loss": 2.1263, "step": 106730 }, { "epoch": 0.4062787847415178, "grad_norm": 0.12822499871253967, "learning_rate": 0.0005, "loss": 2.1132, "step": 106740 }, { "epoch": 0.40631684720964045, "grad_norm": 0.11414218693971634, "learning_rate": 0.0005, "loss": 2.1244, "step": 106750 }, { "epoch": 0.40635490967776317, "grad_norm": 0.13179926574230194, "learning_rate": 0.0005, "loss": 2.1227, "step": 106760 }, { "epoch": 0.4063929721458858, "grad_norm": 0.12584665417671204, "learning_rate": 0.0005, "loss": 2.1147, "step": 106770 }, { "epoch": 0.40643103461400854, "grad_norm": 0.1283552050590515, "learning_rate": 0.0005, "loss": 2.1337, "step": 106780 }, { "epoch": 0.4064690970821312, "grad_norm": 0.11601763218641281, "learning_rate": 0.0005, "loss": 2.1129, "step": 106790 }, { "epoch": 0.40650715955025385, "grad_norm": 0.1216268315911293, "learning_rate": 0.0005, "loss": 2.1133, "step": 106800 }, { "epoch": 0.40654522201837656, "grad_norm": 0.12042805552482605, "learning_rate": 0.0005, "loss": 2.1179, "step": 106810 }, { "epoch": 0.4065832844864992, "grad_norm": 0.13717348873615265, "learning_rate": 0.0005, "loss": 2.1049, "step": 106820 }, { "epoch": 0.40662134695462193, "grad_norm": 0.13994011282920837, "learning_rate": 0.0005, "loss": 2.1144, "step": 106830 }, { "epoch": 0.4066594094227446, "grad_norm": 0.13324889540672302, "learning_rate": 0.0005, "loss": 2.131, "step": 106840 }, { "epoch": 0.4066974718908673, "grad_norm": 0.1207243949174881, "learning_rate": 0.0005, "loss": 2.122, "step": 106850 }, { "epoch": 0.40673553435898996, "grad_norm": 0.12666364014148712, "learning_rate": 0.0005, "loss": 2.1287, "step": 106860 }, { "epoch": 0.40677359682711267, "grad_norm": 0.1227511316537857, "learning_rate": 0.0005, "loss": 2.1144, "step": 106870 }, { "epoch": 0.40681165929523533, "grad_norm": 0.13686570525169373, "learning_rate": 0.0005, "loss": 2.0984, "step": 106880 }, { "epoch": 0.40684972176335804, "grad_norm": 0.12842904031276703, "learning_rate": 0.0005, "loss": 2.1277, "step": 106890 }, { "epoch": 0.4068877842314807, "grad_norm": 0.12260560691356659, "learning_rate": 0.0005, "loss": 2.1118, "step": 106900 }, { "epoch": 0.4069258466996034, "grad_norm": 0.12146230787038803, "learning_rate": 0.0005, "loss": 2.1234, "step": 106910 }, { "epoch": 0.40696390916772607, "grad_norm": 0.1416623294353485, "learning_rate": 0.0005, "loss": 2.1231, "step": 106920 }, { "epoch": 0.4070019716358488, "grad_norm": 0.14083746075630188, "learning_rate": 0.0005, "loss": 2.1199, "step": 106930 }, { "epoch": 0.40704003410397144, "grad_norm": 0.13980624079704285, "learning_rate": 0.0005, "loss": 2.1258, "step": 106940 }, { "epoch": 0.4070780965720941, "grad_norm": 0.12137874215841293, "learning_rate": 0.0005, "loss": 2.1091, "step": 106950 }, { "epoch": 0.4071161590402168, "grad_norm": 0.12216074019670486, "learning_rate": 0.0005, "loss": 2.1228, "step": 106960 }, { "epoch": 0.40715422150833946, "grad_norm": 0.11431451141834259, "learning_rate": 0.0005, "loss": 2.1232, "step": 106970 }, { "epoch": 0.4071922839764622, "grad_norm": 0.12706711888313293, "learning_rate": 0.0005, "loss": 2.1219, "step": 106980 }, { "epoch": 0.40723034644458483, "grad_norm": 0.12637631595134735, "learning_rate": 0.0005, "loss": 2.1082, "step": 106990 }, { "epoch": 0.40726840891270755, "grad_norm": 0.12496950477361679, "learning_rate": 0.0005, "loss": 2.1088, "step": 107000 }, { "epoch": 0.4073064713808302, "grad_norm": 0.12362261861562729, "learning_rate": 0.0005, "loss": 2.1062, "step": 107010 }, { "epoch": 0.4073445338489529, "grad_norm": 0.13388338685035706, "learning_rate": 0.0005, "loss": 2.127, "step": 107020 }, { "epoch": 0.4073825963170756, "grad_norm": 0.12871481478214264, "learning_rate": 0.0005, "loss": 2.1262, "step": 107030 }, { "epoch": 0.4074206587851983, "grad_norm": 0.13223305344581604, "learning_rate": 0.0005, "loss": 2.1177, "step": 107040 }, { "epoch": 0.40745872125332094, "grad_norm": 0.12447560578584671, "learning_rate": 0.0005, "loss": 2.1067, "step": 107050 }, { "epoch": 0.40749678372144366, "grad_norm": 0.13165107369422913, "learning_rate": 0.0005, "loss": 2.1045, "step": 107060 }, { "epoch": 0.4075348461895663, "grad_norm": 0.12830142676830292, "learning_rate": 0.0005, "loss": 2.1293, "step": 107070 }, { "epoch": 0.407572908657689, "grad_norm": 0.11964263767004013, "learning_rate": 0.0005, "loss": 2.1089, "step": 107080 }, { "epoch": 0.4076109711258117, "grad_norm": 0.11753566563129425, "learning_rate": 0.0005, "loss": 2.1146, "step": 107090 }, { "epoch": 0.40764903359393434, "grad_norm": 0.12723736464977264, "learning_rate": 0.0005, "loss": 2.1129, "step": 107100 }, { "epoch": 0.40768709606205705, "grad_norm": 0.11674083769321442, "learning_rate": 0.0005, "loss": 2.1285, "step": 107110 }, { "epoch": 0.4077251585301797, "grad_norm": 0.12733396887779236, "learning_rate": 0.0005, "loss": 2.1281, "step": 107120 }, { "epoch": 0.4077632209983024, "grad_norm": 0.13298550248146057, "learning_rate": 0.0005, "loss": 2.1241, "step": 107130 }, { "epoch": 0.4078012834664251, "grad_norm": 0.11968598514795303, "learning_rate": 0.0005, "loss": 2.1215, "step": 107140 }, { "epoch": 0.4078393459345478, "grad_norm": 0.12861406803131104, "learning_rate": 0.0005, "loss": 2.1176, "step": 107150 }, { "epoch": 0.40787740840267045, "grad_norm": 0.1397750824689865, "learning_rate": 0.0005, "loss": 2.1247, "step": 107160 }, { "epoch": 0.40791547087079316, "grad_norm": 0.12403788417577744, "learning_rate": 0.0005, "loss": 2.1143, "step": 107170 }, { "epoch": 0.4079535333389158, "grad_norm": 0.14027903974056244, "learning_rate": 0.0005, "loss": 2.1199, "step": 107180 }, { "epoch": 0.40799159580703853, "grad_norm": 0.1178564578294754, "learning_rate": 0.0005, "loss": 2.1208, "step": 107190 }, { "epoch": 0.4080296582751612, "grad_norm": 0.12243127077817917, "learning_rate": 0.0005, "loss": 2.1294, "step": 107200 }, { "epoch": 0.4080677207432839, "grad_norm": 0.12157271802425385, "learning_rate": 0.0005, "loss": 2.1261, "step": 107210 }, { "epoch": 0.40810578321140656, "grad_norm": 0.12877163290977478, "learning_rate": 0.0005, "loss": 2.1238, "step": 107220 }, { "epoch": 0.4081438456795292, "grad_norm": 0.1127689853310585, "learning_rate": 0.0005, "loss": 2.1185, "step": 107230 }, { "epoch": 0.40818190814765193, "grad_norm": 0.1347082555294037, "learning_rate": 0.0005, "loss": 2.1294, "step": 107240 }, { "epoch": 0.4082199706157746, "grad_norm": 0.1271202713251114, "learning_rate": 0.0005, "loss": 2.1202, "step": 107250 }, { "epoch": 0.4082580330838973, "grad_norm": 0.12755969166755676, "learning_rate": 0.0005, "loss": 2.1271, "step": 107260 }, { "epoch": 0.40829609555201996, "grad_norm": 0.13087047636508942, "learning_rate": 0.0005, "loss": 2.125, "step": 107270 }, { "epoch": 0.40833415802014267, "grad_norm": 0.13195504248142242, "learning_rate": 0.0005, "loss": 2.1226, "step": 107280 }, { "epoch": 0.4083722204882653, "grad_norm": 0.12616842985153198, "learning_rate": 0.0005, "loss": 2.1093, "step": 107290 }, { "epoch": 0.40841028295638804, "grad_norm": 0.12336033582687378, "learning_rate": 0.0005, "loss": 2.1057, "step": 107300 }, { "epoch": 0.4084483454245107, "grad_norm": 0.13530464470386505, "learning_rate": 0.0005, "loss": 2.125, "step": 107310 }, { "epoch": 0.4084864078926334, "grad_norm": 0.12252623587846756, "learning_rate": 0.0005, "loss": 2.1138, "step": 107320 }, { "epoch": 0.40852447036075606, "grad_norm": 0.11957842856645584, "learning_rate": 0.0005, "loss": 2.1292, "step": 107330 }, { "epoch": 0.4085625328288788, "grad_norm": 0.12841619551181793, "learning_rate": 0.0005, "loss": 2.1076, "step": 107340 }, { "epoch": 0.40860059529700143, "grad_norm": 0.13767950236797333, "learning_rate": 0.0005, "loss": 2.1226, "step": 107350 }, { "epoch": 0.40863865776512415, "grad_norm": 0.13289736211299896, "learning_rate": 0.0005, "loss": 2.1296, "step": 107360 }, { "epoch": 0.4086767202332468, "grad_norm": 0.1358732134103775, "learning_rate": 0.0005, "loss": 2.1359, "step": 107370 }, { "epoch": 0.40871478270136946, "grad_norm": 0.127150297164917, "learning_rate": 0.0005, "loss": 2.1131, "step": 107380 }, { "epoch": 0.4087528451694922, "grad_norm": 0.12780392169952393, "learning_rate": 0.0005, "loss": 2.1377, "step": 107390 }, { "epoch": 0.40879090763761483, "grad_norm": 0.13576877117156982, "learning_rate": 0.0005, "loss": 2.1196, "step": 107400 }, { "epoch": 0.40882897010573754, "grad_norm": 0.12100337445735931, "learning_rate": 0.0005, "loss": 2.1224, "step": 107410 }, { "epoch": 0.4088670325738602, "grad_norm": 0.12995721399784088, "learning_rate": 0.0005, "loss": 2.1221, "step": 107420 }, { "epoch": 0.4089050950419829, "grad_norm": 0.12193107604980469, "learning_rate": 0.0005, "loss": 2.1224, "step": 107430 }, { "epoch": 0.40894315751010557, "grad_norm": 0.1287761926651001, "learning_rate": 0.0005, "loss": 2.141, "step": 107440 }, { "epoch": 0.4089812199782283, "grad_norm": 0.12959787249565125, "learning_rate": 0.0005, "loss": 2.0993, "step": 107450 }, { "epoch": 0.40901928244635094, "grad_norm": 0.11854854971170425, "learning_rate": 0.0005, "loss": 2.1268, "step": 107460 }, { "epoch": 0.40905734491447365, "grad_norm": 0.12470625340938568, "learning_rate": 0.0005, "loss": 2.1282, "step": 107470 }, { "epoch": 0.4090954073825963, "grad_norm": 0.11399077624082565, "learning_rate": 0.0005, "loss": 2.1132, "step": 107480 }, { "epoch": 0.409133469850719, "grad_norm": 0.12666715681552887, "learning_rate": 0.0005, "loss": 2.1131, "step": 107490 }, { "epoch": 0.4091715323188417, "grad_norm": 0.11980894953012466, "learning_rate": 0.0005, "loss": 2.1192, "step": 107500 }, { "epoch": 0.4092095947869644, "grad_norm": 0.11786861717700958, "learning_rate": 0.0005, "loss": 2.1197, "step": 107510 }, { "epoch": 0.40924765725508705, "grad_norm": 0.12244259566068649, "learning_rate": 0.0005, "loss": 2.1239, "step": 107520 }, { "epoch": 0.4092857197232097, "grad_norm": 0.1384306699037552, "learning_rate": 0.0005, "loss": 2.1126, "step": 107530 }, { "epoch": 0.4093237821913324, "grad_norm": 0.12418042868375778, "learning_rate": 0.0005, "loss": 2.1062, "step": 107540 }, { "epoch": 0.4093618446594551, "grad_norm": 0.1298133134841919, "learning_rate": 0.0005, "loss": 2.1203, "step": 107550 }, { "epoch": 0.4093999071275778, "grad_norm": 0.12193987518548965, "learning_rate": 0.0005, "loss": 2.12, "step": 107560 }, { "epoch": 0.40943796959570045, "grad_norm": 0.12395806610584259, "learning_rate": 0.0005, "loss": 2.1152, "step": 107570 }, { "epoch": 0.40947603206382316, "grad_norm": 0.13407832384109497, "learning_rate": 0.0005, "loss": 2.1054, "step": 107580 }, { "epoch": 0.4095140945319458, "grad_norm": 0.12088234722614288, "learning_rate": 0.0005, "loss": 2.1203, "step": 107590 }, { "epoch": 0.40955215700006853, "grad_norm": 0.12836715579032898, "learning_rate": 0.0005, "loss": 2.1121, "step": 107600 }, { "epoch": 0.4095902194681912, "grad_norm": 0.12157371640205383, "learning_rate": 0.0005, "loss": 2.1295, "step": 107610 }, { "epoch": 0.4096282819363139, "grad_norm": 0.12131499499082565, "learning_rate": 0.0005, "loss": 2.114, "step": 107620 }, { "epoch": 0.40966634440443656, "grad_norm": 0.1216978132724762, "learning_rate": 0.0005, "loss": 2.1026, "step": 107630 }, { "epoch": 0.40970440687255927, "grad_norm": 0.12099350988864899, "learning_rate": 0.0005, "loss": 2.1134, "step": 107640 }, { "epoch": 0.4097424693406819, "grad_norm": 0.12920355796813965, "learning_rate": 0.0005, "loss": 2.1109, "step": 107650 }, { "epoch": 0.40978053180880464, "grad_norm": 0.1273610144853592, "learning_rate": 0.0005, "loss": 2.1256, "step": 107660 }, { "epoch": 0.4098185942769273, "grad_norm": 0.13866552710533142, "learning_rate": 0.0005, "loss": 2.1283, "step": 107670 }, { "epoch": 0.40985665674504995, "grad_norm": 0.13295462727546692, "learning_rate": 0.0005, "loss": 2.1195, "step": 107680 }, { "epoch": 0.40989471921317266, "grad_norm": 0.12541206181049347, "learning_rate": 0.0005, "loss": 2.1325, "step": 107690 }, { "epoch": 0.4099327816812953, "grad_norm": 0.1251440793275833, "learning_rate": 0.0005, "loss": 2.1325, "step": 107700 }, { "epoch": 0.40997084414941803, "grad_norm": 0.1260979324579239, "learning_rate": 0.0005, "loss": 2.107, "step": 107710 }, { "epoch": 0.4100089066175407, "grad_norm": 0.11850418895483017, "learning_rate": 0.0005, "loss": 2.1125, "step": 107720 }, { "epoch": 0.4100469690856634, "grad_norm": 0.13400107622146606, "learning_rate": 0.0005, "loss": 2.1227, "step": 107730 }, { "epoch": 0.41008503155378606, "grad_norm": 0.12020386755466461, "learning_rate": 0.0005, "loss": 2.116, "step": 107740 }, { "epoch": 0.4101230940219088, "grad_norm": 0.12448807805776596, "learning_rate": 0.0005, "loss": 2.1245, "step": 107750 }, { "epoch": 0.41016115649003143, "grad_norm": 0.12401855736970901, "learning_rate": 0.0005, "loss": 2.1266, "step": 107760 }, { "epoch": 0.41019921895815414, "grad_norm": 0.12212575227022171, "learning_rate": 0.0005, "loss": 2.1184, "step": 107770 }, { "epoch": 0.4102372814262768, "grad_norm": 0.1332526057958603, "learning_rate": 0.0005, "loss": 2.133, "step": 107780 }, { "epoch": 0.4102753438943995, "grad_norm": 0.12568722665309906, "learning_rate": 0.0005, "loss": 2.1078, "step": 107790 }, { "epoch": 0.41031340636252217, "grad_norm": 0.1174527257680893, "learning_rate": 0.0005, "loss": 2.1119, "step": 107800 }, { "epoch": 0.4103514688306448, "grad_norm": 0.12806077301502228, "learning_rate": 0.0005, "loss": 2.127, "step": 107810 }, { "epoch": 0.41038953129876754, "grad_norm": 0.12083237618207932, "learning_rate": 0.0005, "loss": 2.1001, "step": 107820 }, { "epoch": 0.4104275937668902, "grad_norm": 0.12038543820381165, "learning_rate": 0.0005, "loss": 2.1317, "step": 107830 }, { "epoch": 0.4104656562350129, "grad_norm": 0.12364353239536285, "learning_rate": 0.0005, "loss": 2.1095, "step": 107840 }, { "epoch": 0.41050371870313557, "grad_norm": 0.1154148206114769, "learning_rate": 0.0005, "loss": 2.1094, "step": 107850 }, { "epoch": 0.4105417811712583, "grad_norm": 0.12116552889347076, "learning_rate": 0.0005, "loss": 2.1171, "step": 107860 }, { "epoch": 0.41057984363938094, "grad_norm": 0.1332802176475525, "learning_rate": 0.0005, "loss": 2.1313, "step": 107870 }, { "epoch": 0.41061790610750365, "grad_norm": 0.13961386680603027, "learning_rate": 0.0005, "loss": 2.1153, "step": 107880 }, { "epoch": 0.4106559685756263, "grad_norm": 0.1177176982164383, "learning_rate": 0.0005, "loss": 2.1229, "step": 107890 }, { "epoch": 0.410694031043749, "grad_norm": 0.1382824331521988, "learning_rate": 0.0005, "loss": 2.1143, "step": 107900 }, { "epoch": 0.4107320935118717, "grad_norm": 0.13816149532794952, "learning_rate": 0.0005, "loss": 2.1186, "step": 107910 }, { "epoch": 0.4107701559799944, "grad_norm": 0.13288699090480804, "learning_rate": 0.0005, "loss": 2.1243, "step": 107920 }, { "epoch": 0.41080821844811705, "grad_norm": 0.13014812767505646, "learning_rate": 0.0005, "loss": 2.1136, "step": 107930 }, { "epoch": 0.41084628091623976, "grad_norm": 0.12453297525644302, "learning_rate": 0.0005, "loss": 2.1216, "step": 107940 }, { "epoch": 0.4108843433843624, "grad_norm": 0.11405854672193527, "learning_rate": 0.0005, "loss": 2.1148, "step": 107950 }, { "epoch": 0.4109224058524851, "grad_norm": 0.12348777055740356, "learning_rate": 0.0005, "loss": 2.111, "step": 107960 }, { "epoch": 0.4109604683206078, "grad_norm": 0.1157868430018425, "learning_rate": 0.0005, "loss": 2.1321, "step": 107970 }, { "epoch": 0.41099853078873044, "grad_norm": 0.12794789671897888, "learning_rate": 0.0005, "loss": 2.1076, "step": 107980 }, { "epoch": 0.41103659325685316, "grad_norm": 0.12420628219842911, "learning_rate": 0.0005, "loss": 2.1077, "step": 107990 }, { "epoch": 0.4110746557249758, "grad_norm": 0.12100957334041595, "learning_rate": 0.0005, "loss": 2.1104, "step": 108000 }, { "epoch": 0.4111127181930985, "grad_norm": 0.1334034949541092, "learning_rate": 0.0005, "loss": 2.1271, "step": 108010 }, { "epoch": 0.4111507806612212, "grad_norm": 0.1218486949801445, "learning_rate": 0.0005, "loss": 2.1246, "step": 108020 }, { "epoch": 0.4111888431293439, "grad_norm": 0.1329689621925354, "learning_rate": 0.0005, "loss": 2.1119, "step": 108030 }, { "epoch": 0.41122690559746655, "grad_norm": 0.12003668397665024, "learning_rate": 0.0005, "loss": 2.1079, "step": 108040 }, { "epoch": 0.41126496806558926, "grad_norm": 0.13905754685401917, "learning_rate": 0.0005, "loss": 2.1367, "step": 108050 }, { "epoch": 0.4113030305337119, "grad_norm": 0.13376957178115845, "learning_rate": 0.0005, "loss": 2.1271, "step": 108060 }, { "epoch": 0.41134109300183463, "grad_norm": 0.13737185299396515, "learning_rate": 0.0005, "loss": 2.1183, "step": 108070 }, { "epoch": 0.4113791554699573, "grad_norm": 0.1212373897433281, "learning_rate": 0.0005, "loss": 2.11, "step": 108080 }, { "epoch": 0.41141721793808, "grad_norm": 0.14076654613018036, "learning_rate": 0.0005, "loss": 2.1042, "step": 108090 }, { "epoch": 0.41145528040620266, "grad_norm": 0.1278214007616043, "learning_rate": 0.0005, "loss": 2.1171, "step": 108100 }, { "epoch": 0.4114933428743253, "grad_norm": 0.14113424718379974, "learning_rate": 0.0005, "loss": 2.1396, "step": 108110 }, { "epoch": 0.41153140534244803, "grad_norm": 0.13218048214912415, "learning_rate": 0.0005, "loss": 2.1223, "step": 108120 }, { "epoch": 0.4115694678105707, "grad_norm": 0.11836156994104385, "learning_rate": 0.0005, "loss": 2.1076, "step": 108130 }, { "epoch": 0.4116075302786934, "grad_norm": 0.12116833031177521, "learning_rate": 0.0005, "loss": 2.123, "step": 108140 }, { "epoch": 0.41164559274681606, "grad_norm": 0.126220241189003, "learning_rate": 0.0005, "loss": 2.1223, "step": 108150 }, { "epoch": 0.41168365521493877, "grad_norm": 0.12636734545230865, "learning_rate": 0.0005, "loss": 2.1171, "step": 108160 }, { "epoch": 0.41172171768306143, "grad_norm": 0.1304181069135666, "learning_rate": 0.0005, "loss": 2.1193, "step": 108170 }, { "epoch": 0.41175978015118414, "grad_norm": 0.12628519535064697, "learning_rate": 0.0005, "loss": 2.1133, "step": 108180 }, { "epoch": 0.4117978426193068, "grad_norm": 0.12502655386924744, "learning_rate": 0.0005, "loss": 2.1238, "step": 108190 }, { "epoch": 0.4118359050874295, "grad_norm": 0.12146708369255066, "learning_rate": 0.0005, "loss": 2.127, "step": 108200 }, { "epoch": 0.41187396755555217, "grad_norm": 0.12809845805168152, "learning_rate": 0.0005, "loss": 2.0932, "step": 108210 }, { "epoch": 0.4119120300236749, "grad_norm": 0.15716975927352905, "learning_rate": 0.0005, "loss": 2.1155, "step": 108220 }, { "epoch": 0.41195009249179754, "grad_norm": 0.1256551891565323, "learning_rate": 0.0005, "loss": 2.1152, "step": 108230 }, { "epoch": 0.4119881549599202, "grad_norm": 0.11105377227067947, "learning_rate": 0.0005, "loss": 2.1202, "step": 108240 }, { "epoch": 0.4120262174280429, "grad_norm": 0.12480401247739792, "learning_rate": 0.0005, "loss": 2.1354, "step": 108250 }, { "epoch": 0.41206427989616556, "grad_norm": 0.13151103258132935, "learning_rate": 0.0005, "loss": 2.1128, "step": 108260 }, { "epoch": 0.4121023423642883, "grad_norm": 0.1231314092874527, "learning_rate": 0.0005, "loss": 2.1236, "step": 108270 }, { "epoch": 0.41214040483241093, "grad_norm": 0.12152253836393356, "learning_rate": 0.0005, "loss": 2.1108, "step": 108280 }, { "epoch": 0.41217846730053365, "grad_norm": 0.11900048702955246, "learning_rate": 0.0005, "loss": 2.1048, "step": 108290 }, { "epoch": 0.4122165297686563, "grad_norm": 0.13020843267440796, "learning_rate": 0.0005, "loss": 2.1289, "step": 108300 }, { "epoch": 0.412254592236779, "grad_norm": 0.1309482455253601, "learning_rate": 0.0005, "loss": 2.1299, "step": 108310 }, { "epoch": 0.4122926547049017, "grad_norm": 0.14295001327991486, "learning_rate": 0.0005, "loss": 2.1369, "step": 108320 }, { "epoch": 0.4123307171730244, "grad_norm": 0.12833385169506073, "learning_rate": 0.0005, "loss": 2.1158, "step": 108330 }, { "epoch": 0.41236877964114704, "grad_norm": 0.11909869313240051, "learning_rate": 0.0005, "loss": 2.1173, "step": 108340 }, { "epoch": 0.41240684210926976, "grad_norm": 0.12089741230010986, "learning_rate": 0.0005, "loss": 2.1092, "step": 108350 }, { "epoch": 0.4124449045773924, "grad_norm": 0.13093233108520508, "learning_rate": 0.0005, "loss": 2.1038, "step": 108360 }, { "epoch": 0.4124829670455151, "grad_norm": 0.11446915566921234, "learning_rate": 0.0005, "loss": 2.1284, "step": 108370 }, { "epoch": 0.4125210295136378, "grad_norm": 0.12707512080669403, "learning_rate": 0.0005, "loss": 2.1205, "step": 108380 }, { "epoch": 0.41255909198176044, "grad_norm": 0.13645118474960327, "learning_rate": 0.0005, "loss": 2.1413, "step": 108390 }, { "epoch": 0.41259715444988315, "grad_norm": 0.11449270695447922, "learning_rate": 0.0005, "loss": 2.1111, "step": 108400 }, { "epoch": 0.4126352169180058, "grad_norm": 0.1177472248673439, "learning_rate": 0.0005, "loss": 2.122, "step": 108410 }, { "epoch": 0.4126732793861285, "grad_norm": 0.1370553970336914, "learning_rate": 0.0005, "loss": 2.1177, "step": 108420 }, { "epoch": 0.4127113418542512, "grad_norm": 0.1213172897696495, "learning_rate": 0.0005, "loss": 2.1264, "step": 108430 }, { "epoch": 0.4127494043223739, "grad_norm": 0.12861493229866028, "learning_rate": 0.0005, "loss": 2.1055, "step": 108440 }, { "epoch": 0.41278746679049655, "grad_norm": 0.13049571216106415, "learning_rate": 0.0005, "loss": 2.1127, "step": 108450 }, { "epoch": 0.41282552925861926, "grad_norm": 0.13287471234798431, "learning_rate": 0.0005, "loss": 2.1284, "step": 108460 }, { "epoch": 0.4128635917267419, "grad_norm": 0.12993772327899933, "learning_rate": 0.0005, "loss": 2.11, "step": 108470 }, { "epoch": 0.41290165419486463, "grad_norm": 0.12364847958087921, "learning_rate": 0.0005, "loss": 2.1187, "step": 108480 }, { "epoch": 0.4129397166629873, "grad_norm": 0.1326444149017334, "learning_rate": 0.0005, "loss": 2.1119, "step": 108490 }, { "epoch": 0.41297777913111, "grad_norm": 0.11822287738323212, "learning_rate": 0.0005, "loss": 2.1144, "step": 108500 }, { "epoch": 0.41301584159923266, "grad_norm": 0.12279457598924637, "learning_rate": 0.0005, "loss": 2.1049, "step": 108510 }, { "epoch": 0.41305390406735537, "grad_norm": 0.1423376500606537, "learning_rate": 0.0005, "loss": 2.1098, "step": 108520 }, { "epoch": 0.41309196653547803, "grad_norm": 0.13920536637306213, "learning_rate": 0.0005, "loss": 2.125, "step": 108530 }, { "epoch": 0.4131300290036007, "grad_norm": 0.11479239165782928, "learning_rate": 0.0005, "loss": 2.1187, "step": 108540 }, { "epoch": 0.4131680914717234, "grad_norm": 0.12645624577999115, "learning_rate": 0.0005, "loss": 2.1125, "step": 108550 }, { "epoch": 0.41320615393984605, "grad_norm": 0.13007549941539764, "learning_rate": 0.0005, "loss": 2.1115, "step": 108560 }, { "epoch": 0.41324421640796877, "grad_norm": 0.12195257842540741, "learning_rate": 0.0005, "loss": 2.1199, "step": 108570 }, { "epoch": 0.4132822788760914, "grad_norm": 0.19788667559623718, "learning_rate": 0.0005, "loss": 2.1311, "step": 108580 }, { "epoch": 0.41332034134421414, "grad_norm": 0.13742467761039734, "learning_rate": 0.0005, "loss": 2.1343, "step": 108590 }, { "epoch": 0.4133584038123368, "grad_norm": 0.13775934278964996, "learning_rate": 0.0005, "loss": 2.1101, "step": 108600 }, { "epoch": 0.4133964662804595, "grad_norm": 0.11977819353342056, "learning_rate": 0.0005, "loss": 2.1077, "step": 108610 }, { "epoch": 0.41343452874858216, "grad_norm": 0.12793083488941193, "learning_rate": 0.0005, "loss": 2.1075, "step": 108620 }, { "epoch": 0.4134725912167049, "grad_norm": 0.14065957069396973, "learning_rate": 0.0005, "loss": 2.1184, "step": 108630 }, { "epoch": 0.41351065368482753, "grad_norm": 0.13145712018013, "learning_rate": 0.0005, "loss": 2.1203, "step": 108640 }, { "epoch": 0.41354871615295025, "grad_norm": 0.11875452101230621, "learning_rate": 0.0005, "loss": 2.1108, "step": 108650 }, { "epoch": 0.4135867786210729, "grad_norm": 0.11871971189975739, "learning_rate": 0.0005, "loss": 2.1215, "step": 108660 }, { "epoch": 0.41362484108919556, "grad_norm": 0.13564030826091766, "learning_rate": 0.0005, "loss": 2.1109, "step": 108670 }, { "epoch": 0.4136629035573183, "grad_norm": 0.12264464795589447, "learning_rate": 0.0005, "loss": 2.1062, "step": 108680 }, { "epoch": 0.41370096602544093, "grad_norm": 0.11105269193649292, "learning_rate": 0.0005, "loss": 2.1193, "step": 108690 }, { "epoch": 0.41373902849356364, "grad_norm": 0.12471351027488708, "learning_rate": 0.0005, "loss": 2.1295, "step": 108700 }, { "epoch": 0.4137770909616863, "grad_norm": 0.14022184908390045, "learning_rate": 0.0005, "loss": 2.1227, "step": 108710 }, { "epoch": 0.413815153429809, "grad_norm": 0.13109190762043, "learning_rate": 0.0005, "loss": 2.1186, "step": 108720 }, { "epoch": 0.41385321589793167, "grad_norm": 0.11493074893951416, "learning_rate": 0.0005, "loss": 2.1056, "step": 108730 }, { "epoch": 0.4138912783660544, "grad_norm": 0.14010822772979736, "learning_rate": 0.0005, "loss": 2.1132, "step": 108740 }, { "epoch": 0.41392934083417704, "grad_norm": 0.12571784853935242, "learning_rate": 0.0005, "loss": 2.1163, "step": 108750 }, { "epoch": 0.41396740330229975, "grad_norm": 0.11984385550022125, "learning_rate": 0.0005, "loss": 2.1159, "step": 108760 }, { "epoch": 0.4140054657704224, "grad_norm": 0.1423097550868988, "learning_rate": 0.0005, "loss": 2.1139, "step": 108770 }, { "epoch": 0.4140435282385451, "grad_norm": 0.14567236602306366, "learning_rate": 0.0005, "loss": 2.1187, "step": 108780 }, { "epoch": 0.4140815907066678, "grad_norm": 0.18437223136425018, "learning_rate": 0.0005, "loss": 2.1065, "step": 108790 }, { "epoch": 0.4141196531747905, "grad_norm": 0.12724503874778748, "learning_rate": 0.0005, "loss": 2.111, "step": 108800 }, { "epoch": 0.41415771564291315, "grad_norm": 0.13286955654621124, "learning_rate": 0.0005, "loss": 2.1326, "step": 108810 }, { "epoch": 0.4141957781110358, "grad_norm": 0.1133560836315155, "learning_rate": 0.0005, "loss": 2.1196, "step": 108820 }, { "epoch": 0.4142338405791585, "grad_norm": 0.11960550397634506, "learning_rate": 0.0005, "loss": 2.1104, "step": 108830 }, { "epoch": 0.4142719030472812, "grad_norm": 0.13191118836402893, "learning_rate": 0.0005, "loss": 2.1324, "step": 108840 }, { "epoch": 0.4143099655154039, "grad_norm": 0.12312343716621399, "learning_rate": 0.0005, "loss": 2.1069, "step": 108850 }, { "epoch": 0.41434802798352655, "grad_norm": 0.1258789449930191, "learning_rate": 0.0005, "loss": 2.1221, "step": 108860 }, { "epoch": 0.41438609045164926, "grad_norm": 0.12822465598583221, "learning_rate": 0.0005, "loss": 2.1254, "step": 108870 }, { "epoch": 0.4144241529197719, "grad_norm": 0.1322626918554306, "learning_rate": 0.0005, "loss": 2.1133, "step": 108880 }, { "epoch": 0.41446221538789463, "grad_norm": 0.1310059130191803, "learning_rate": 0.0005, "loss": 2.1157, "step": 108890 }, { "epoch": 0.4145002778560173, "grad_norm": 0.12444756925106049, "learning_rate": 0.0005, "loss": 2.1127, "step": 108900 }, { "epoch": 0.41453834032414, "grad_norm": 0.191010981798172, "learning_rate": 0.0005, "loss": 2.1288, "step": 108910 }, { "epoch": 0.41457640279226265, "grad_norm": 0.13076089322566986, "learning_rate": 0.0005, "loss": 2.1176, "step": 108920 }, { "epoch": 0.41461446526038537, "grad_norm": 0.129410058259964, "learning_rate": 0.0005, "loss": 2.1263, "step": 108930 }, { "epoch": 0.414652527728508, "grad_norm": 0.12310182303190231, "learning_rate": 0.0005, "loss": 2.1103, "step": 108940 }, { "epoch": 0.41469059019663074, "grad_norm": 0.11926144361495972, "learning_rate": 0.0005, "loss": 2.1391, "step": 108950 }, { "epoch": 0.4147286526647534, "grad_norm": 0.12537449598312378, "learning_rate": 0.0005, "loss": 2.1012, "step": 108960 }, { "epoch": 0.41476671513287605, "grad_norm": 0.13044698536396027, "learning_rate": 0.0005, "loss": 2.1297, "step": 108970 }, { "epoch": 0.41480477760099876, "grad_norm": 0.1533544659614563, "learning_rate": 0.0005, "loss": 2.1293, "step": 108980 }, { "epoch": 0.4148428400691214, "grad_norm": 0.13309314846992493, "learning_rate": 0.0005, "loss": 2.1322, "step": 108990 }, { "epoch": 0.41488090253724413, "grad_norm": 0.11752888560295105, "learning_rate": 0.0005, "loss": 2.1136, "step": 109000 }, { "epoch": 0.4149189650053668, "grad_norm": 0.12122669070959091, "learning_rate": 0.0005, "loss": 2.1163, "step": 109010 }, { "epoch": 0.4149570274734895, "grad_norm": 0.12491890043020248, "learning_rate": 0.0005, "loss": 2.1242, "step": 109020 }, { "epoch": 0.41499508994161216, "grad_norm": 0.13501355051994324, "learning_rate": 0.0005, "loss": 2.1298, "step": 109030 }, { "epoch": 0.4150331524097349, "grad_norm": 0.1353226602077484, "learning_rate": 0.0005, "loss": 2.1021, "step": 109040 }, { "epoch": 0.41507121487785753, "grad_norm": 0.1288616806268692, "learning_rate": 0.0005, "loss": 2.1199, "step": 109050 }, { "epoch": 0.41510927734598024, "grad_norm": 0.11121494323015213, "learning_rate": 0.0005, "loss": 2.1336, "step": 109060 }, { "epoch": 0.4151473398141029, "grad_norm": 0.13124603033065796, "learning_rate": 0.0005, "loss": 2.1163, "step": 109070 }, { "epoch": 0.4151854022822256, "grad_norm": 0.14034585654735565, "learning_rate": 0.0005, "loss": 2.0972, "step": 109080 }, { "epoch": 0.41522346475034827, "grad_norm": 0.13125751912593842, "learning_rate": 0.0005, "loss": 2.1059, "step": 109090 }, { "epoch": 0.4152615272184709, "grad_norm": 0.13639673590660095, "learning_rate": 0.0005, "loss": 2.1251, "step": 109100 }, { "epoch": 0.41529958968659364, "grad_norm": 0.14386612176895142, "learning_rate": 0.0005, "loss": 2.1096, "step": 109110 }, { "epoch": 0.4153376521547163, "grad_norm": 0.11938704550266266, "learning_rate": 0.0005, "loss": 2.1118, "step": 109120 }, { "epoch": 0.415375714622839, "grad_norm": 0.1277218759059906, "learning_rate": 0.0005, "loss": 2.1162, "step": 109130 }, { "epoch": 0.41541377709096167, "grad_norm": 0.13642755150794983, "learning_rate": 0.0005, "loss": 2.1034, "step": 109140 }, { "epoch": 0.4154518395590844, "grad_norm": 0.11509495973587036, "learning_rate": 0.0005, "loss": 2.1352, "step": 109150 }, { "epoch": 0.41548990202720704, "grad_norm": 0.12859298288822174, "learning_rate": 0.0005, "loss": 2.1338, "step": 109160 }, { "epoch": 0.41552796449532975, "grad_norm": 0.11672429740428925, "learning_rate": 0.0005, "loss": 2.1201, "step": 109170 }, { "epoch": 0.4155660269634524, "grad_norm": 0.12255655974149704, "learning_rate": 0.0005, "loss": 2.1225, "step": 109180 }, { "epoch": 0.4156040894315751, "grad_norm": 0.12491326034069061, "learning_rate": 0.0005, "loss": 2.132, "step": 109190 }, { "epoch": 0.4156421518996978, "grad_norm": 0.1362716257572174, "learning_rate": 0.0005, "loss": 2.1273, "step": 109200 }, { "epoch": 0.4156802143678205, "grad_norm": 0.13377702236175537, "learning_rate": 0.0005, "loss": 2.1219, "step": 109210 }, { "epoch": 0.41571827683594315, "grad_norm": 0.13841603696346283, "learning_rate": 0.0005, "loss": 2.1163, "step": 109220 }, { "epoch": 0.41575633930406586, "grad_norm": 0.139837846159935, "learning_rate": 0.0005, "loss": 2.116, "step": 109230 }, { "epoch": 0.4157944017721885, "grad_norm": 0.12048280239105225, "learning_rate": 0.0005, "loss": 2.1144, "step": 109240 }, { "epoch": 0.4158324642403112, "grad_norm": 0.12809380888938904, "learning_rate": 0.0005, "loss": 2.1141, "step": 109250 }, { "epoch": 0.4158705267084339, "grad_norm": 0.13459837436676025, "learning_rate": 0.0005, "loss": 2.1115, "step": 109260 }, { "epoch": 0.41590858917655654, "grad_norm": 0.14781025052070618, "learning_rate": 0.0005, "loss": 2.1238, "step": 109270 }, { "epoch": 0.41594665164467925, "grad_norm": 0.12983685731887817, "learning_rate": 0.0005, "loss": 2.1344, "step": 109280 }, { "epoch": 0.4159847141128019, "grad_norm": 0.12982770800590515, "learning_rate": 0.0005, "loss": 2.106, "step": 109290 }, { "epoch": 0.4160227765809246, "grad_norm": 0.12221802771091461, "learning_rate": 0.0005, "loss": 2.1274, "step": 109300 }, { "epoch": 0.4160608390490473, "grad_norm": 0.1298155039548874, "learning_rate": 0.0005, "loss": 2.1276, "step": 109310 }, { "epoch": 0.41609890151717, "grad_norm": 0.12300161272287369, "learning_rate": 0.0005, "loss": 2.1289, "step": 109320 }, { "epoch": 0.41613696398529265, "grad_norm": 0.1435967981815338, "learning_rate": 0.0005, "loss": 2.1092, "step": 109330 }, { "epoch": 0.41617502645341536, "grad_norm": 0.1296965479850769, "learning_rate": 0.0005, "loss": 2.1347, "step": 109340 }, { "epoch": 0.416213088921538, "grad_norm": 0.1250772625207901, "learning_rate": 0.0005, "loss": 2.1216, "step": 109350 }, { "epoch": 0.41625115138966073, "grad_norm": 0.12681035697460175, "learning_rate": 0.0005, "loss": 2.1153, "step": 109360 }, { "epoch": 0.4162892138577834, "grad_norm": 0.12260159105062485, "learning_rate": 0.0005, "loss": 2.1162, "step": 109370 }, { "epoch": 0.4163272763259061, "grad_norm": 0.12412714958190918, "learning_rate": 0.0005, "loss": 2.1098, "step": 109380 }, { "epoch": 0.41636533879402876, "grad_norm": 0.13800972700119019, "learning_rate": 0.0005, "loss": 2.1146, "step": 109390 }, { "epoch": 0.4164034012621514, "grad_norm": 0.14430291950702667, "learning_rate": 0.0005, "loss": 2.1195, "step": 109400 }, { "epoch": 0.41644146373027413, "grad_norm": 0.1282130479812622, "learning_rate": 0.0005, "loss": 2.108, "step": 109410 }, { "epoch": 0.4164795261983968, "grad_norm": 0.12316697835922241, "learning_rate": 0.0005, "loss": 2.1168, "step": 109420 }, { "epoch": 0.4165175886665195, "grad_norm": 0.1190483421087265, "learning_rate": 0.0005, "loss": 2.1216, "step": 109430 }, { "epoch": 0.41655565113464216, "grad_norm": 0.1316487193107605, "learning_rate": 0.0005, "loss": 2.1331, "step": 109440 }, { "epoch": 0.41659371360276487, "grad_norm": 0.1279810070991516, "learning_rate": 0.0005, "loss": 2.1262, "step": 109450 }, { "epoch": 0.4166317760708875, "grad_norm": 0.12146064639091492, "learning_rate": 0.0005, "loss": 2.1118, "step": 109460 }, { "epoch": 0.41666983853901024, "grad_norm": 0.12453345954418182, "learning_rate": 0.0005, "loss": 2.1243, "step": 109470 }, { "epoch": 0.4167079010071329, "grad_norm": 0.12459386140108109, "learning_rate": 0.0005, "loss": 2.1227, "step": 109480 }, { "epoch": 0.4167459634752556, "grad_norm": 0.12696263194084167, "learning_rate": 0.0005, "loss": 2.1291, "step": 109490 }, { "epoch": 0.41678402594337827, "grad_norm": 0.13806460797786713, "learning_rate": 0.0005, "loss": 2.115, "step": 109500 }, { "epoch": 0.416822088411501, "grad_norm": 0.15819978713989258, "learning_rate": 0.0005, "loss": 2.1044, "step": 109510 }, { "epoch": 0.41686015087962364, "grad_norm": 0.14383956789970398, "learning_rate": 0.0005, "loss": 2.1245, "step": 109520 }, { "epoch": 0.4168982133477463, "grad_norm": 0.1303814798593521, "learning_rate": 0.0005, "loss": 2.1288, "step": 109530 }, { "epoch": 0.416936275815869, "grad_norm": 0.12524403631687164, "learning_rate": 0.0005, "loss": 2.1192, "step": 109540 }, { "epoch": 0.41697433828399166, "grad_norm": 0.13339385390281677, "learning_rate": 0.0005, "loss": 2.1148, "step": 109550 }, { "epoch": 0.4170124007521144, "grad_norm": 0.13040074706077576, "learning_rate": 0.0005, "loss": 2.1126, "step": 109560 }, { "epoch": 0.41705046322023703, "grad_norm": 0.14253298938274384, "learning_rate": 0.0005, "loss": 2.1382, "step": 109570 }, { "epoch": 0.41708852568835975, "grad_norm": 0.13015128672122955, "learning_rate": 0.0005, "loss": 2.1408, "step": 109580 }, { "epoch": 0.4171265881564824, "grad_norm": 0.11817413568496704, "learning_rate": 0.0005, "loss": 2.1167, "step": 109590 }, { "epoch": 0.4171646506246051, "grad_norm": 0.1301771104335785, "learning_rate": 0.0005, "loss": 2.1219, "step": 109600 }, { "epoch": 0.4172027130927278, "grad_norm": 0.1292882263660431, "learning_rate": 0.0005, "loss": 2.1095, "step": 109610 }, { "epoch": 0.4172407755608505, "grad_norm": 0.1226472333073616, "learning_rate": 0.0005, "loss": 2.1234, "step": 109620 }, { "epoch": 0.41727883802897314, "grad_norm": 0.12784558534622192, "learning_rate": 0.0005, "loss": 2.1111, "step": 109630 }, { "epoch": 0.41731690049709586, "grad_norm": 0.12261635810136795, "learning_rate": 0.0005, "loss": 2.1179, "step": 109640 }, { "epoch": 0.4173549629652185, "grad_norm": 0.13003894686698914, "learning_rate": 0.0005, "loss": 2.1141, "step": 109650 }, { "epoch": 0.4173930254333412, "grad_norm": 0.13531725108623505, "learning_rate": 0.0005, "loss": 2.1332, "step": 109660 }, { "epoch": 0.4174310879014639, "grad_norm": 0.1280001699924469, "learning_rate": 0.0005, "loss": 2.1139, "step": 109670 }, { "epoch": 0.41746915036958654, "grad_norm": 0.17585636675357819, "learning_rate": 0.0005, "loss": 2.1308, "step": 109680 }, { "epoch": 0.41750721283770925, "grad_norm": 0.1245546042919159, "learning_rate": 0.0005, "loss": 2.1134, "step": 109690 }, { "epoch": 0.4175452753058319, "grad_norm": 0.14035065472126007, "learning_rate": 0.0005, "loss": 2.1172, "step": 109700 }, { "epoch": 0.4175833377739546, "grad_norm": 0.127082958817482, "learning_rate": 0.0005, "loss": 2.123, "step": 109710 }, { "epoch": 0.4176214002420773, "grad_norm": 0.12953048944473267, "learning_rate": 0.0005, "loss": 2.1112, "step": 109720 }, { "epoch": 0.4176594627102, "grad_norm": 0.13001421093940735, "learning_rate": 0.0005, "loss": 2.1062, "step": 109730 }, { "epoch": 0.41769752517832265, "grad_norm": 0.130632683634758, "learning_rate": 0.0005, "loss": 2.1311, "step": 109740 }, { "epoch": 0.41773558764644536, "grad_norm": 0.13448363542556763, "learning_rate": 0.0005, "loss": 2.1171, "step": 109750 }, { "epoch": 0.417773650114568, "grad_norm": 0.12333223223686218, "learning_rate": 0.0005, "loss": 2.1029, "step": 109760 }, { "epoch": 0.41781171258269073, "grad_norm": 0.11951614171266556, "learning_rate": 0.0005, "loss": 2.115, "step": 109770 }, { "epoch": 0.4178497750508134, "grad_norm": 0.14794956147670746, "learning_rate": 0.0005, "loss": 2.1094, "step": 109780 }, { "epoch": 0.4178878375189361, "grad_norm": 0.13375519216060638, "learning_rate": 0.0005, "loss": 2.1057, "step": 109790 }, { "epoch": 0.41792589998705876, "grad_norm": 0.1377677023410797, "learning_rate": 0.0005, "loss": 2.1435, "step": 109800 }, { "epoch": 0.41796396245518147, "grad_norm": 0.12577351927757263, "learning_rate": 0.0005, "loss": 2.1294, "step": 109810 }, { "epoch": 0.4180020249233041, "grad_norm": 0.1207052618265152, "learning_rate": 0.0005, "loss": 2.1263, "step": 109820 }, { "epoch": 0.4180400873914268, "grad_norm": 0.11211320012807846, "learning_rate": 0.0005, "loss": 2.1208, "step": 109830 }, { "epoch": 0.4180781498595495, "grad_norm": 0.12020255625247955, "learning_rate": 0.0005, "loss": 2.124, "step": 109840 }, { "epoch": 0.41811621232767215, "grad_norm": 0.12813393771648407, "learning_rate": 0.0005, "loss": 2.1147, "step": 109850 }, { "epoch": 0.41815427479579487, "grad_norm": 0.11809521913528442, "learning_rate": 0.0005, "loss": 2.0994, "step": 109860 }, { "epoch": 0.4181923372639175, "grad_norm": 0.1264045387506485, "learning_rate": 0.0005, "loss": 2.1227, "step": 109870 }, { "epoch": 0.41823039973204024, "grad_norm": 0.11605829745531082, "learning_rate": 0.0005, "loss": 2.1316, "step": 109880 }, { "epoch": 0.4182684622001629, "grad_norm": 0.12971541285514832, "learning_rate": 0.0005, "loss": 2.1197, "step": 109890 }, { "epoch": 0.4183065246682856, "grad_norm": 0.11807116121053696, "learning_rate": 0.0005, "loss": 2.1248, "step": 109900 }, { "epoch": 0.41834458713640826, "grad_norm": 0.12423138320446014, "learning_rate": 0.0005, "loss": 2.1414, "step": 109910 }, { "epoch": 0.418382649604531, "grad_norm": 0.12973003089427948, "learning_rate": 0.0005, "loss": 2.1295, "step": 109920 }, { "epoch": 0.41842071207265363, "grad_norm": 0.1259157955646515, "learning_rate": 0.0005, "loss": 2.1051, "step": 109930 }, { "epoch": 0.41845877454077635, "grad_norm": 0.1268201619386673, "learning_rate": 0.0005, "loss": 2.1283, "step": 109940 }, { "epoch": 0.418496837008899, "grad_norm": 0.1224651113152504, "learning_rate": 0.0005, "loss": 2.1192, "step": 109950 }, { "epoch": 0.4185348994770217, "grad_norm": 0.1276530921459198, "learning_rate": 0.0005, "loss": 2.1268, "step": 109960 }, { "epoch": 0.4185729619451444, "grad_norm": 0.12536892294883728, "learning_rate": 0.0005, "loss": 2.125, "step": 109970 }, { "epoch": 0.41861102441326703, "grad_norm": 0.12223321944475174, "learning_rate": 0.0005, "loss": 2.1152, "step": 109980 }, { "epoch": 0.41864908688138974, "grad_norm": 0.11520811915397644, "learning_rate": 0.0005, "loss": 2.1315, "step": 109990 }, { "epoch": 0.4186871493495124, "grad_norm": 0.12322820723056793, "learning_rate": 0.0005, "loss": 2.1257, "step": 110000 }, { "epoch": 0.4187252118176351, "grad_norm": 0.12412966787815094, "learning_rate": 0.0005, "loss": 2.1208, "step": 110010 }, { "epoch": 0.41876327428575777, "grad_norm": 0.13063424825668335, "learning_rate": 0.0005, "loss": 2.1111, "step": 110020 }, { "epoch": 0.4188013367538805, "grad_norm": 0.13508233428001404, "learning_rate": 0.0005, "loss": 2.1083, "step": 110030 }, { "epoch": 0.41883939922200314, "grad_norm": 0.11833116412162781, "learning_rate": 0.0005, "loss": 2.1194, "step": 110040 }, { "epoch": 0.41887746169012585, "grad_norm": 0.12042485922574997, "learning_rate": 0.0005, "loss": 2.1287, "step": 110050 }, { "epoch": 0.4189155241582485, "grad_norm": 0.1296975016593933, "learning_rate": 0.0005, "loss": 2.1208, "step": 110060 }, { "epoch": 0.4189535866263712, "grad_norm": 0.12232377380132675, "learning_rate": 0.0005, "loss": 2.1284, "step": 110070 }, { "epoch": 0.4189916490944939, "grad_norm": 0.24385973811149597, "learning_rate": 0.0005, "loss": 2.1235, "step": 110080 }, { "epoch": 0.4190297115626166, "grad_norm": 0.13045060634613037, "learning_rate": 0.0005, "loss": 2.1327, "step": 110090 }, { "epoch": 0.41906777403073925, "grad_norm": 0.1269223541021347, "learning_rate": 0.0005, "loss": 2.12, "step": 110100 }, { "epoch": 0.4191058364988619, "grad_norm": 0.12370885163545609, "learning_rate": 0.0005, "loss": 2.1402, "step": 110110 }, { "epoch": 0.4191438989669846, "grad_norm": 0.12285878509283066, "learning_rate": 0.0005, "loss": 2.128, "step": 110120 }, { "epoch": 0.4191819614351073, "grad_norm": 0.12880626320838928, "learning_rate": 0.0005, "loss": 2.1181, "step": 110130 }, { "epoch": 0.41922002390323, "grad_norm": 0.1254742443561554, "learning_rate": 0.0005, "loss": 2.113, "step": 110140 }, { "epoch": 0.41925808637135265, "grad_norm": 0.14333873987197876, "learning_rate": 0.0005, "loss": 2.1123, "step": 110150 }, { "epoch": 0.41929614883947536, "grad_norm": 0.13128043711185455, "learning_rate": 0.0005, "loss": 2.136, "step": 110160 }, { "epoch": 0.419334211307598, "grad_norm": 0.15221351385116577, "learning_rate": 0.0005, "loss": 2.1264, "step": 110170 }, { "epoch": 0.4193722737757207, "grad_norm": 0.11888156831264496, "learning_rate": 0.0005, "loss": 2.1093, "step": 110180 }, { "epoch": 0.4194103362438434, "grad_norm": 0.12805290520191193, "learning_rate": 0.0005, "loss": 2.124, "step": 110190 }, { "epoch": 0.4194483987119661, "grad_norm": 0.12789416313171387, "learning_rate": 0.0005, "loss": 2.1261, "step": 110200 }, { "epoch": 0.41948646118008875, "grad_norm": 0.12878291308879852, "learning_rate": 0.0005, "loss": 2.1136, "step": 110210 }, { "epoch": 0.41952452364821147, "grad_norm": 0.12528999149799347, "learning_rate": 0.0005, "loss": 2.121, "step": 110220 }, { "epoch": 0.4195625861163341, "grad_norm": 0.12756268680095673, "learning_rate": 0.0005, "loss": 2.0986, "step": 110230 }, { "epoch": 0.41960064858445684, "grad_norm": 0.1297759711742401, "learning_rate": 0.0005, "loss": 2.1167, "step": 110240 }, { "epoch": 0.4196387110525795, "grad_norm": 0.11695853620767593, "learning_rate": 0.0005, "loss": 2.1208, "step": 110250 }, { "epoch": 0.41967677352070215, "grad_norm": 0.14145225286483765, "learning_rate": 0.0005, "loss": 2.1201, "step": 110260 }, { "epoch": 0.41971483598882486, "grad_norm": 0.13454757630825043, "learning_rate": 0.0005, "loss": 2.124, "step": 110270 }, { "epoch": 0.4197528984569475, "grad_norm": 0.1235680803656578, "learning_rate": 0.0005, "loss": 2.1181, "step": 110280 }, { "epoch": 0.41979096092507023, "grad_norm": 0.12550152838230133, "learning_rate": 0.0005, "loss": 2.1186, "step": 110290 }, { "epoch": 0.4198290233931929, "grad_norm": 0.1230005994439125, "learning_rate": 0.0005, "loss": 2.1266, "step": 110300 }, { "epoch": 0.4198670858613156, "grad_norm": 0.1317501813173294, "learning_rate": 0.0005, "loss": 2.1223, "step": 110310 }, { "epoch": 0.41990514832943826, "grad_norm": 0.1260969638824463, "learning_rate": 0.0005, "loss": 2.1159, "step": 110320 }, { "epoch": 0.419943210797561, "grad_norm": 0.1307699829339981, "learning_rate": 0.0005, "loss": 2.1289, "step": 110330 }, { "epoch": 0.41998127326568363, "grad_norm": 0.12259736657142639, "learning_rate": 0.0005, "loss": 2.1156, "step": 110340 }, { "epoch": 0.42001933573380634, "grad_norm": 0.12683376669883728, "learning_rate": 0.0005, "loss": 2.1307, "step": 110350 }, { "epoch": 0.420057398201929, "grad_norm": 0.12672072649002075, "learning_rate": 0.0005, "loss": 2.1152, "step": 110360 }, { "epoch": 0.4200954606700517, "grad_norm": 0.12184534966945648, "learning_rate": 0.0005, "loss": 2.1095, "step": 110370 }, { "epoch": 0.42013352313817437, "grad_norm": 0.13169150054454803, "learning_rate": 0.0005, "loss": 2.1004, "step": 110380 }, { "epoch": 0.4201715856062971, "grad_norm": 0.13503016531467438, "learning_rate": 0.0005, "loss": 2.1233, "step": 110390 }, { "epoch": 0.42020964807441974, "grad_norm": 0.12989014387130737, "learning_rate": 0.0005, "loss": 2.1338, "step": 110400 }, { "epoch": 0.4202477105425424, "grad_norm": 0.13313154876232147, "learning_rate": 0.0005, "loss": 2.1225, "step": 110410 }, { "epoch": 0.4202857730106651, "grad_norm": 0.12498737126588821, "learning_rate": 0.0005, "loss": 2.1223, "step": 110420 }, { "epoch": 0.42032383547878777, "grad_norm": 0.1535029113292694, "learning_rate": 0.0005, "loss": 2.1161, "step": 110430 }, { "epoch": 0.4203618979469105, "grad_norm": 0.1109837144613266, "learning_rate": 0.0005, "loss": 2.1225, "step": 110440 }, { "epoch": 0.42039996041503314, "grad_norm": 0.12009678035974503, "learning_rate": 0.0005, "loss": 2.1311, "step": 110450 }, { "epoch": 0.42043802288315585, "grad_norm": 0.12859448790550232, "learning_rate": 0.0005, "loss": 2.1244, "step": 110460 }, { "epoch": 0.4204760853512785, "grad_norm": 0.1498848795890808, "learning_rate": 0.0005, "loss": 2.1169, "step": 110470 }, { "epoch": 0.4205141478194012, "grad_norm": 0.1277942806482315, "learning_rate": 0.0005, "loss": 2.1196, "step": 110480 }, { "epoch": 0.4205522102875239, "grad_norm": 0.12816040217876434, "learning_rate": 0.0005, "loss": 2.1289, "step": 110490 }, { "epoch": 0.4205902727556466, "grad_norm": 0.12099416553974152, "learning_rate": 0.0005, "loss": 2.0978, "step": 110500 }, { "epoch": 0.42062833522376925, "grad_norm": 0.12932099401950836, "learning_rate": 0.0005, "loss": 2.113, "step": 110510 }, { "epoch": 0.42066639769189196, "grad_norm": 0.13165128231048584, "learning_rate": 0.0005, "loss": 2.1321, "step": 110520 }, { "epoch": 0.4207044601600146, "grad_norm": 0.12216272950172424, "learning_rate": 0.0005, "loss": 2.1112, "step": 110530 }, { "epoch": 0.42074252262813727, "grad_norm": 0.1164298951625824, "learning_rate": 0.0005, "loss": 2.1157, "step": 110540 }, { "epoch": 0.42078058509626, "grad_norm": 0.12431590259075165, "learning_rate": 0.0005, "loss": 2.1233, "step": 110550 }, { "epoch": 0.42081864756438264, "grad_norm": 0.1205281987786293, "learning_rate": 0.0005, "loss": 2.1316, "step": 110560 }, { "epoch": 0.42085671003250535, "grad_norm": 0.12753494083881378, "learning_rate": 0.0005, "loss": 2.1209, "step": 110570 }, { "epoch": 0.420894772500628, "grad_norm": 0.13406816124916077, "learning_rate": 0.0005, "loss": 2.1143, "step": 110580 }, { "epoch": 0.4209328349687507, "grad_norm": 0.13061662018299103, "learning_rate": 0.0005, "loss": 2.113, "step": 110590 }, { "epoch": 0.4209708974368734, "grad_norm": 0.13684722781181335, "learning_rate": 0.0005, "loss": 2.1136, "step": 110600 }, { "epoch": 0.4210089599049961, "grad_norm": 0.1287647932767868, "learning_rate": 0.0005, "loss": 2.1176, "step": 110610 }, { "epoch": 0.42104702237311875, "grad_norm": 0.14672592282295227, "learning_rate": 0.0005, "loss": 2.1242, "step": 110620 }, { "epoch": 0.42108508484124146, "grad_norm": 0.14222227036952972, "learning_rate": 0.0005, "loss": 2.1078, "step": 110630 }, { "epoch": 0.4211231473093641, "grad_norm": 0.13052073121070862, "learning_rate": 0.0005, "loss": 2.0995, "step": 110640 }, { "epoch": 0.42116120977748683, "grad_norm": 0.12732751667499542, "learning_rate": 0.0005, "loss": 2.1202, "step": 110650 }, { "epoch": 0.4211992722456095, "grad_norm": 0.11703453212976456, "learning_rate": 0.0005, "loss": 2.1176, "step": 110660 }, { "epoch": 0.4212373347137322, "grad_norm": 0.1263093650341034, "learning_rate": 0.0005, "loss": 2.1201, "step": 110670 }, { "epoch": 0.42127539718185486, "grad_norm": 0.12344750761985779, "learning_rate": 0.0005, "loss": 2.1085, "step": 110680 }, { "epoch": 0.4213134596499775, "grad_norm": 0.12565580010414124, "learning_rate": 0.0005, "loss": 2.1051, "step": 110690 }, { "epoch": 0.42135152211810023, "grad_norm": 0.12912888824939728, "learning_rate": 0.0005, "loss": 2.1347, "step": 110700 }, { "epoch": 0.4213895845862229, "grad_norm": 0.12103404849767685, "learning_rate": 0.0005, "loss": 2.1169, "step": 110710 }, { "epoch": 0.4214276470543456, "grad_norm": 0.1245717853307724, "learning_rate": 0.0005, "loss": 2.1252, "step": 110720 }, { "epoch": 0.42146570952246826, "grad_norm": 0.1308036893606186, "learning_rate": 0.0005, "loss": 2.1106, "step": 110730 }, { "epoch": 0.42150377199059097, "grad_norm": 0.133273184299469, "learning_rate": 0.0005, "loss": 2.1323, "step": 110740 }, { "epoch": 0.4215418344587136, "grad_norm": 0.11989910155534744, "learning_rate": 0.0005, "loss": 2.1121, "step": 110750 }, { "epoch": 0.42157989692683634, "grad_norm": 0.12504975497722626, "learning_rate": 0.0005, "loss": 2.1069, "step": 110760 }, { "epoch": 0.421617959394959, "grad_norm": 0.12130273878574371, "learning_rate": 0.0005, "loss": 2.1234, "step": 110770 }, { "epoch": 0.4216560218630817, "grad_norm": 0.12154131382703781, "learning_rate": 0.0005, "loss": 2.1183, "step": 110780 }, { "epoch": 0.42169408433120437, "grad_norm": 0.13687552511692047, "learning_rate": 0.0005, "loss": 2.1289, "step": 110790 }, { "epoch": 0.4217321467993271, "grad_norm": 0.13189378380775452, "learning_rate": 0.0005, "loss": 2.1223, "step": 110800 }, { "epoch": 0.42177020926744974, "grad_norm": 0.14396044611930847, "learning_rate": 0.0005, "loss": 2.127, "step": 110810 }, { "epoch": 0.42180827173557245, "grad_norm": 0.11544430255889893, "learning_rate": 0.0005, "loss": 2.1222, "step": 110820 }, { "epoch": 0.4218463342036951, "grad_norm": 0.12114161998033524, "learning_rate": 0.0005, "loss": 2.1187, "step": 110830 }, { "epoch": 0.42188439667181776, "grad_norm": 0.1264815628528595, "learning_rate": 0.0005, "loss": 2.1263, "step": 110840 }, { "epoch": 0.4219224591399405, "grad_norm": 0.1236187294125557, "learning_rate": 0.0005, "loss": 2.1248, "step": 110850 }, { "epoch": 0.42196052160806313, "grad_norm": 0.1219010129570961, "learning_rate": 0.0005, "loss": 2.1157, "step": 110860 }, { "epoch": 0.42199858407618585, "grad_norm": 0.11491255462169647, "learning_rate": 0.0005, "loss": 2.1269, "step": 110870 }, { "epoch": 0.4220366465443085, "grad_norm": 0.12548251450061798, "learning_rate": 0.0005, "loss": 2.1095, "step": 110880 }, { "epoch": 0.4220747090124312, "grad_norm": 0.1220545768737793, "learning_rate": 0.0005, "loss": 2.1149, "step": 110890 }, { "epoch": 0.42211277148055387, "grad_norm": 0.12204521894454956, "learning_rate": 0.0005, "loss": 2.1361, "step": 110900 }, { "epoch": 0.4221508339486766, "grad_norm": 0.12067580968141556, "learning_rate": 0.0005, "loss": 2.1242, "step": 110910 }, { "epoch": 0.42218889641679924, "grad_norm": 0.13050217926502228, "learning_rate": 0.0005, "loss": 2.1058, "step": 110920 }, { "epoch": 0.42222695888492195, "grad_norm": 0.11881910264492035, "learning_rate": 0.0005, "loss": 2.1132, "step": 110930 }, { "epoch": 0.4222650213530446, "grad_norm": 0.12050008773803711, "learning_rate": 0.0005, "loss": 2.1281, "step": 110940 }, { "epoch": 0.4223030838211673, "grad_norm": 0.12418985366821289, "learning_rate": 0.0005, "loss": 2.1265, "step": 110950 }, { "epoch": 0.42234114628929, "grad_norm": 0.13076156377792358, "learning_rate": 0.0005, "loss": 2.1165, "step": 110960 }, { "epoch": 0.42237920875741264, "grad_norm": 0.12248754501342773, "learning_rate": 0.0005, "loss": 2.109, "step": 110970 }, { "epoch": 0.42241727122553535, "grad_norm": 0.12590964138507843, "learning_rate": 0.0005, "loss": 2.1291, "step": 110980 }, { "epoch": 0.422455333693658, "grad_norm": 0.12941862642765045, "learning_rate": 0.0005, "loss": 2.1245, "step": 110990 }, { "epoch": 0.4224933961617807, "grad_norm": 0.12657110393047333, "learning_rate": 0.0005, "loss": 2.1204, "step": 111000 }, { "epoch": 0.4225314586299034, "grad_norm": 0.2467130869626999, "learning_rate": 0.0005, "loss": 2.1226, "step": 111010 }, { "epoch": 0.4225695210980261, "grad_norm": 0.12490589171648026, "learning_rate": 0.0005, "loss": 2.1182, "step": 111020 }, { "epoch": 0.42260758356614875, "grad_norm": 0.5077695250511169, "learning_rate": 0.0005, "loss": 2.1241, "step": 111030 }, { "epoch": 0.42264564603427146, "grad_norm": 0.13496220111846924, "learning_rate": 0.0005, "loss": 2.1147, "step": 111040 }, { "epoch": 0.4226837085023941, "grad_norm": 0.12376722693443298, "learning_rate": 0.0005, "loss": 2.1186, "step": 111050 }, { "epoch": 0.42272177097051683, "grad_norm": 0.12206427752971649, "learning_rate": 0.0005, "loss": 2.11, "step": 111060 }, { "epoch": 0.4227598334386395, "grad_norm": 0.11902330070734024, "learning_rate": 0.0005, "loss": 2.1095, "step": 111070 }, { "epoch": 0.4227978959067622, "grad_norm": 0.12036629021167755, "learning_rate": 0.0005, "loss": 2.1221, "step": 111080 }, { "epoch": 0.42283595837488486, "grad_norm": 0.11524273455142975, "learning_rate": 0.0005, "loss": 2.1121, "step": 111090 }, { "epoch": 0.42287402084300757, "grad_norm": 0.12553663551807404, "learning_rate": 0.0005, "loss": 2.1172, "step": 111100 }, { "epoch": 0.4229120833111302, "grad_norm": 0.11619086563587189, "learning_rate": 0.0005, "loss": 2.1445, "step": 111110 }, { "epoch": 0.4229501457792529, "grad_norm": 0.12655623257160187, "learning_rate": 0.0005, "loss": 2.1165, "step": 111120 }, { "epoch": 0.4229882082473756, "grad_norm": 0.14359208941459656, "learning_rate": 0.0005, "loss": 2.1352, "step": 111130 }, { "epoch": 0.42302627071549825, "grad_norm": 0.1263459175825119, "learning_rate": 0.0005, "loss": 2.1154, "step": 111140 }, { "epoch": 0.42306433318362097, "grad_norm": 0.12120271474123001, "learning_rate": 0.0005, "loss": 2.1046, "step": 111150 }, { "epoch": 0.4231023956517436, "grad_norm": 0.14535973966121674, "learning_rate": 0.0005, "loss": 2.1274, "step": 111160 }, { "epoch": 0.42314045811986634, "grad_norm": 0.1274825483560562, "learning_rate": 0.0005, "loss": 2.1143, "step": 111170 }, { "epoch": 0.423178520587989, "grad_norm": 0.1486794650554657, "learning_rate": 0.0005, "loss": 2.1174, "step": 111180 }, { "epoch": 0.4232165830561117, "grad_norm": 0.13074061274528503, "learning_rate": 0.0005, "loss": 2.1094, "step": 111190 }, { "epoch": 0.42325464552423436, "grad_norm": 0.12979604303836823, "learning_rate": 0.0005, "loss": 2.1146, "step": 111200 }, { "epoch": 0.4232927079923571, "grad_norm": 0.1272452175617218, "learning_rate": 0.0005, "loss": 2.1278, "step": 111210 }, { "epoch": 0.42333077046047973, "grad_norm": 0.13075511157512665, "learning_rate": 0.0005, "loss": 2.1026, "step": 111220 }, { "epoch": 0.42336883292860245, "grad_norm": 0.13066555559635162, "learning_rate": 0.0005, "loss": 2.1208, "step": 111230 }, { "epoch": 0.4234068953967251, "grad_norm": 0.11858844757080078, "learning_rate": 0.0005, "loss": 2.1103, "step": 111240 }, { "epoch": 0.4234449578648478, "grad_norm": 0.14340876042842865, "learning_rate": 0.0005, "loss": 2.0985, "step": 111250 }, { "epoch": 0.4234830203329705, "grad_norm": 0.13483618199825287, "learning_rate": 0.0005, "loss": 2.1235, "step": 111260 }, { "epoch": 0.42352108280109313, "grad_norm": 0.12093952298164368, "learning_rate": 0.0005, "loss": 2.1146, "step": 111270 }, { "epoch": 0.42355914526921584, "grad_norm": 0.13822363317012787, "learning_rate": 0.0005, "loss": 2.1108, "step": 111280 }, { "epoch": 0.4235972077373385, "grad_norm": 0.120503731071949, "learning_rate": 0.0005, "loss": 2.1253, "step": 111290 }, { "epoch": 0.4236352702054612, "grad_norm": 0.11571825295686722, "learning_rate": 0.0005, "loss": 2.1126, "step": 111300 }, { "epoch": 0.42367333267358387, "grad_norm": 0.12174150347709656, "learning_rate": 0.0005, "loss": 2.1297, "step": 111310 }, { "epoch": 0.4237113951417066, "grad_norm": 0.12075323611497879, "learning_rate": 0.0005, "loss": 2.1155, "step": 111320 }, { "epoch": 0.42374945760982924, "grad_norm": 0.12751325964927673, "learning_rate": 0.0005, "loss": 2.1102, "step": 111330 }, { "epoch": 0.42378752007795195, "grad_norm": 0.13146482408046722, "learning_rate": 0.0005, "loss": 2.1116, "step": 111340 }, { "epoch": 0.4238255825460746, "grad_norm": 0.1132122352719307, "learning_rate": 0.0005, "loss": 2.1189, "step": 111350 }, { "epoch": 0.4238636450141973, "grad_norm": 0.12749198079109192, "learning_rate": 0.0005, "loss": 2.0998, "step": 111360 }, { "epoch": 0.42390170748232, "grad_norm": 0.14566144347190857, "learning_rate": 0.0005, "loss": 2.1123, "step": 111370 }, { "epoch": 0.4239397699504427, "grad_norm": 0.13449816405773163, "learning_rate": 0.0005, "loss": 2.1251, "step": 111380 }, { "epoch": 0.42397783241856535, "grad_norm": 0.1352243423461914, "learning_rate": 0.0005, "loss": 2.1204, "step": 111390 }, { "epoch": 0.424015894886688, "grad_norm": 0.12569397687911987, "learning_rate": 0.0005, "loss": 2.1349, "step": 111400 }, { "epoch": 0.4240539573548107, "grad_norm": 0.12069183588027954, "learning_rate": 0.0005, "loss": 2.1047, "step": 111410 }, { "epoch": 0.4240920198229334, "grad_norm": 0.13708123564720154, "learning_rate": 0.0005, "loss": 2.1145, "step": 111420 }, { "epoch": 0.4241300822910561, "grad_norm": 0.14026005566120148, "learning_rate": 0.0005, "loss": 2.1296, "step": 111430 }, { "epoch": 0.42416814475917874, "grad_norm": 0.12508703768253326, "learning_rate": 0.0005, "loss": 2.1191, "step": 111440 }, { "epoch": 0.42420620722730146, "grad_norm": 0.12320968508720398, "learning_rate": 0.0005, "loss": 2.1227, "step": 111450 }, { "epoch": 0.4242442696954241, "grad_norm": 0.128920778632164, "learning_rate": 0.0005, "loss": 2.118, "step": 111460 }, { "epoch": 0.4242823321635468, "grad_norm": 0.13704611361026764, "learning_rate": 0.0005, "loss": 2.1118, "step": 111470 }, { "epoch": 0.4243203946316695, "grad_norm": 0.12090374529361725, "learning_rate": 0.0005, "loss": 2.1099, "step": 111480 }, { "epoch": 0.4243584570997922, "grad_norm": 0.1212400496006012, "learning_rate": 0.0005, "loss": 2.1308, "step": 111490 }, { "epoch": 0.42439651956791485, "grad_norm": 0.13863784074783325, "learning_rate": 0.0005, "loss": 2.134, "step": 111500 }, { "epoch": 0.42443458203603757, "grad_norm": 0.13154999911785126, "learning_rate": 0.0005, "loss": 2.1085, "step": 111510 }, { "epoch": 0.4244726445041602, "grad_norm": 0.12200278043746948, "learning_rate": 0.0005, "loss": 2.1113, "step": 111520 }, { "epoch": 0.42451070697228294, "grad_norm": 0.11427143961191177, "learning_rate": 0.0005, "loss": 2.1134, "step": 111530 }, { "epoch": 0.4245487694404056, "grad_norm": 0.12608174979686737, "learning_rate": 0.0005, "loss": 2.1231, "step": 111540 }, { "epoch": 0.42458683190852825, "grad_norm": 0.11711114645004272, "learning_rate": 0.0005, "loss": 2.1274, "step": 111550 }, { "epoch": 0.42462489437665096, "grad_norm": 0.13191458582878113, "learning_rate": 0.0005, "loss": 2.1114, "step": 111560 }, { "epoch": 0.4246629568447736, "grad_norm": 0.13757798075675964, "learning_rate": 0.0005, "loss": 2.1361, "step": 111570 }, { "epoch": 0.42470101931289633, "grad_norm": 0.13054348528385162, "learning_rate": 0.0005, "loss": 2.12, "step": 111580 }, { "epoch": 0.424739081781019, "grad_norm": 0.12154204398393631, "learning_rate": 0.0005, "loss": 2.1224, "step": 111590 }, { "epoch": 0.4247771442491417, "grad_norm": 0.11809223145246506, "learning_rate": 0.0005, "loss": 2.1135, "step": 111600 }, { "epoch": 0.42481520671726436, "grad_norm": 0.11923825740814209, "learning_rate": 0.0005, "loss": 2.1058, "step": 111610 }, { "epoch": 0.4248532691853871, "grad_norm": 0.12535466253757477, "learning_rate": 0.0005, "loss": 2.1135, "step": 111620 }, { "epoch": 0.42489133165350973, "grad_norm": 0.1255112588405609, "learning_rate": 0.0005, "loss": 2.1219, "step": 111630 }, { "epoch": 0.42492939412163244, "grad_norm": 0.12606526911258698, "learning_rate": 0.0005, "loss": 2.1202, "step": 111640 }, { "epoch": 0.4249674565897551, "grad_norm": 0.13942117989063263, "learning_rate": 0.0005, "loss": 2.143, "step": 111650 }, { "epoch": 0.4250055190578778, "grad_norm": 0.12549205124378204, "learning_rate": 0.0005, "loss": 2.1174, "step": 111660 }, { "epoch": 0.42504358152600047, "grad_norm": 0.11477110534906387, "learning_rate": 0.0005, "loss": 2.1088, "step": 111670 }, { "epoch": 0.4250816439941232, "grad_norm": 0.12400896847248077, "learning_rate": 0.0005, "loss": 2.1251, "step": 111680 }, { "epoch": 0.42511970646224584, "grad_norm": 0.12146724760532379, "learning_rate": 0.0005, "loss": 2.106, "step": 111690 }, { "epoch": 0.4251577689303685, "grad_norm": 0.14244869351387024, "learning_rate": 0.0005, "loss": 2.1014, "step": 111700 }, { "epoch": 0.4251958313984912, "grad_norm": 0.13024653494358063, "learning_rate": 0.0005, "loss": 2.1186, "step": 111710 }, { "epoch": 0.42523389386661387, "grad_norm": 0.11714961379766464, "learning_rate": 0.0005, "loss": 2.1206, "step": 111720 }, { "epoch": 0.4252719563347366, "grad_norm": 0.11902771890163422, "learning_rate": 0.0005, "loss": 2.1243, "step": 111730 }, { "epoch": 0.42531001880285924, "grad_norm": 0.12405514717102051, "learning_rate": 0.0005, "loss": 2.1139, "step": 111740 }, { "epoch": 0.42534808127098195, "grad_norm": 0.1149262934923172, "learning_rate": 0.0005, "loss": 2.1098, "step": 111750 }, { "epoch": 0.4253861437391046, "grad_norm": 0.12680381536483765, "learning_rate": 0.0005, "loss": 2.1116, "step": 111760 }, { "epoch": 0.4254242062072273, "grad_norm": 0.1344337910413742, "learning_rate": 0.0005, "loss": 2.1143, "step": 111770 }, { "epoch": 0.42546226867535, "grad_norm": 0.12774929404258728, "learning_rate": 0.0005, "loss": 2.1189, "step": 111780 }, { "epoch": 0.4255003311434727, "grad_norm": 0.11900809407234192, "learning_rate": 0.0005, "loss": 2.1204, "step": 111790 }, { "epoch": 0.42553839361159534, "grad_norm": 0.13062550127506256, "learning_rate": 0.0005, "loss": 2.1316, "step": 111800 }, { "epoch": 0.42557645607971806, "grad_norm": 0.1314554363489151, "learning_rate": 0.0005, "loss": 2.1054, "step": 111810 }, { "epoch": 0.4256145185478407, "grad_norm": 0.12474439293146133, "learning_rate": 0.0005, "loss": 2.1281, "step": 111820 }, { "epoch": 0.42565258101596337, "grad_norm": 0.11799319088459015, "learning_rate": 0.0005, "loss": 2.1117, "step": 111830 }, { "epoch": 0.4256906434840861, "grad_norm": 0.13469941914081573, "learning_rate": 0.0005, "loss": 2.1186, "step": 111840 }, { "epoch": 0.42572870595220874, "grad_norm": 0.13602536916732788, "learning_rate": 0.0005, "loss": 2.1079, "step": 111850 }, { "epoch": 0.42576676842033145, "grad_norm": 0.13909238576889038, "learning_rate": 0.0005, "loss": 2.108, "step": 111860 }, { "epoch": 0.4258048308884541, "grad_norm": 0.11958032101392746, "learning_rate": 0.0005, "loss": 2.1146, "step": 111870 }, { "epoch": 0.4258428933565768, "grad_norm": 0.11667779088020325, "learning_rate": 0.0005, "loss": 2.1288, "step": 111880 }, { "epoch": 0.4258809558246995, "grad_norm": 0.13016003370285034, "learning_rate": 0.0005, "loss": 2.1137, "step": 111890 }, { "epoch": 0.4259190182928222, "grad_norm": 0.12318616360425949, "learning_rate": 0.0005, "loss": 2.123, "step": 111900 }, { "epoch": 0.42595708076094485, "grad_norm": 0.13127361238002777, "learning_rate": 0.0005, "loss": 2.1036, "step": 111910 }, { "epoch": 0.42599514322906756, "grad_norm": 0.1385347694158554, "learning_rate": 0.0005, "loss": 2.1206, "step": 111920 }, { "epoch": 0.4260332056971902, "grad_norm": 0.11675745248794556, "learning_rate": 0.0005, "loss": 2.1274, "step": 111930 }, { "epoch": 0.42607126816531293, "grad_norm": 0.1306353360414505, "learning_rate": 0.0005, "loss": 2.1263, "step": 111940 }, { "epoch": 0.4261093306334356, "grad_norm": 0.1188969761133194, "learning_rate": 0.0005, "loss": 2.1223, "step": 111950 }, { "epoch": 0.4261473931015583, "grad_norm": 0.12761636078357697, "learning_rate": 0.0005, "loss": 2.1191, "step": 111960 }, { "epoch": 0.42618545556968096, "grad_norm": 0.1282307654619217, "learning_rate": 0.0005, "loss": 2.1253, "step": 111970 }, { "epoch": 0.4262235180378036, "grad_norm": 0.12889324128627777, "learning_rate": 0.0005, "loss": 2.1302, "step": 111980 }, { "epoch": 0.42626158050592633, "grad_norm": 0.12320785969495773, "learning_rate": 0.0005, "loss": 2.1294, "step": 111990 }, { "epoch": 0.426299642974049, "grad_norm": 0.13121485710144043, "learning_rate": 0.0005, "loss": 2.1201, "step": 112000 }, { "epoch": 0.4263377054421717, "grad_norm": 0.1245298683643341, "learning_rate": 0.0005, "loss": 2.1281, "step": 112010 }, { "epoch": 0.42637576791029436, "grad_norm": 0.1187111884355545, "learning_rate": 0.0005, "loss": 2.1076, "step": 112020 }, { "epoch": 0.42641383037841707, "grad_norm": 0.11888144165277481, "learning_rate": 0.0005, "loss": 2.1256, "step": 112030 }, { "epoch": 0.4264518928465397, "grad_norm": 0.12159156799316406, "learning_rate": 0.0005, "loss": 2.1312, "step": 112040 }, { "epoch": 0.42648995531466244, "grad_norm": 0.13763214647769928, "learning_rate": 0.0005, "loss": 2.1226, "step": 112050 }, { "epoch": 0.4265280177827851, "grad_norm": 0.12213977426290512, "learning_rate": 0.0005, "loss": 2.1069, "step": 112060 }, { "epoch": 0.4265660802509078, "grad_norm": 0.13264048099517822, "learning_rate": 0.0005, "loss": 2.1209, "step": 112070 }, { "epoch": 0.42660414271903047, "grad_norm": 0.12648789584636688, "learning_rate": 0.0005, "loss": 2.1148, "step": 112080 }, { "epoch": 0.4266422051871532, "grad_norm": 0.13669641315937042, "learning_rate": 0.0005, "loss": 2.1074, "step": 112090 }, { "epoch": 0.42668026765527584, "grad_norm": 0.12726552784442902, "learning_rate": 0.0005, "loss": 2.1208, "step": 112100 }, { "epoch": 0.42671833012339855, "grad_norm": 0.13499999046325684, "learning_rate": 0.0005, "loss": 2.134, "step": 112110 }, { "epoch": 0.4267563925915212, "grad_norm": 0.12138763815164566, "learning_rate": 0.0005, "loss": 2.1029, "step": 112120 }, { "epoch": 0.42679445505964386, "grad_norm": 0.11880981177091599, "learning_rate": 0.0005, "loss": 2.1283, "step": 112130 }, { "epoch": 0.4268325175277666, "grad_norm": 0.127463236451149, "learning_rate": 0.0005, "loss": 2.1052, "step": 112140 }, { "epoch": 0.42687057999588923, "grad_norm": 0.13302980363368988, "learning_rate": 0.0005, "loss": 2.1007, "step": 112150 }, { "epoch": 0.42690864246401194, "grad_norm": 0.14324358105659485, "learning_rate": 0.0005, "loss": 2.1147, "step": 112160 }, { "epoch": 0.4269467049321346, "grad_norm": 0.1454227864742279, "learning_rate": 0.0005, "loss": 2.1109, "step": 112170 }, { "epoch": 0.4269847674002573, "grad_norm": 0.12385271489620209, "learning_rate": 0.0005, "loss": 2.1314, "step": 112180 }, { "epoch": 0.42702282986837997, "grad_norm": 0.12873312830924988, "learning_rate": 0.0005, "loss": 2.1355, "step": 112190 }, { "epoch": 0.4270608923365027, "grad_norm": 0.11225327104330063, "learning_rate": 0.0005, "loss": 2.1144, "step": 112200 }, { "epoch": 0.42709895480462534, "grad_norm": 0.12275451421737671, "learning_rate": 0.0005, "loss": 2.129, "step": 112210 }, { "epoch": 0.42713701727274805, "grad_norm": 0.12081906199455261, "learning_rate": 0.0005, "loss": 2.0946, "step": 112220 }, { "epoch": 0.4271750797408707, "grad_norm": 0.14207085967063904, "learning_rate": 0.0005, "loss": 2.1239, "step": 112230 }, { "epoch": 0.4272131422089934, "grad_norm": 0.12282105535268784, "learning_rate": 0.0005, "loss": 2.1169, "step": 112240 }, { "epoch": 0.4272512046771161, "grad_norm": 0.1292695701122284, "learning_rate": 0.0005, "loss": 2.1099, "step": 112250 }, { "epoch": 0.4272892671452388, "grad_norm": 0.12830670177936554, "learning_rate": 0.0005, "loss": 2.1268, "step": 112260 }, { "epoch": 0.42732732961336145, "grad_norm": 0.12263115495443344, "learning_rate": 0.0005, "loss": 2.1136, "step": 112270 }, { "epoch": 0.4273653920814841, "grad_norm": 0.12189716100692749, "learning_rate": 0.0005, "loss": 2.1205, "step": 112280 }, { "epoch": 0.4274034545496068, "grad_norm": 0.12638132274150848, "learning_rate": 0.0005, "loss": 2.1128, "step": 112290 }, { "epoch": 0.4274415170177295, "grad_norm": 0.12426239252090454, "learning_rate": 0.0005, "loss": 2.1354, "step": 112300 }, { "epoch": 0.4274795794858522, "grad_norm": 0.1269400417804718, "learning_rate": 0.0005, "loss": 2.1136, "step": 112310 }, { "epoch": 0.42751764195397485, "grad_norm": 0.12956169247627258, "learning_rate": 0.0005, "loss": 2.1159, "step": 112320 }, { "epoch": 0.42755570442209756, "grad_norm": 0.14513267576694489, "learning_rate": 0.0005, "loss": 2.1155, "step": 112330 }, { "epoch": 0.4275937668902202, "grad_norm": 0.12880150973796844, "learning_rate": 0.0005, "loss": 2.1237, "step": 112340 }, { "epoch": 0.42763182935834293, "grad_norm": 0.1391003578901291, "learning_rate": 0.0005, "loss": 2.109, "step": 112350 }, { "epoch": 0.4276698918264656, "grad_norm": 0.12618103623390198, "learning_rate": 0.0005, "loss": 2.1071, "step": 112360 }, { "epoch": 0.4277079542945883, "grad_norm": 0.1251031756401062, "learning_rate": 0.0005, "loss": 2.1225, "step": 112370 }, { "epoch": 0.42774601676271096, "grad_norm": 0.11405282467603683, "learning_rate": 0.0005, "loss": 2.1172, "step": 112380 }, { "epoch": 0.42778407923083367, "grad_norm": 0.13576087355613708, "learning_rate": 0.0005, "loss": 2.1171, "step": 112390 }, { "epoch": 0.4278221416989563, "grad_norm": 0.1274125576019287, "learning_rate": 0.0005, "loss": 2.1093, "step": 112400 }, { "epoch": 0.427860204167079, "grad_norm": 0.11096604913473129, "learning_rate": 0.0005, "loss": 2.1178, "step": 112410 }, { "epoch": 0.4278982666352017, "grad_norm": 0.12114623188972473, "learning_rate": 0.0005, "loss": 2.0998, "step": 112420 }, { "epoch": 0.42793632910332435, "grad_norm": 0.13127700984477997, "learning_rate": 0.0005, "loss": 2.1073, "step": 112430 }, { "epoch": 0.42797439157144707, "grad_norm": 0.1115710511803627, "learning_rate": 0.0005, "loss": 2.1098, "step": 112440 }, { "epoch": 0.4280124540395697, "grad_norm": 0.12019863724708557, "learning_rate": 0.0005, "loss": 2.1112, "step": 112450 }, { "epoch": 0.42805051650769244, "grad_norm": 0.12405234575271606, "learning_rate": 0.0005, "loss": 2.1202, "step": 112460 }, { "epoch": 0.4280885789758151, "grad_norm": 0.11588817089796066, "learning_rate": 0.0005, "loss": 2.0985, "step": 112470 }, { "epoch": 0.4281266414439378, "grad_norm": 0.14037273824214935, "learning_rate": 0.0005, "loss": 2.1439, "step": 112480 }, { "epoch": 0.42816470391206046, "grad_norm": 0.14530320465564728, "learning_rate": 0.0005, "loss": 2.1195, "step": 112490 }, { "epoch": 0.4282027663801832, "grad_norm": 0.11581036448478699, "learning_rate": 0.0005, "loss": 2.1209, "step": 112500 }, { "epoch": 0.42824082884830583, "grad_norm": 0.12013707309961319, "learning_rate": 0.0005, "loss": 2.1192, "step": 112510 }, { "epoch": 0.42827889131642854, "grad_norm": 0.1147102639079094, "learning_rate": 0.0005, "loss": 2.1311, "step": 112520 }, { "epoch": 0.4283169537845512, "grad_norm": 0.1331748068332672, "learning_rate": 0.0005, "loss": 2.1158, "step": 112530 }, { "epoch": 0.4283550162526739, "grad_norm": 0.13831809163093567, "learning_rate": 0.0005, "loss": 2.1336, "step": 112540 }, { "epoch": 0.42839307872079657, "grad_norm": 0.12723985314369202, "learning_rate": 0.0005, "loss": 2.1175, "step": 112550 }, { "epoch": 0.42843114118891923, "grad_norm": 0.13498298823833466, "learning_rate": 0.0005, "loss": 2.1125, "step": 112560 }, { "epoch": 0.42846920365704194, "grad_norm": 0.11712668091058731, "learning_rate": 0.0005, "loss": 2.1157, "step": 112570 }, { "epoch": 0.4285072661251646, "grad_norm": 0.12820938229560852, "learning_rate": 0.0005, "loss": 2.1044, "step": 112580 }, { "epoch": 0.4285453285932873, "grad_norm": 0.14569640159606934, "learning_rate": 0.0005, "loss": 2.1162, "step": 112590 }, { "epoch": 0.42858339106140997, "grad_norm": 0.13953521847724915, "learning_rate": 0.0005, "loss": 2.1191, "step": 112600 }, { "epoch": 0.4286214535295327, "grad_norm": 0.1234491690993309, "learning_rate": 0.0005, "loss": 2.1306, "step": 112610 }, { "epoch": 0.42865951599765534, "grad_norm": 0.1317553073167801, "learning_rate": 0.0005, "loss": 2.1137, "step": 112620 }, { "epoch": 0.42869757846577805, "grad_norm": 0.11450919508934021, "learning_rate": 0.0005, "loss": 2.1219, "step": 112630 }, { "epoch": 0.4287356409339007, "grad_norm": 0.12273668497800827, "learning_rate": 0.0005, "loss": 2.1268, "step": 112640 }, { "epoch": 0.4287737034020234, "grad_norm": 0.11965305358171463, "learning_rate": 0.0005, "loss": 2.1318, "step": 112650 }, { "epoch": 0.4288117658701461, "grad_norm": 0.12610210478305817, "learning_rate": 0.0005, "loss": 2.121, "step": 112660 }, { "epoch": 0.4288498283382688, "grad_norm": 0.1309218555688858, "learning_rate": 0.0005, "loss": 2.1199, "step": 112670 }, { "epoch": 0.42888789080639145, "grad_norm": 0.3877588212490082, "learning_rate": 0.0005, "loss": 2.1317, "step": 112680 }, { "epoch": 0.42892595327451416, "grad_norm": 0.1310279667377472, "learning_rate": 0.0005, "loss": 2.12, "step": 112690 }, { "epoch": 0.4289640157426368, "grad_norm": 0.12395985424518585, "learning_rate": 0.0005, "loss": 2.1142, "step": 112700 }, { "epoch": 0.4290020782107595, "grad_norm": 0.11569735407829285, "learning_rate": 0.0005, "loss": 2.1163, "step": 112710 }, { "epoch": 0.4290401406788822, "grad_norm": 0.11640530824661255, "learning_rate": 0.0005, "loss": 2.1127, "step": 112720 }, { "epoch": 0.42907820314700484, "grad_norm": 0.1238589659333229, "learning_rate": 0.0005, "loss": 2.1188, "step": 112730 }, { "epoch": 0.42911626561512756, "grad_norm": 0.12123946845531464, "learning_rate": 0.0005, "loss": 2.1361, "step": 112740 }, { "epoch": 0.4291543280832502, "grad_norm": 0.13304486870765686, "learning_rate": 0.0005, "loss": 2.1217, "step": 112750 }, { "epoch": 0.4291923905513729, "grad_norm": 0.1193198636174202, "learning_rate": 0.0005, "loss": 2.1248, "step": 112760 }, { "epoch": 0.4292304530194956, "grad_norm": 0.1310795098543167, "learning_rate": 0.0005, "loss": 2.1046, "step": 112770 }, { "epoch": 0.4292685154876183, "grad_norm": 0.13365091383457184, "learning_rate": 0.0005, "loss": 2.1252, "step": 112780 }, { "epoch": 0.42930657795574095, "grad_norm": 0.12833775579929352, "learning_rate": 0.0005, "loss": 2.1122, "step": 112790 }, { "epoch": 0.42934464042386367, "grad_norm": 0.14498308300971985, "learning_rate": 0.0005, "loss": 2.1202, "step": 112800 }, { "epoch": 0.4293827028919863, "grad_norm": 0.12884864211082458, "learning_rate": 0.0005, "loss": 2.1149, "step": 112810 }, { "epoch": 0.42942076536010904, "grad_norm": 0.1267465502023697, "learning_rate": 0.0005, "loss": 2.1194, "step": 112820 }, { "epoch": 0.4294588278282317, "grad_norm": 0.12270502001047134, "learning_rate": 0.0005, "loss": 2.1227, "step": 112830 }, { "epoch": 0.42949689029635435, "grad_norm": 0.13606403768062592, "learning_rate": 0.0005, "loss": 2.1188, "step": 112840 }, { "epoch": 0.42953495276447706, "grad_norm": 0.12270118296146393, "learning_rate": 0.0005, "loss": 2.1091, "step": 112850 }, { "epoch": 0.4295730152325997, "grad_norm": 0.14218850433826447, "learning_rate": 0.0005, "loss": 2.1172, "step": 112860 }, { "epoch": 0.42961107770072243, "grad_norm": 0.12145689129829407, "learning_rate": 0.0005, "loss": 2.123, "step": 112870 }, { "epoch": 0.4296491401688451, "grad_norm": 0.12895521521568298, "learning_rate": 0.0005, "loss": 2.1063, "step": 112880 }, { "epoch": 0.4296872026369678, "grad_norm": 0.12357887625694275, "learning_rate": 0.0005, "loss": 2.1219, "step": 112890 }, { "epoch": 0.42972526510509046, "grad_norm": 0.13553652167320251, "learning_rate": 0.0005, "loss": 2.1201, "step": 112900 }, { "epoch": 0.42976332757321317, "grad_norm": 0.12917789816856384, "learning_rate": 0.0005, "loss": 2.1267, "step": 112910 }, { "epoch": 0.42980139004133583, "grad_norm": 0.11609556525945663, "learning_rate": 0.0005, "loss": 2.1171, "step": 112920 }, { "epoch": 0.42983945250945854, "grad_norm": 0.1176832914352417, "learning_rate": 0.0005, "loss": 2.1132, "step": 112930 }, { "epoch": 0.4298775149775812, "grad_norm": 0.1269991546869278, "learning_rate": 0.0005, "loss": 2.0931, "step": 112940 }, { "epoch": 0.4299155774457039, "grad_norm": 0.13056680560112, "learning_rate": 0.0005, "loss": 2.1066, "step": 112950 }, { "epoch": 0.42995363991382657, "grad_norm": 0.15302568674087524, "learning_rate": 0.0005, "loss": 2.1131, "step": 112960 }, { "epoch": 0.4299917023819493, "grad_norm": 0.1258421242237091, "learning_rate": 0.0005, "loss": 2.1114, "step": 112970 }, { "epoch": 0.43002976485007194, "grad_norm": 0.1366618275642395, "learning_rate": 0.0005, "loss": 2.1206, "step": 112980 }, { "epoch": 0.4300678273181946, "grad_norm": 0.15595024824142456, "learning_rate": 0.0005, "loss": 2.1072, "step": 112990 }, { "epoch": 0.4301058897863173, "grad_norm": 0.12370587140321732, "learning_rate": 0.0005, "loss": 2.1151, "step": 113000 }, { "epoch": 0.43014395225443997, "grad_norm": 0.12987452745437622, "learning_rate": 0.0005, "loss": 2.1218, "step": 113010 }, { "epoch": 0.4301820147225627, "grad_norm": 0.11600293964147568, "learning_rate": 0.0005, "loss": 2.1189, "step": 113020 }, { "epoch": 0.43022007719068533, "grad_norm": 0.13424547016620636, "learning_rate": 0.0005, "loss": 2.1171, "step": 113030 }, { "epoch": 0.43025813965880805, "grad_norm": 0.1201629564166069, "learning_rate": 0.0005, "loss": 2.1169, "step": 113040 }, { "epoch": 0.4302962021269307, "grad_norm": 0.12029099464416504, "learning_rate": 0.0005, "loss": 2.1179, "step": 113050 }, { "epoch": 0.4303342645950534, "grad_norm": 0.11914312094449997, "learning_rate": 0.0005, "loss": 2.1301, "step": 113060 }, { "epoch": 0.4303723270631761, "grad_norm": 0.12347181886434555, "learning_rate": 0.0005, "loss": 2.1062, "step": 113070 }, { "epoch": 0.4304103895312988, "grad_norm": 0.12621614336967468, "learning_rate": 0.0005, "loss": 2.1158, "step": 113080 }, { "epoch": 0.43044845199942144, "grad_norm": 0.12609432637691498, "learning_rate": 0.0005, "loss": 2.1233, "step": 113090 }, { "epoch": 0.43048651446754416, "grad_norm": 0.13581635057926178, "learning_rate": 0.0005, "loss": 2.1251, "step": 113100 }, { "epoch": 0.4305245769356668, "grad_norm": 0.12690937519073486, "learning_rate": 0.0005, "loss": 2.1069, "step": 113110 }, { "epoch": 0.4305626394037895, "grad_norm": 0.1215238943696022, "learning_rate": 0.0005, "loss": 2.0967, "step": 113120 }, { "epoch": 0.4306007018719122, "grad_norm": 0.12563979625701904, "learning_rate": 0.0005, "loss": 2.1141, "step": 113130 }, { "epoch": 0.43063876434003484, "grad_norm": 0.11845796555280685, "learning_rate": 0.0005, "loss": 2.121, "step": 113140 }, { "epoch": 0.43067682680815755, "grad_norm": 0.11353427916765213, "learning_rate": 0.0005, "loss": 2.1135, "step": 113150 }, { "epoch": 0.4307148892762802, "grad_norm": 0.1303396373987198, "learning_rate": 0.0005, "loss": 2.1197, "step": 113160 }, { "epoch": 0.4307529517444029, "grad_norm": 0.1315041482448578, "learning_rate": 0.0005, "loss": 2.1232, "step": 113170 }, { "epoch": 0.4307910142125256, "grad_norm": 0.12611375749111176, "learning_rate": 0.0005, "loss": 2.127, "step": 113180 }, { "epoch": 0.4308290766806483, "grad_norm": 0.12498685717582703, "learning_rate": 0.0005, "loss": 2.1123, "step": 113190 }, { "epoch": 0.43086713914877095, "grad_norm": 0.12216173857450485, "learning_rate": 0.0005, "loss": 2.1165, "step": 113200 }, { "epoch": 0.43090520161689366, "grad_norm": 0.1408940851688385, "learning_rate": 0.0005, "loss": 2.111, "step": 113210 }, { "epoch": 0.4309432640850163, "grad_norm": 0.1221887543797493, "learning_rate": 0.0005, "loss": 2.1318, "step": 113220 }, { "epoch": 0.43098132655313903, "grad_norm": 0.1298237144947052, "learning_rate": 0.0005, "loss": 2.1269, "step": 113230 }, { "epoch": 0.4310193890212617, "grad_norm": 0.13541021943092346, "learning_rate": 0.0005, "loss": 2.109, "step": 113240 }, { "epoch": 0.4310574514893844, "grad_norm": 0.13242541253566742, "learning_rate": 0.0005, "loss": 2.1083, "step": 113250 }, { "epoch": 0.43109551395750706, "grad_norm": 0.161054328083992, "learning_rate": 0.0005, "loss": 2.1138, "step": 113260 }, { "epoch": 0.4311335764256297, "grad_norm": 0.12451450526714325, "learning_rate": 0.0005, "loss": 2.1023, "step": 113270 }, { "epoch": 0.43117163889375243, "grad_norm": 0.1456080824136734, "learning_rate": 0.0005, "loss": 2.1226, "step": 113280 }, { "epoch": 0.4312097013618751, "grad_norm": 0.13522973656654358, "learning_rate": 0.0005, "loss": 2.1248, "step": 113290 }, { "epoch": 0.4312477638299978, "grad_norm": 0.13681624829769135, "learning_rate": 0.0005, "loss": 2.1215, "step": 113300 }, { "epoch": 0.43128582629812046, "grad_norm": 0.12967310845851898, "learning_rate": 0.0005, "loss": 2.1189, "step": 113310 }, { "epoch": 0.43132388876624317, "grad_norm": 0.1302393525838852, "learning_rate": 0.0005, "loss": 2.1184, "step": 113320 }, { "epoch": 0.4313619512343658, "grad_norm": 0.12448181957006454, "learning_rate": 0.0005, "loss": 2.1159, "step": 113330 }, { "epoch": 0.43140001370248854, "grad_norm": 0.12567047774791718, "learning_rate": 0.0005, "loss": 2.1466, "step": 113340 }, { "epoch": 0.4314380761706112, "grad_norm": 0.13223430514335632, "learning_rate": 0.0005, "loss": 2.113, "step": 113350 }, { "epoch": 0.4314761386387339, "grad_norm": 0.12238463014364243, "learning_rate": 0.0005, "loss": 2.1324, "step": 113360 }, { "epoch": 0.43151420110685657, "grad_norm": 0.1190914511680603, "learning_rate": 0.0005, "loss": 2.1074, "step": 113370 }, { "epoch": 0.4315522635749793, "grad_norm": 0.13166844844818115, "learning_rate": 0.0005, "loss": 2.1063, "step": 113380 }, { "epoch": 0.43159032604310194, "grad_norm": 0.12852655351161957, "learning_rate": 0.0005, "loss": 2.1305, "step": 113390 }, { "epoch": 0.43162838851122465, "grad_norm": 0.11806194484233856, "learning_rate": 0.0005, "loss": 2.1081, "step": 113400 }, { "epoch": 0.4316664509793473, "grad_norm": 0.12974756956100464, "learning_rate": 0.0005, "loss": 2.1191, "step": 113410 }, { "epoch": 0.43170451344746996, "grad_norm": 0.1320747286081314, "learning_rate": 0.0005, "loss": 2.121, "step": 113420 }, { "epoch": 0.4317425759155927, "grad_norm": 0.11793147027492523, "learning_rate": 0.0005, "loss": 2.1249, "step": 113430 }, { "epoch": 0.43178063838371533, "grad_norm": 0.12105303257703781, "learning_rate": 0.0005, "loss": 2.1231, "step": 113440 }, { "epoch": 0.43181870085183804, "grad_norm": 0.1268729716539383, "learning_rate": 0.0005, "loss": 2.1248, "step": 113450 }, { "epoch": 0.4318567633199607, "grad_norm": 0.1271156668663025, "learning_rate": 0.0005, "loss": 2.1208, "step": 113460 }, { "epoch": 0.4318948257880834, "grad_norm": 0.13248351216316223, "learning_rate": 0.0005, "loss": 2.1137, "step": 113470 }, { "epoch": 0.43193288825620607, "grad_norm": 0.13424064218997955, "learning_rate": 0.0005, "loss": 2.1279, "step": 113480 }, { "epoch": 0.4319709507243288, "grad_norm": 0.13179951906204224, "learning_rate": 0.0005, "loss": 2.1193, "step": 113490 }, { "epoch": 0.43200901319245144, "grad_norm": 0.11430752277374268, "learning_rate": 0.0005, "loss": 2.1106, "step": 113500 }, { "epoch": 0.43204707566057415, "grad_norm": 0.12928007543087006, "learning_rate": 0.0005, "loss": 2.1199, "step": 113510 }, { "epoch": 0.4320851381286968, "grad_norm": 0.13041894137859344, "learning_rate": 0.0005, "loss": 2.1156, "step": 113520 }, { "epoch": 0.4321232005968195, "grad_norm": 0.12620759010314941, "learning_rate": 0.0005, "loss": 2.1174, "step": 113530 }, { "epoch": 0.4321612630649422, "grad_norm": 0.12515173852443695, "learning_rate": 0.0005, "loss": 2.1201, "step": 113540 }, { "epoch": 0.4321993255330649, "grad_norm": 0.11779354512691498, "learning_rate": 0.0005, "loss": 2.124, "step": 113550 }, { "epoch": 0.43223738800118755, "grad_norm": 0.11467306315898895, "learning_rate": 0.0005, "loss": 2.1075, "step": 113560 }, { "epoch": 0.4322754504693102, "grad_norm": 0.13338513672351837, "learning_rate": 0.0005, "loss": 2.1163, "step": 113570 }, { "epoch": 0.4323135129374329, "grad_norm": 0.11753935366868973, "learning_rate": 0.0005, "loss": 2.1233, "step": 113580 }, { "epoch": 0.4323515754055556, "grad_norm": 0.11876005679368973, "learning_rate": 0.0005, "loss": 2.1075, "step": 113590 }, { "epoch": 0.4323896378736783, "grad_norm": 0.14191961288452148, "learning_rate": 0.0005, "loss": 2.1121, "step": 113600 }, { "epoch": 0.43242770034180095, "grad_norm": 0.12320465594530106, "learning_rate": 0.0005, "loss": 2.1214, "step": 113610 }, { "epoch": 0.43246576280992366, "grad_norm": 0.12368667870759964, "learning_rate": 0.0005, "loss": 2.1218, "step": 113620 }, { "epoch": 0.4325038252780463, "grad_norm": 0.1309211701154709, "learning_rate": 0.0005, "loss": 2.1147, "step": 113630 }, { "epoch": 0.43254188774616903, "grad_norm": 0.13724461197853088, "learning_rate": 0.0005, "loss": 2.12, "step": 113640 }, { "epoch": 0.4325799502142917, "grad_norm": 0.12140762060880661, "learning_rate": 0.0005, "loss": 2.1086, "step": 113650 }, { "epoch": 0.4326180126824144, "grad_norm": 0.12964437901973724, "learning_rate": 0.0005, "loss": 2.1126, "step": 113660 }, { "epoch": 0.43265607515053706, "grad_norm": 0.14219939708709717, "learning_rate": 0.0005, "loss": 2.1234, "step": 113670 }, { "epoch": 0.43269413761865977, "grad_norm": 0.1289874166250229, "learning_rate": 0.0005, "loss": 2.1091, "step": 113680 }, { "epoch": 0.4327322000867824, "grad_norm": 0.12109565734863281, "learning_rate": 0.0005, "loss": 2.1271, "step": 113690 }, { "epoch": 0.4327702625549051, "grad_norm": 0.12907147407531738, "learning_rate": 0.0005, "loss": 2.1081, "step": 113700 }, { "epoch": 0.4328083250230278, "grad_norm": 0.12966330349445343, "learning_rate": 0.0005, "loss": 2.1166, "step": 113710 }, { "epoch": 0.43284638749115045, "grad_norm": 0.1305544078350067, "learning_rate": 0.0005, "loss": 2.123, "step": 113720 }, { "epoch": 0.43288444995927317, "grad_norm": 0.1428816020488739, "learning_rate": 0.0005, "loss": 2.1285, "step": 113730 }, { "epoch": 0.4329225124273958, "grad_norm": 0.12114094942808151, "learning_rate": 0.0005, "loss": 2.1118, "step": 113740 }, { "epoch": 0.43296057489551854, "grad_norm": 0.14122043550014496, "learning_rate": 0.0005, "loss": 2.1163, "step": 113750 }, { "epoch": 0.4329986373636412, "grad_norm": 0.13917775452136993, "learning_rate": 0.0005, "loss": 2.1081, "step": 113760 }, { "epoch": 0.4330366998317639, "grad_norm": 0.1277030110359192, "learning_rate": 0.0005, "loss": 2.1271, "step": 113770 }, { "epoch": 0.43307476229988656, "grad_norm": 0.14055971801280975, "learning_rate": 0.0005, "loss": 2.1207, "step": 113780 }, { "epoch": 0.4331128247680093, "grad_norm": 0.1363455355167389, "learning_rate": 0.0005, "loss": 2.1239, "step": 113790 }, { "epoch": 0.43315088723613193, "grad_norm": 0.12260470539331436, "learning_rate": 0.0005, "loss": 2.1317, "step": 113800 }, { "epoch": 0.43318894970425464, "grad_norm": 0.13288481533527374, "learning_rate": 0.0005, "loss": 2.1418, "step": 113810 }, { "epoch": 0.4332270121723773, "grad_norm": 0.1359279304742813, "learning_rate": 0.0005, "loss": 2.1281, "step": 113820 }, { "epoch": 0.4332650746405, "grad_norm": 0.14037518203258514, "learning_rate": 0.0005, "loss": 2.1313, "step": 113830 }, { "epoch": 0.43330313710862267, "grad_norm": 0.1292182356119156, "learning_rate": 0.0005, "loss": 2.1066, "step": 113840 }, { "epoch": 0.43334119957674533, "grad_norm": 0.12281746417284012, "learning_rate": 0.0005, "loss": 2.1032, "step": 113850 }, { "epoch": 0.43337926204486804, "grad_norm": 0.12527795135974884, "learning_rate": 0.0005, "loss": 2.1182, "step": 113860 }, { "epoch": 0.4334173245129907, "grad_norm": 0.12615898251533508, "learning_rate": 0.0005, "loss": 2.1173, "step": 113870 }, { "epoch": 0.4334553869811134, "grad_norm": 0.13604030013084412, "learning_rate": 0.0005, "loss": 2.1045, "step": 113880 }, { "epoch": 0.43349344944923607, "grad_norm": 0.12271784245967865, "learning_rate": 0.0005, "loss": 2.118, "step": 113890 }, { "epoch": 0.4335315119173588, "grad_norm": 0.12712498009204865, "learning_rate": 0.0005, "loss": 2.116, "step": 113900 }, { "epoch": 0.43356957438548144, "grad_norm": 0.11638306826353073, "learning_rate": 0.0005, "loss": 2.1038, "step": 113910 }, { "epoch": 0.43360763685360415, "grad_norm": 0.12177691608667374, "learning_rate": 0.0005, "loss": 2.134, "step": 113920 }, { "epoch": 0.4336456993217268, "grad_norm": 0.13364951312541962, "learning_rate": 0.0005, "loss": 2.1112, "step": 113930 }, { "epoch": 0.4336837617898495, "grad_norm": 0.12290096282958984, "learning_rate": 0.0005, "loss": 2.1144, "step": 113940 }, { "epoch": 0.4337218242579722, "grad_norm": 0.12592563033103943, "learning_rate": 0.0005, "loss": 2.0961, "step": 113950 }, { "epoch": 0.4337598867260949, "grad_norm": 0.12498262524604797, "learning_rate": 0.0005, "loss": 2.1453, "step": 113960 }, { "epoch": 0.43379794919421755, "grad_norm": 0.15119707584381104, "learning_rate": 0.0005, "loss": 2.1214, "step": 113970 }, { "epoch": 0.43383601166234026, "grad_norm": 0.13209018111228943, "learning_rate": 0.0005, "loss": 2.112, "step": 113980 }, { "epoch": 0.4338740741304629, "grad_norm": 0.13218539953231812, "learning_rate": 0.0005, "loss": 2.1164, "step": 113990 }, { "epoch": 0.4339121365985856, "grad_norm": 0.1241869181394577, "learning_rate": 0.0005, "loss": 2.1146, "step": 114000 }, { "epoch": 0.4339501990667083, "grad_norm": 0.12801161408424377, "learning_rate": 0.0005, "loss": 2.1082, "step": 114010 }, { "epoch": 0.43398826153483094, "grad_norm": 0.1200789138674736, "learning_rate": 0.0005, "loss": 2.1164, "step": 114020 }, { "epoch": 0.43402632400295366, "grad_norm": 0.12127118557691574, "learning_rate": 0.0005, "loss": 2.12, "step": 114030 }, { "epoch": 0.4340643864710763, "grad_norm": 0.1316060721874237, "learning_rate": 0.0005, "loss": 2.105, "step": 114040 }, { "epoch": 0.434102448939199, "grad_norm": 0.137965127825737, "learning_rate": 0.0005, "loss": 2.0959, "step": 114050 }, { "epoch": 0.4341405114073217, "grad_norm": 0.12087521702051163, "learning_rate": 0.0005, "loss": 2.1275, "step": 114060 }, { "epoch": 0.4341785738754444, "grad_norm": 0.14593859016895294, "learning_rate": 0.0005, "loss": 2.1211, "step": 114070 }, { "epoch": 0.43421663634356705, "grad_norm": 0.12967590987682343, "learning_rate": 0.0005, "loss": 2.1149, "step": 114080 }, { "epoch": 0.43425469881168977, "grad_norm": 0.1307557374238968, "learning_rate": 0.0005, "loss": 2.1303, "step": 114090 }, { "epoch": 0.4342927612798124, "grad_norm": 0.1126394271850586, "learning_rate": 0.0005, "loss": 2.1062, "step": 114100 }, { "epoch": 0.43433082374793514, "grad_norm": 0.13772854208946228, "learning_rate": 0.0005, "loss": 2.1137, "step": 114110 }, { "epoch": 0.4343688862160578, "grad_norm": 0.11546389758586884, "learning_rate": 0.0005, "loss": 2.1124, "step": 114120 }, { "epoch": 0.43440694868418045, "grad_norm": 0.12694381177425385, "learning_rate": 0.0005, "loss": 2.1272, "step": 114130 }, { "epoch": 0.43444501115230316, "grad_norm": 0.1779816597700119, "learning_rate": 0.0005, "loss": 2.1264, "step": 114140 }, { "epoch": 0.4344830736204258, "grad_norm": 0.13322043418884277, "learning_rate": 0.0005, "loss": 2.1215, "step": 114150 }, { "epoch": 0.43452113608854853, "grad_norm": 0.13278986513614655, "learning_rate": 0.0005, "loss": 2.1035, "step": 114160 }, { "epoch": 0.4345591985566712, "grad_norm": 0.13025440275669098, "learning_rate": 0.0005, "loss": 2.108, "step": 114170 }, { "epoch": 0.4345972610247939, "grad_norm": 0.12454594671726227, "learning_rate": 0.0005, "loss": 2.1138, "step": 114180 }, { "epoch": 0.43463532349291656, "grad_norm": 0.12686704099178314, "learning_rate": 0.0005, "loss": 2.1168, "step": 114190 }, { "epoch": 0.43467338596103927, "grad_norm": 0.11205738037824631, "learning_rate": 0.0005, "loss": 2.1149, "step": 114200 }, { "epoch": 0.43471144842916193, "grad_norm": 0.1268397867679596, "learning_rate": 0.0005, "loss": 2.1242, "step": 114210 }, { "epoch": 0.43474951089728464, "grad_norm": 0.1218252032995224, "learning_rate": 0.0005, "loss": 2.1292, "step": 114220 }, { "epoch": 0.4347875733654073, "grad_norm": 0.12137608975172043, "learning_rate": 0.0005, "loss": 2.1299, "step": 114230 }, { "epoch": 0.43482563583353, "grad_norm": 0.11886157840490341, "learning_rate": 0.0005, "loss": 2.1232, "step": 114240 }, { "epoch": 0.43486369830165267, "grad_norm": 0.11996449530124664, "learning_rate": 0.0005, "loss": 2.1274, "step": 114250 }, { "epoch": 0.4349017607697754, "grad_norm": 0.13811056315898895, "learning_rate": 0.0005, "loss": 2.1339, "step": 114260 }, { "epoch": 0.43493982323789804, "grad_norm": 0.11954815685749054, "learning_rate": 0.0005, "loss": 2.1085, "step": 114270 }, { "epoch": 0.4349778857060207, "grad_norm": 0.1311139613389969, "learning_rate": 0.0005, "loss": 2.1256, "step": 114280 }, { "epoch": 0.4350159481741434, "grad_norm": 0.11974254250526428, "learning_rate": 0.0005, "loss": 2.1179, "step": 114290 }, { "epoch": 0.43505401064226606, "grad_norm": 0.13784751296043396, "learning_rate": 0.0005, "loss": 2.1279, "step": 114300 }, { "epoch": 0.4350920731103888, "grad_norm": 0.13397420942783356, "learning_rate": 0.0005, "loss": 2.1255, "step": 114310 }, { "epoch": 0.43513013557851143, "grad_norm": 0.11783301830291748, "learning_rate": 0.0005, "loss": 2.1146, "step": 114320 }, { "epoch": 0.43516819804663415, "grad_norm": 0.12785419821739197, "learning_rate": 0.0005, "loss": 2.1155, "step": 114330 }, { "epoch": 0.4352062605147568, "grad_norm": 0.1297289878129959, "learning_rate": 0.0005, "loss": 2.1117, "step": 114340 }, { "epoch": 0.4352443229828795, "grad_norm": 0.1216789036989212, "learning_rate": 0.0005, "loss": 2.1188, "step": 114350 }, { "epoch": 0.4352823854510022, "grad_norm": 0.1163412556052208, "learning_rate": 0.0005, "loss": 2.1102, "step": 114360 }, { "epoch": 0.4353204479191249, "grad_norm": 0.13293077051639557, "learning_rate": 0.0005, "loss": 2.12, "step": 114370 }, { "epoch": 0.43535851038724754, "grad_norm": 0.12136878818273544, "learning_rate": 0.0005, "loss": 2.117, "step": 114380 }, { "epoch": 0.43539657285537026, "grad_norm": 0.1324099898338318, "learning_rate": 0.0005, "loss": 2.1429, "step": 114390 }, { "epoch": 0.4354346353234929, "grad_norm": 0.1286437064409256, "learning_rate": 0.0005, "loss": 2.0947, "step": 114400 }, { "epoch": 0.4354726977916156, "grad_norm": 0.13256339728832245, "learning_rate": 0.0005, "loss": 2.1104, "step": 114410 }, { "epoch": 0.4355107602597383, "grad_norm": 0.1252366006374359, "learning_rate": 0.0005, "loss": 2.1374, "step": 114420 }, { "epoch": 0.43554882272786094, "grad_norm": 0.13053369522094727, "learning_rate": 0.0005, "loss": 2.1299, "step": 114430 }, { "epoch": 0.43558688519598365, "grad_norm": 0.12626564502716064, "learning_rate": 0.0005, "loss": 2.1208, "step": 114440 }, { "epoch": 0.4356249476641063, "grad_norm": 0.12247197329998016, "learning_rate": 0.0005, "loss": 2.1122, "step": 114450 }, { "epoch": 0.435663010132229, "grad_norm": 0.115814208984375, "learning_rate": 0.0005, "loss": 2.1212, "step": 114460 }, { "epoch": 0.4357010726003517, "grad_norm": 0.12693926692008972, "learning_rate": 0.0005, "loss": 2.1309, "step": 114470 }, { "epoch": 0.4357391350684744, "grad_norm": 0.12500695884227753, "learning_rate": 0.0005, "loss": 2.1226, "step": 114480 }, { "epoch": 0.43577719753659705, "grad_norm": 0.1235467940568924, "learning_rate": 0.0005, "loss": 2.1328, "step": 114490 }, { "epoch": 0.43581526000471976, "grad_norm": 0.12184032797813416, "learning_rate": 0.0005, "loss": 2.1274, "step": 114500 }, { "epoch": 0.4358533224728424, "grad_norm": 0.12942011654376984, "learning_rate": 0.0005, "loss": 2.11, "step": 114510 }, { "epoch": 0.43589138494096513, "grad_norm": 0.13408392667770386, "learning_rate": 0.0005, "loss": 2.1249, "step": 114520 }, { "epoch": 0.4359294474090878, "grad_norm": 0.12868361175060272, "learning_rate": 0.0005, "loss": 2.1047, "step": 114530 }, { "epoch": 0.4359675098772105, "grad_norm": 0.13124553859233856, "learning_rate": 0.0005, "loss": 2.1108, "step": 114540 }, { "epoch": 0.43600557234533316, "grad_norm": 0.1267099529504776, "learning_rate": 0.0005, "loss": 2.1108, "step": 114550 }, { "epoch": 0.4360436348134558, "grad_norm": 0.12259211391210556, "learning_rate": 0.0005, "loss": 2.1125, "step": 114560 }, { "epoch": 0.43608169728157853, "grad_norm": 0.11855865269899368, "learning_rate": 0.0005, "loss": 2.1156, "step": 114570 }, { "epoch": 0.4361197597497012, "grad_norm": 0.12500205636024475, "learning_rate": 0.0005, "loss": 2.1115, "step": 114580 }, { "epoch": 0.4361578222178239, "grad_norm": 0.12759044766426086, "learning_rate": 0.0005, "loss": 2.134, "step": 114590 }, { "epoch": 0.43619588468594656, "grad_norm": 0.11896944046020508, "learning_rate": 0.0005, "loss": 2.1055, "step": 114600 }, { "epoch": 0.43623394715406927, "grad_norm": 0.12041134387254715, "learning_rate": 0.0005, "loss": 2.1157, "step": 114610 }, { "epoch": 0.4362720096221919, "grad_norm": 0.12240686267614365, "learning_rate": 0.0005, "loss": 2.1242, "step": 114620 }, { "epoch": 0.43631007209031464, "grad_norm": 0.11856778711080551, "learning_rate": 0.0005, "loss": 2.1174, "step": 114630 }, { "epoch": 0.4363481345584373, "grad_norm": 0.1220201849937439, "learning_rate": 0.0005, "loss": 2.1108, "step": 114640 }, { "epoch": 0.43638619702656, "grad_norm": 0.1374635100364685, "learning_rate": 0.0005, "loss": 2.1199, "step": 114650 }, { "epoch": 0.43642425949468266, "grad_norm": 0.13042598962783813, "learning_rate": 0.0005, "loss": 2.1136, "step": 114660 }, { "epoch": 0.4364623219628054, "grad_norm": 0.11881065368652344, "learning_rate": 0.0005, "loss": 2.1199, "step": 114670 }, { "epoch": 0.43650038443092803, "grad_norm": 0.12820273637771606, "learning_rate": 0.0005, "loss": 2.1145, "step": 114680 }, { "epoch": 0.43653844689905075, "grad_norm": 0.12528343498706818, "learning_rate": 0.0005, "loss": 2.1167, "step": 114690 }, { "epoch": 0.4365765093671734, "grad_norm": 0.12068533897399902, "learning_rate": 0.0005, "loss": 2.1071, "step": 114700 }, { "epoch": 0.43661457183529606, "grad_norm": 0.10928542166948318, "learning_rate": 0.0005, "loss": 2.126, "step": 114710 }, { "epoch": 0.4366526343034188, "grad_norm": 0.1172463670372963, "learning_rate": 0.0005, "loss": 2.114, "step": 114720 }, { "epoch": 0.43669069677154143, "grad_norm": 0.13667593896389008, "learning_rate": 0.0005, "loss": 2.1152, "step": 114730 }, { "epoch": 0.43672875923966414, "grad_norm": 0.12498161941766739, "learning_rate": 0.0005, "loss": 2.1059, "step": 114740 }, { "epoch": 0.4367668217077868, "grad_norm": 0.1113005205988884, "learning_rate": 0.0005, "loss": 2.1005, "step": 114750 }, { "epoch": 0.4368048841759095, "grad_norm": 0.13782349228858948, "learning_rate": 0.0005, "loss": 2.1282, "step": 114760 }, { "epoch": 0.43684294664403217, "grad_norm": 0.11374063789844513, "learning_rate": 0.0005, "loss": 2.0997, "step": 114770 }, { "epoch": 0.4368810091121549, "grad_norm": 0.114725261926651, "learning_rate": 0.0005, "loss": 2.13, "step": 114780 }, { "epoch": 0.43691907158027754, "grad_norm": 0.13262836635112762, "learning_rate": 0.0005, "loss": 2.097, "step": 114790 }, { "epoch": 0.43695713404840025, "grad_norm": 0.11711817979812622, "learning_rate": 0.0005, "loss": 2.1087, "step": 114800 }, { "epoch": 0.4369951965165229, "grad_norm": 0.30082806944847107, "learning_rate": 0.0005, "loss": 2.1271, "step": 114810 }, { "epoch": 0.4370332589846456, "grad_norm": 0.12366364896297455, "learning_rate": 0.0005, "loss": 2.1181, "step": 114820 }, { "epoch": 0.4370713214527683, "grad_norm": 0.13910606503486633, "learning_rate": 0.0005, "loss": 2.1067, "step": 114830 }, { "epoch": 0.437109383920891, "grad_norm": 0.1297772377729416, "learning_rate": 0.0005, "loss": 2.1211, "step": 114840 }, { "epoch": 0.43714744638901365, "grad_norm": 0.1312246471643448, "learning_rate": 0.0005, "loss": 2.1238, "step": 114850 }, { "epoch": 0.4371855088571363, "grad_norm": 0.12575647234916687, "learning_rate": 0.0005, "loss": 2.1178, "step": 114860 }, { "epoch": 0.437223571325259, "grad_norm": 0.11815674602985382, "learning_rate": 0.0005, "loss": 2.1144, "step": 114870 }, { "epoch": 0.4372616337933817, "grad_norm": 0.13090112805366516, "learning_rate": 0.0005, "loss": 2.1288, "step": 114880 }, { "epoch": 0.4372996962615044, "grad_norm": 0.1162065863609314, "learning_rate": 0.0005, "loss": 2.1189, "step": 114890 }, { "epoch": 0.43733775872962705, "grad_norm": 0.12324568629264832, "learning_rate": 0.0005, "loss": 2.1226, "step": 114900 }, { "epoch": 0.43737582119774976, "grad_norm": 0.11646682769060135, "learning_rate": 0.0005, "loss": 2.1184, "step": 114910 }, { "epoch": 0.4374138836658724, "grad_norm": 0.13061708211898804, "learning_rate": 0.0005, "loss": 2.1239, "step": 114920 }, { "epoch": 0.43745194613399513, "grad_norm": 0.138249471783638, "learning_rate": 0.0005, "loss": 2.1099, "step": 114930 }, { "epoch": 0.4374900086021178, "grad_norm": 0.11456823348999023, "learning_rate": 0.0005, "loss": 2.1143, "step": 114940 }, { "epoch": 0.4375280710702405, "grad_norm": 0.13056766986846924, "learning_rate": 0.0005, "loss": 2.1041, "step": 114950 }, { "epoch": 0.43756613353836316, "grad_norm": 0.1262996345758438, "learning_rate": 0.0005, "loss": 2.1296, "step": 114960 }, { "epoch": 0.43760419600648587, "grad_norm": 0.12462644279003143, "learning_rate": 0.0005, "loss": 2.1175, "step": 114970 }, { "epoch": 0.4376422584746085, "grad_norm": 0.11949781328439713, "learning_rate": 0.0005, "loss": 2.1124, "step": 114980 }, { "epoch": 0.43768032094273124, "grad_norm": 0.12507055699825287, "learning_rate": 0.0005, "loss": 2.1078, "step": 114990 }, { "epoch": 0.4377183834108539, "grad_norm": 0.13478222489356995, "learning_rate": 0.0005, "loss": 2.1233, "step": 115000 }, { "epoch": 0.43775644587897655, "grad_norm": 0.12094340473413467, "learning_rate": 0.0005, "loss": 2.1139, "step": 115010 }, { "epoch": 0.43779450834709926, "grad_norm": 0.12723763287067413, "learning_rate": 0.0005, "loss": 2.1139, "step": 115020 }, { "epoch": 0.4378325708152219, "grad_norm": 0.130209282040596, "learning_rate": 0.0005, "loss": 2.1099, "step": 115030 }, { "epoch": 0.43787063328334463, "grad_norm": 0.2890703082084656, "learning_rate": 0.0005, "loss": 2.1267, "step": 115040 }, { "epoch": 0.4379086957514673, "grad_norm": 0.13002605736255646, "learning_rate": 0.0005, "loss": 2.1305, "step": 115050 }, { "epoch": 0.43794675821959, "grad_norm": 0.1323857605457306, "learning_rate": 0.0005, "loss": 2.1082, "step": 115060 }, { "epoch": 0.43798482068771266, "grad_norm": 0.12515832483768463, "learning_rate": 0.0005, "loss": 2.1144, "step": 115070 }, { "epoch": 0.4380228831558354, "grad_norm": 0.12032574415206909, "learning_rate": 0.0005, "loss": 2.1208, "step": 115080 }, { "epoch": 0.43806094562395803, "grad_norm": 0.12321025133132935, "learning_rate": 0.0005, "loss": 2.131, "step": 115090 }, { "epoch": 0.43809900809208074, "grad_norm": 0.125, "learning_rate": 0.0005, "loss": 2.1165, "step": 115100 }, { "epoch": 0.4381370705602034, "grad_norm": 0.1492929458618164, "learning_rate": 0.0005, "loss": 2.1168, "step": 115110 }, { "epoch": 0.4381751330283261, "grad_norm": 0.12361513078212738, "learning_rate": 0.0005, "loss": 2.1197, "step": 115120 }, { "epoch": 0.43821319549644877, "grad_norm": 0.1414598673582077, "learning_rate": 0.0005, "loss": 2.1187, "step": 115130 }, { "epoch": 0.43825125796457143, "grad_norm": 0.12446663528680801, "learning_rate": 0.0005, "loss": 2.1206, "step": 115140 }, { "epoch": 0.43828932043269414, "grad_norm": 0.13060636818408966, "learning_rate": 0.0005, "loss": 2.1106, "step": 115150 }, { "epoch": 0.4383273829008168, "grad_norm": 0.12709937989711761, "learning_rate": 0.0005, "loss": 2.1212, "step": 115160 }, { "epoch": 0.4383654453689395, "grad_norm": 0.12051993608474731, "learning_rate": 0.0005, "loss": 2.1094, "step": 115170 }, { "epoch": 0.43840350783706217, "grad_norm": 0.12320751696825027, "learning_rate": 0.0005, "loss": 2.1233, "step": 115180 }, { "epoch": 0.4384415703051849, "grad_norm": 0.12034042924642563, "learning_rate": 0.0005, "loss": 2.1327, "step": 115190 }, { "epoch": 0.43847963277330754, "grad_norm": 0.11698071658611298, "learning_rate": 0.0005, "loss": 2.1231, "step": 115200 }, { "epoch": 0.43851769524143025, "grad_norm": 0.12866592407226562, "learning_rate": 0.0005, "loss": 2.1111, "step": 115210 }, { "epoch": 0.4385557577095529, "grad_norm": 0.13302206993103027, "learning_rate": 0.0005, "loss": 2.1309, "step": 115220 }, { "epoch": 0.4385938201776756, "grad_norm": 0.13836079835891724, "learning_rate": 0.0005, "loss": 2.14, "step": 115230 }, { "epoch": 0.4386318826457983, "grad_norm": 0.11796905100345612, "learning_rate": 0.0005, "loss": 2.1168, "step": 115240 }, { "epoch": 0.438669945113921, "grad_norm": 0.12501636147499084, "learning_rate": 0.0005, "loss": 2.1118, "step": 115250 }, { "epoch": 0.43870800758204365, "grad_norm": 0.13494443893432617, "learning_rate": 0.0005, "loss": 2.1177, "step": 115260 }, { "epoch": 0.43874607005016636, "grad_norm": 0.12201546132564545, "learning_rate": 0.0005, "loss": 2.1183, "step": 115270 }, { "epoch": 0.438784132518289, "grad_norm": 0.12451411038637161, "learning_rate": 0.0005, "loss": 2.1023, "step": 115280 }, { "epoch": 0.4388221949864117, "grad_norm": 0.12425916641950607, "learning_rate": 0.0005, "loss": 2.1276, "step": 115290 }, { "epoch": 0.4388602574545344, "grad_norm": 0.12091124802827835, "learning_rate": 0.0005, "loss": 2.1185, "step": 115300 }, { "epoch": 0.43889831992265704, "grad_norm": 0.13333760201931, "learning_rate": 0.0005, "loss": 2.1413, "step": 115310 }, { "epoch": 0.43893638239077976, "grad_norm": 0.13249099254608154, "learning_rate": 0.0005, "loss": 2.1208, "step": 115320 }, { "epoch": 0.4389744448589024, "grad_norm": 0.11971089243888855, "learning_rate": 0.0005, "loss": 2.1222, "step": 115330 }, { "epoch": 0.4390125073270251, "grad_norm": 0.12167433649301529, "learning_rate": 0.0005, "loss": 2.0983, "step": 115340 }, { "epoch": 0.4390505697951478, "grad_norm": 0.12018298357725143, "learning_rate": 0.0005, "loss": 2.1143, "step": 115350 }, { "epoch": 0.4390886322632705, "grad_norm": 0.13596774637699127, "learning_rate": 0.0005, "loss": 2.1147, "step": 115360 }, { "epoch": 0.43912669473139315, "grad_norm": 0.11545957624912262, "learning_rate": 0.0005, "loss": 2.1218, "step": 115370 }, { "epoch": 0.43916475719951586, "grad_norm": 0.11990311741828918, "learning_rate": 0.0005, "loss": 2.119, "step": 115380 }, { "epoch": 0.4392028196676385, "grad_norm": 0.1435178518295288, "learning_rate": 0.0005, "loss": 2.1169, "step": 115390 }, { "epoch": 0.43924088213576123, "grad_norm": 0.13450974225997925, "learning_rate": 0.0005, "loss": 2.1264, "step": 115400 }, { "epoch": 0.4392789446038839, "grad_norm": 0.13755057752132416, "learning_rate": 0.0005, "loss": 2.1161, "step": 115410 }, { "epoch": 0.4393170070720066, "grad_norm": 0.13020038604736328, "learning_rate": 0.0005, "loss": 2.1211, "step": 115420 }, { "epoch": 0.43935506954012926, "grad_norm": 0.12308257818222046, "learning_rate": 0.0005, "loss": 2.121, "step": 115430 }, { "epoch": 0.4393931320082519, "grad_norm": 0.12247392535209656, "learning_rate": 0.0005, "loss": 2.113, "step": 115440 }, { "epoch": 0.43943119447637463, "grad_norm": 0.12096204608678818, "learning_rate": 0.0005, "loss": 2.1067, "step": 115450 }, { "epoch": 0.4394692569444973, "grad_norm": 0.12142517417669296, "learning_rate": 0.0005, "loss": 2.116, "step": 115460 }, { "epoch": 0.43950731941262, "grad_norm": 0.12045764923095703, "learning_rate": 0.0005, "loss": 2.1076, "step": 115470 }, { "epoch": 0.43954538188074266, "grad_norm": 0.12330371886491776, "learning_rate": 0.0005, "loss": 2.113, "step": 115480 }, { "epoch": 0.43958344434886537, "grad_norm": 0.12864790856838226, "learning_rate": 0.0005, "loss": 2.119, "step": 115490 }, { "epoch": 0.43962150681698803, "grad_norm": 0.12794655561447144, "learning_rate": 0.0005, "loss": 2.1158, "step": 115500 }, { "epoch": 0.43965956928511074, "grad_norm": 0.14096996188163757, "learning_rate": 0.0005, "loss": 2.1357, "step": 115510 }, { "epoch": 0.4396976317532334, "grad_norm": 0.11860933899879456, "learning_rate": 0.0005, "loss": 2.121, "step": 115520 }, { "epoch": 0.4397356942213561, "grad_norm": 0.11834219098091125, "learning_rate": 0.0005, "loss": 2.1344, "step": 115530 }, { "epoch": 0.43977375668947877, "grad_norm": 0.13245339691638947, "learning_rate": 0.0005, "loss": 2.1143, "step": 115540 }, { "epoch": 0.4398118191576015, "grad_norm": 0.12836237251758575, "learning_rate": 0.0005, "loss": 2.1131, "step": 115550 }, { "epoch": 0.43984988162572414, "grad_norm": 0.13424669206142426, "learning_rate": 0.0005, "loss": 2.1198, "step": 115560 }, { "epoch": 0.4398879440938468, "grad_norm": 0.13692979514598846, "learning_rate": 0.0005, "loss": 2.1065, "step": 115570 }, { "epoch": 0.4399260065619695, "grad_norm": 0.1289914846420288, "learning_rate": 0.0005, "loss": 2.1372, "step": 115580 }, { "epoch": 0.43996406903009216, "grad_norm": 0.11068768054246902, "learning_rate": 0.0005, "loss": 2.1229, "step": 115590 }, { "epoch": 0.4400021314982149, "grad_norm": 0.12041833996772766, "learning_rate": 0.0005, "loss": 2.1192, "step": 115600 }, { "epoch": 0.44004019396633753, "grad_norm": 0.11720996350049973, "learning_rate": 0.0005, "loss": 2.1231, "step": 115610 }, { "epoch": 0.44007825643446025, "grad_norm": 0.12740816175937653, "learning_rate": 0.0005, "loss": 2.1113, "step": 115620 }, { "epoch": 0.4401163189025829, "grad_norm": 0.13115811347961426, "learning_rate": 0.0005, "loss": 2.1106, "step": 115630 }, { "epoch": 0.4401543813707056, "grad_norm": 0.13463477790355682, "learning_rate": 0.0005, "loss": 2.1148, "step": 115640 }, { "epoch": 0.4401924438388283, "grad_norm": 0.1432303637266159, "learning_rate": 0.0005, "loss": 2.1104, "step": 115650 }, { "epoch": 0.440230506306951, "grad_norm": 0.12418785691261292, "learning_rate": 0.0005, "loss": 2.1151, "step": 115660 }, { "epoch": 0.44026856877507364, "grad_norm": 0.13557015359401703, "learning_rate": 0.0005, "loss": 2.1239, "step": 115670 }, { "epoch": 0.44030663124319636, "grad_norm": 0.132261261343956, "learning_rate": 0.0005, "loss": 2.1221, "step": 115680 }, { "epoch": 0.440344693711319, "grad_norm": 0.1169886440038681, "learning_rate": 0.0005, "loss": 2.1321, "step": 115690 }, { "epoch": 0.4403827561794417, "grad_norm": 0.14475859701633453, "learning_rate": 0.0005, "loss": 2.1108, "step": 115700 }, { "epoch": 0.4404208186475644, "grad_norm": 0.13027706742286682, "learning_rate": 0.0005, "loss": 2.1196, "step": 115710 }, { "epoch": 0.44045888111568704, "grad_norm": 0.11329112946987152, "learning_rate": 0.0005, "loss": 2.1037, "step": 115720 }, { "epoch": 0.44049694358380975, "grad_norm": 0.12300235778093338, "learning_rate": 0.0005, "loss": 2.1211, "step": 115730 }, { "epoch": 0.4405350060519324, "grad_norm": 0.12906724214553833, "learning_rate": 0.0005, "loss": 2.099, "step": 115740 }, { "epoch": 0.4405730685200551, "grad_norm": 0.14534308016300201, "learning_rate": 0.0005, "loss": 2.1186, "step": 115750 }, { "epoch": 0.4406111309881778, "grad_norm": 0.1248600035905838, "learning_rate": 0.0005, "loss": 2.1133, "step": 115760 }, { "epoch": 0.4406491934563005, "grad_norm": 0.12258388847112656, "learning_rate": 0.0005, "loss": 2.1176, "step": 115770 }, { "epoch": 0.44068725592442315, "grad_norm": 0.13123729825019836, "learning_rate": 0.0005, "loss": 2.1154, "step": 115780 }, { "epoch": 0.44072531839254586, "grad_norm": 0.12473517656326294, "learning_rate": 0.0005, "loss": 2.1147, "step": 115790 }, { "epoch": 0.4407633808606685, "grad_norm": 0.13003067672252655, "learning_rate": 0.0005, "loss": 2.1289, "step": 115800 }, { "epoch": 0.44080144332879123, "grad_norm": 0.11639449745416641, "learning_rate": 0.0005, "loss": 2.1241, "step": 115810 }, { "epoch": 0.4408395057969139, "grad_norm": 0.1302078813314438, "learning_rate": 0.0005, "loss": 2.1054, "step": 115820 }, { "epoch": 0.4408775682650366, "grad_norm": 0.1332431435585022, "learning_rate": 0.0005, "loss": 2.0991, "step": 115830 }, { "epoch": 0.44091563073315926, "grad_norm": 0.12588725984096527, "learning_rate": 0.0005, "loss": 2.1205, "step": 115840 }, { "epoch": 0.44095369320128197, "grad_norm": 0.11563403904438019, "learning_rate": 0.0005, "loss": 2.1216, "step": 115850 }, { "epoch": 0.44099175566940463, "grad_norm": 0.13421843945980072, "learning_rate": 0.0005, "loss": 2.1342, "step": 115860 }, { "epoch": 0.4410298181375273, "grad_norm": 0.1310349702835083, "learning_rate": 0.0005, "loss": 2.1147, "step": 115870 }, { "epoch": 0.44106788060565, "grad_norm": 0.11716281622648239, "learning_rate": 0.0005, "loss": 2.1146, "step": 115880 }, { "epoch": 0.44110594307377265, "grad_norm": 0.12190917134284973, "learning_rate": 0.0005, "loss": 2.132, "step": 115890 }, { "epoch": 0.44114400554189537, "grad_norm": 0.1441725641489029, "learning_rate": 0.0005, "loss": 2.1113, "step": 115900 }, { "epoch": 0.441182068010018, "grad_norm": 0.1274310201406479, "learning_rate": 0.0005, "loss": 2.1298, "step": 115910 }, { "epoch": 0.44122013047814074, "grad_norm": 0.1373417228460312, "learning_rate": 0.0005, "loss": 2.1064, "step": 115920 }, { "epoch": 0.4412581929462634, "grad_norm": 0.1468418836593628, "learning_rate": 0.0005, "loss": 2.1201, "step": 115930 }, { "epoch": 0.4412962554143861, "grad_norm": 0.12073398381471634, "learning_rate": 0.0005, "loss": 2.112, "step": 115940 }, { "epoch": 0.44133431788250876, "grad_norm": 0.12780290842056274, "learning_rate": 0.0005, "loss": 2.1261, "step": 115950 }, { "epoch": 0.4413723803506315, "grad_norm": 0.13828471302986145, "learning_rate": 0.0005, "loss": 2.1154, "step": 115960 }, { "epoch": 0.44141044281875413, "grad_norm": 0.11829525977373123, "learning_rate": 0.0005, "loss": 2.1117, "step": 115970 }, { "epoch": 0.44144850528687685, "grad_norm": 0.12415596842765808, "learning_rate": 0.0005, "loss": 2.1287, "step": 115980 }, { "epoch": 0.4414865677549995, "grad_norm": 0.12892639636993408, "learning_rate": 0.0005, "loss": 2.133, "step": 115990 }, { "epoch": 0.44152463022312216, "grad_norm": 0.11956531554460526, "learning_rate": 0.0005, "loss": 2.1038, "step": 116000 }, { "epoch": 0.4415626926912449, "grad_norm": 0.1185465082526207, "learning_rate": 0.0005, "loss": 2.0992, "step": 116010 }, { "epoch": 0.44160075515936753, "grad_norm": 0.12413859367370605, "learning_rate": 0.0005, "loss": 2.1266, "step": 116020 }, { "epoch": 0.44163881762749024, "grad_norm": 0.1255383938550949, "learning_rate": 0.0005, "loss": 2.1242, "step": 116030 }, { "epoch": 0.4416768800956129, "grad_norm": 0.14520691335201263, "learning_rate": 0.0005, "loss": 2.1408, "step": 116040 }, { "epoch": 0.4417149425637356, "grad_norm": 0.1431220918893814, "learning_rate": 0.0005, "loss": 2.1112, "step": 116050 }, { "epoch": 0.44175300503185827, "grad_norm": 0.11558663100004196, "learning_rate": 0.0005, "loss": 2.113, "step": 116060 }, { "epoch": 0.441791067499981, "grad_norm": 0.14282682538032532, "learning_rate": 0.0005, "loss": 2.132, "step": 116070 }, { "epoch": 0.44182912996810364, "grad_norm": 0.12574829161167145, "learning_rate": 0.0005, "loss": 2.1163, "step": 116080 }, { "epoch": 0.44186719243622635, "grad_norm": 0.12052618712186813, "learning_rate": 0.0005, "loss": 2.1254, "step": 116090 }, { "epoch": 0.441905254904349, "grad_norm": 0.11520765721797943, "learning_rate": 0.0005, "loss": 2.1167, "step": 116100 }, { "epoch": 0.4419433173724717, "grad_norm": 0.14078138768672943, "learning_rate": 0.0005, "loss": 2.1179, "step": 116110 }, { "epoch": 0.4419813798405944, "grad_norm": 0.11563374847173691, "learning_rate": 0.0005, "loss": 2.1227, "step": 116120 }, { "epoch": 0.4420194423087171, "grad_norm": 0.11840452253818512, "learning_rate": 0.0005, "loss": 2.1073, "step": 116130 }, { "epoch": 0.44205750477683975, "grad_norm": 0.12553977966308594, "learning_rate": 0.0005, "loss": 2.1214, "step": 116140 }, { "epoch": 0.4420955672449624, "grad_norm": 0.12525105476379395, "learning_rate": 0.0005, "loss": 2.1164, "step": 116150 }, { "epoch": 0.4421336297130851, "grad_norm": 0.11949176341295242, "learning_rate": 0.0005, "loss": 2.1226, "step": 116160 }, { "epoch": 0.4421716921812078, "grad_norm": 0.12283053249120712, "learning_rate": 0.0005, "loss": 2.1234, "step": 116170 }, { "epoch": 0.4422097546493305, "grad_norm": 0.11786118894815445, "learning_rate": 0.0005, "loss": 2.1168, "step": 116180 }, { "epoch": 0.44224781711745315, "grad_norm": 0.12331389635801315, "learning_rate": 0.0005, "loss": 2.1003, "step": 116190 }, { "epoch": 0.44228587958557586, "grad_norm": 0.1577860713005066, "learning_rate": 0.0005, "loss": 2.116, "step": 116200 }, { "epoch": 0.4423239420536985, "grad_norm": 0.12965896725654602, "learning_rate": 0.0005, "loss": 2.1453, "step": 116210 }, { "epoch": 0.44236200452182123, "grad_norm": 0.11895183473825455, "learning_rate": 0.0005, "loss": 2.1201, "step": 116220 }, { "epoch": 0.4424000669899439, "grad_norm": 0.11798092722892761, "learning_rate": 0.0005, "loss": 2.1173, "step": 116230 }, { "epoch": 0.4424381294580666, "grad_norm": 0.11680356413125992, "learning_rate": 0.0005, "loss": 2.103, "step": 116240 }, { "epoch": 0.44247619192618926, "grad_norm": 0.11378675699234009, "learning_rate": 0.0005, "loss": 2.1077, "step": 116250 }, { "epoch": 0.44251425439431197, "grad_norm": 0.14306718111038208, "learning_rate": 0.0005, "loss": 2.1081, "step": 116260 }, { "epoch": 0.4425523168624346, "grad_norm": 0.11955001205205917, "learning_rate": 0.0005, "loss": 2.1248, "step": 116270 }, { "epoch": 0.44259037933055734, "grad_norm": 0.12565234303474426, "learning_rate": 0.0005, "loss": 2.1316, "step": 116280 }, { "epoch": 0.44262844179868, "grad_norm": 0.11439774930477142, "learning_rate": 0.0005, "loss": 2.1214, "step": 116290 }, { "epoch": 0.44266650426680265, "grad_norm": 0.11682023108005524, "learning_rate": 0.0005, "loss": 2.1161, "step": 116300 }, { "epoch": 0.44270456673492536, "grad_norm": 0.1343322992324829, "learning_rate": 0.0005, "loss": 2.1073, "step": 116310 }, { "epoch": 0.442742629203048, "grad_norm": 0.11617587506771088, "learning_rate": 0.0005, "loss": 2.1089, "step": 116320 }, { "epoch": 0.44278069167117073, "grad_norm": 0.11658541113138199, "learning_rate": 0.0005, "loss": 2.1133, "step": 116330 }, { "epoch": 0.4428187541392934, "grad_norm": 0.12785518169403076, "learning_rate": 0.0005, "loss": 2.1283, "step": 116340 }, { "epoch": 0.4428568166074161, "grad_norm": 0.11588647216558456, "learning_rate": 0.0005, "loss": 2.1241, "step": 116350 }, { "epoch": 0.44289487907553876, "grad_norm": 0.1187388226389885, "learning_rate": 0.0005, "loss": 2.1146, "step": 116360 }, { "epoch": 0.4429329415436615, "grad_norm": 0.12749643623828888, "learning_rate": 0.0005, "loss": 2.1208, "step": 116370 }, { "epoch": 0.44297100401178413, "grad_norm": 0.11822959780693054, "learning_rate": 0.0005, "loss": 2.1174, "step": 116380 }, { "epoch": 0.44300906647990684, "grad_norm": 0.13491389155387878, "learning_rate": 0.0005, "loss": 2.1155, "step": 116390 }, { "epoch": 0.4430471289480295, "grad_norm": 0.11838964372873306, "learning_rate": 0.0005, "loss": 2.1219, "step": 116400 }, { "epoch": 0.4430851914161522, "grad_norm": 0.1275443136692047, "learning_rate": 0.0005, "loss": 2.127, "step": 116410 }, { "epoch": 0.44312325388427487, "grad_norm": 0.12287093698978424, "learning_rate": 0.0005, "loss": 2.1189, "step": 116420 }, { "epoch": 0.4431613163523975, "grad_norm": 0.12383338809013367, "learning_rate": 0.0005, "loss": 2.1138, "step": 116430 }, { "epoch": 0.44319937882052024, "grad_norm": 0.18328292667865753, "learning_rate": 0.0005, "loss": 2.108, "step": 116440 }, { "epoch": 0.4432374412886429, "grad_norm": 0.12247510254383087, "learning_rate": 0.0005, "loss": 2.1225, "step": 116450 }, { "epoch": 0.4432755037567656, "grad_norm": 0.12218674272298813, "learning_rate": 0.0005, "loss": 2.1273, "step": 116460 }, { "epoch": 0.44331356622488827, "grad_norm": 0.11914005130529404, "learning_rate": 0.0005, "loss": 2.122, "step": 116470 }, { "epoch": 0.443351628693011, "grad_norm": 0.11963188648223877, "learning_rate": 0.0005, "loss": 2.1042, "step": 116480 }, { "epoch": 0.44338969116113364, "grad_norm": 0.11633283644914627, "learning_rate": 0.0005, "loss": 2.1199, "step": 116490 }, { "epoch": 0.44342775362925635, "grad_norm": 0.12306850403547287, "learning_rate": 0.0005, "loss": 2.1091, "step": 116500 }, { "epoch": 0.443465816097379, "grad_norm": 0.1175755187869072, "learning_rate": 0.0005, "loss": 2.1206, "step": 116510 }, { "epoch": 0.4435038785655017, "grad_norm": 0.13746808469295502, "learning_rate": 0.0005, "loss": 2.1184, "step": 116520 }, { "epoch": 0.4435419410336244, "grad_norm": 0.12363948673009872, "learning_rate": 0.0005, "loss": 2.1119, "step": 116530 }, { "epoch": 0.4435800035017471, "grad_norm": 0.12091385573148727, "learning_rate": 0.0005, "loss": 2.1162, "step": 116540 }, { "epoch": 0.44361806596986975, "grad_norm": 0.12593325972557068, "learning_rate": 0.0005, "loss": 2.1151, "step": 116550 }, { "epoch": 0.44365612843799246, "grad_norm": 0.14146867394447327, "learning_rate": 0.0005, "loss": 2.1049, "step": 116560 }, { "epoch": 0.4436941909061151, "grad_norm": 0.11309035122394562, "learning_rate": 0.0005, "loss": 2.1256, "step": 116570 }, { "epoch": 0.4437322533742378, "grad_norm": 0.11507805436849594, "learning_rate": 0.0005, "loss": 2.1094, "step": 116580 }, { "epoch": 0.4437703158423605, "grad_norm": 0.12811732292175293, "learning_rate": 0.0005, "loss": 2.1138, "step": 116590 }, { "epoch": 0.44380837831048314, "grad_norm": 0.13018152117729187, "learning_rate": 0.0005, "loss": 2.1078, "step": 116600 }, { "epoch": 0.44384644077860586, "grad_norm": 0.12831726670265198, "learning_rate": 0.0005, "loss": 2.1189, "step": 116610 }, { "epoch": 0.4438845032467285, "grad_norm": 0.13873030245304108, "learning_rate": 0.0005, "loss": 2.1101, "step": 116620 }, { "epoch": 0.4439225657148512, "grad_norm": 0.11311579495668411, "learning_rate": 0.0005, "loss": 2.126, "step": 116630 }, { "epoch": 0.4439606281829739, "grad_norm": 0.12090139091014862, "learning_rate": 0.0005, "loss": 2.1243, "step": 116640 }, { "epoch": 0.4439986906510966, "grad_norm": 0.1276332587003708, "learning_rate": 0.0005, "loss": 2.1162, "step": 116650 }, { "epoch": 0.44403675311921925, "grad_norm": 0.1259016990661621, "learning_rate": 0.0005, "loss": 2.114, "step": 116660 }, { "epoch": 0.44407481558734196, "grad_norm": 0.130734384059906, "learning_rate": 0.0005, "loss": 2.1099, "step": 116670 }, { "epoch": 0.4441128780554646, "grad_norm": 0.1242118775844574, "learning_rate": 0.0005, "loss": 2.1321, "step": 116680 }, { "epoch": 0.44415094052358733, "grad_norm": 0.12554258108139038, "learning_rate": 0.0005, "loss": 2.1209, "step": 116690 }, { "epoch": 0.44418900299171, "grad_norm": 0.12029888480901718, "learning_rate": 0.0005, "loss": 2.1058, "step": 116700 }, { "epoch": 0.4442270654598327, "grad_norm": 0.12346048653125763, "learning_rate": 0.0005, "loss": 2.1249, "step": 116710 }, { "epoch": 0.44426512792795536, "grad_norm": 0.13442575931549072, "learning_rate": 0.0005, "loss": 2.1199, "step": 116720 }, { "epoch": 0.444303190396078, "grad_norm": 0.15281645953655243, "learning_rate": 0.0005, "loss": 2.1141, "step": 116730 }, { "epoch": 0.44434125286420073, "grad_norm": 0.13485336303710938, "learning_rate": 0.0005, "loss": 2.1188, "step": 116740 }, { "epoch": 0.4443793153323234, "grad_norm": 0.12896035611629486, "learning_rate": 0.0005, "loss": 2.1105, "step": 116750 }, { "epoch": 0.4444173778004461, "grad_norm": 0.6141171455383301, "learning_rate": 0.0005, "loss": 2.1244, "step": 116760 }, { "epoch": 0.44445544026856876, "grad_norm": 0.27819642424583435, "learning_rate": 0.0005, "loss": 2.0998, "step": 116770 }, { "epoch": 0.44449350273669147, "grad_norm": 0.12349634617567062, "learning_rate": 0.0005, "loss": 2.1268, "step": 116780 }, { "epoch": 0.4445315652048141, "grad_norm": 0.1380884200334549, "learning_rate": 0.0005, "loss": 2.1243, "step": 116790 }, { "epoch": 0.44456962767293684, "grad_norm": 0.1299584060907364, "learning_rate": 0.0005, "loss": 2.1234, "step": 116800 }, { "epoch": 0.4446076901410595, "grad_norm": 0.14807891845703125, "learning_rate": 0.0005, "loss": 2.1258, "step": 116810 }, { "epoch": 0.4446457526091822, "grad_norm": 0.12537524104118347, "learning_rate": 0.0005, "loss": 2.1207, "step": 116820 }, { "epoch": 0.44468381507730487, "grad_norm": 0.12808291614055634, "learning_rate": 0.0005, "loss": 2.0979, "step": 116830 }, { "epoch": 0.4447218775454276, "grad_norm": 0.1370694786310196, "learning_rate": 0.0005, "loss": 2.1293, "step": 116840 }, { "epoch": 0.44475994001355024, "grad_norm": 0.12036896497011185, "learning_rate": 0.0005, "loss": 2.1326, "step": 116850 }, { "epoch": 0.4447980024816729, "grad_norm": 0.13645969331264496, "learning_rate": 0.0005, "loss": 2.116, "step": 116860 }, { "epoch": 0.4448360649497956, "grad_norm": 0.13020938634872437, "learning_rate": 0.0005, "loss": 2.1215, "step": 116870 }, { "epoch": 0.44487412741791826, "grad_norm": 0.1492963284254074, "learning_rate": 0.0005, "loss": 2.1324, "step": 116880 }, { "epoch": 0.444912189886041, "grad_norm": 0.11927158385515213, "learning_rate": 0.0005, "loss": 2.1251, "step": 116890 }, { "epoch": 0.44495025235416363, "grad_norm": 0.1152534931898117, "learning_rate": 0.0005, "loss": 2.1157, "step": 116900 }, { "epoch": 0.44498831482228635, "grad_norm": 0.11636972427368164, "learning_rate": 0.0005, "loss": 2.1194, "step": 116910 }, { "epoch": 0.445026377290409, "grad_norm": 0.1195891723036766, "learning_rate": 0.0005, "loss": 2.1221, "step": 116920 }, { "epoch": 0.4450644397585317, "grad_norm": 0.11876848340034485, "learning_rate": 0.0005, "loss": 2.124, "step": 116930 }, { "epoch": 0.4451025022266544, "grad_norm": 0.11512313038110733, "learning_rate": 0.0005, "loss": 2.1251, "step": 116940 }, { "epoch": 0.4451405646947771, "grad_norm": 0.1324792355298996, "learning_rate": 0.0005, "loss": 2.107, "step": 116950 }, { "epoch": 0.44517862716289974, "grad_norm": 0.13453422486782074, "learning_rate": 0.0005, "loss": 2.1117, "step": 116960 }, { "epoch": 0.44521668963102246, "grad_norm": 0.12057196348905563, "learning_rate": 0.0005, "loss": 2.1361, "step": 116970 }, { "epoch": 0.4452547520991451, "grad_norm": 0.11862435191869736, "learning_rate": 0.0005, "loss": 2.102, "step": 116980 }, { "epoch": 0.4452928145672678, "grad_norm": 0.1300220936536789, "learning_rate": 0.0005, "loss": 2.1042, "step": 116990 }, { "epoch": 0.4453308770353905, "grad_norm": 0.13250984251499176, "learning_rate": 0.0005, "loss": 2.1104, "step": 117000 }, { "epoch": 0.44536893950351314, "grad_norm": 0.1232600212097168, "learning_rate": 0.0005, "loss": 2.116, "step": 117010 }, { "epoch": 0.44540700197163585, "grad_norm": 0.12628987431526184, "learning_rate": 0.0005, "loss": 2.128, "step": 117020 }, { "epoch": 0.4454450644397585, "grad_norm": 0.13797855377197266, "learning_rate": 0.0005, "loss": 2.105, "step": 117030 }, { "epoch": 0.4454831269078812, "grad_norm": 0.12938253581523895, "learning_rate": 0.0005, "loss": 2.1191, "step": 117040 }, { "epoch": 0.4455211893760039, "grad_norm": 0.1354011744260788, "learning_rate": 0.0005, "loss": 2.1045, "step": 117050 }, { "epoch": 0.4455592518441266, "grad_norm": 0.12878401577472687, "learning_rate": 0.0005, "loss": 2.0976, "step": 117060 }, { "epoch": 0.44559731431224925, "grad_norm": 0.13308361172676086, "learning_rate": 0.0005, "loss": 2.1272, "step": 117070 }, { "epoch": 0.44563537678037196, "grad_norm": 0.1289629489183426, "learning_rate": 0.0005, "loss": 2.1174, "step": 117080 }, { "epoch": 0.4456734392484946, "grad_norm": 0.1313554346561432, "learning_rate": 0.0005, "loss": 2.1145, "step": 117090 }, { "epoch": 0.44571150171661733, "grad_norm": 0.14513030648231506, "learning_rate": 0.0005, "loss": 2.1233, "step": 117100 }, { "epoch": 0.44574956418474, "grad_norm": 0.11918684840202332, "learning_rate": 0.0005, "loss": 2.1289, "step": 117110 }, { "epoch": 0.4457876266528627, "grad_norm": 0.12679529190063477, "learning_rate": 0.0005, "loss": 2.1109, "step": 117120 }, { "epoch": 0.44582568912098536, "grad_norm": 0.11493398994207382, "learning_rate": 0.0005, "loss": 2.1159, "step": 117130 }, { "epoch": 0.44586375158910807, "grad_norm": 0.11769044399261475, "learning_rate": 0.0005, "loss": 2.1169, "step": 117140 }, { "epoch": 0.4459018140572307, "grad_norm": 0.1203865334391594, "learning_rate": 0.0005, "loss": 2.1065, "step": 117150 }, { "epoch": 0.4459398765253534, "grad_norm": 0.13108976185321808, "learning_rate": 0.0005, "loss": 2.1082, "step": 117160 }, { "epoch": 0.4459779389934761, "grad_norm": 0.12912702560424805, "learning_rate": 0.0005, "loss": 2.1262, "step": 117170 }, { "epoch": 0.44601600146159875, "grad_norm": 0.12747284770011902, "learning_rate": 0.0005, "loss": 2.1204, "step": 117180 }, { "epoch": 0.44605406392972147, "grad_norm": 0.11677516996860504, "learning_rate": 0.0005, "loss": 2.112, "step": 117190 }, { "epoch": 0.4460921263978441, "grad_norm": 0.12442485243082047, "learning_rate": 0.0005, "loss": 2.1095, "step": 117200 }, { "epoch": 0.44613018886596684, "grad_norm": 0.1313370019197464, "learning_rate": 0.0005, "loss": 2.1109, "step": 117210 }, { "epoch": 0.4461682513340895, "grad_norm": 0.12267378717660904, "learning_rate": 0.0005, "loss": 2.1326, "step": 117220 }, { "epoch": 0.4462063138022122, "grad_norm": 0.13062329590320587, "learning_rate": 0.0005, "loss": 2.1273, "step": 117230 }, { "epoch": 0.44624437627033486, "grad_norm": 0.13419316709041595, "learning_rate": 0.0005, "loss": 2.125, "step": 117240 }, { "epoch": 0.4462824387384576, "grad_norm": 0.12934669852256775, "learning_rate": 0.0005, "loss": 2.1333, "step": 117250 }, { "epoch": 0.44632050120658023, "grad_norm": 0.11728876829147339, "learning_rate": 0.0005, "loss": 2.1298, "step": 117260 }, { "epoch": 0.44635856367470295, "grad_norm": 0.1360110491514206, "learning_rate": 0.0005, "loss": 2.1124, "step": 117270 }, { "epoch": 0.4463966261428256, "grad_norm": 0.1395176500082016, "learning_rate": 0.0005, "loss": 2.1152, "step": 117280 }, { "epoch": 0.4464346886109483, "grad_norm": 0.143223375082016, "learning_rate": 0.0005, "loss": 2.11, "step": 117290 }, { "epoch": 0.446472751079071, "grad_norm": 0.13000799715518951, "learning_rate": 0.0005, "loss": 2.129, "step": 117300 }, { "epoch": 0.44651081354719363, "grad_norm": 0.12335141748189926, "learning_rate": 0.0005, "loss": 2.121, "step": 117310 }, { "epoch": 0.44654887601531634, "grad_norm": 0.11523394286632538, "learning_rate": 0.0005, "loss": 2.115, "step": 117320 }, { "epoch": 0.446586938483439, "grad_norm": 0.12152241170406342, "learning_rate": 0.0005, "loss": 2.1292, "step": 117330 }, { "epoch": 0.4466250009515617, "grad_norm": 0.12862616777420044, "learning_rate": 0.0005, "loss": 2.1394, "step": 117340 }, { "epoch": 0.44666306341968437, "grad_norm": 0.12332156300544739, "learning_rate": 0.0005, "loss": 2.1091, "step": 117350 }, { "epoch": 0.4467011258878071, "grad_norm": 0.14175184071063995, "learning_rate": 0.0005, "loss": 2.1073, "step": 117360 }, { "epoch": 0.44673918835592974, "grad_norm": 0.12240368872880936, "learning_rate": 0.0005, "loss": 2.1192, "step": 117370 }, { "epoch": 0.44677725082405245, "grad_norm": 0.13649588823318481, "learning_rate": 0.0005, "loss": 2.1151, "step": 117380 }, { "epoch": 0.4468153132921751, "grad_norm": 0.12765458226203918, "learning_rate": 0.0005, "loss": 2.1179, "step": 117390 }, { "epoch": 0.4468533757602978, "grad_norm": 0.12199946492910385, "learning_rate": 0.0005, "loss": 2.1124, "step": 117400 }, { "epoch": 0.4468914382284205, "grad_norm": 0.11614719778299332, "learning_rate": 0.0005, "loss": 2.1372, "step": 117410 }, { "epoch": 0.4469295006965432, "grad_norm": 0.11507588624954224, "learning_rate": 0.0005, "loss": 2.1175, "step": 117420 }, { "epoch": 0.44696756316466585, "grad_norm": 0.13302651047706604, "learning_rate": 0.0005, "loss": 2.1167, "step": 117430 }, { "epoch": 0.4470056256327885, "grad_norm": 0.1279689520597458, "learning_rate": 0.0005, "loss": 2.1213, "step": 117440 }, { "epoch": 0.4470436881009112, "grad_norm": 0.13449238240718842, "learning_rate": 0.0005, "loss": 2.1159, "step": 117450 }, { "epoch": 0.4470817505690339, "grad_norm": 0.13003098964691162, "learning_rate": 0.0005, "loss": 2.1162, "step": 117460 }, { "epoch": 0.4471198130371566, "grad_norm": 0.1376037746667862, "learning_rate": 0.0005, "loss": 2.1215, "step": 117470 }, { "epoch": 0.44715787550527925, "grad_norm": 0.12537793815135956, "learning_rate": 0.0005, "loss": 2.1152, "step": 117480 }, { "epoch": 0.44719593797340196, "grad_norm": 0.1256958544254303, "learning_rate": 0.0005, "loss": 2.1193, "step": 117490 }, { "epoch": 0.4472340004415246, "grad_norm": 0.1190306544303894, "learning_rate": 0.0005, "loss": 2.1174, "step": 117500 }, { "epoch": 0.4472720629096473, "grad_norm": 0.14315101504325867, "learning_rate": 0.0005, "loss": 2.1151, "step": 117510 }, { "epoch": 0.44731012537777, "grad_norm": 0.12836386263370514, "learning_rate": 0.0005, "loss": 2.1088, "step": 117520 }, { "epoch": 0.4473481878458927, "grad_norm": 0.11426154524087906, "learning_rate": 0.0005, "loss": 2.1106, "step": 117530 }, { "epoch": 0.44738625031401535, "grad_norm": 0.29652196168899536, "learning_rate": 0.0005, "loss": 2.1174, "step": 117540 }, { "epoch": 0.44742431278213807, "grad_norm": 0.13330289721488953, "learning_rate": 0.0005, "loss": 2.1299, "step": 117550 }, { "epoch": 0.4474623752502607, "grad_norm": 0.11894863098859787, "learning_rate": 0.0005, "loss": 2.1235, "step": 117560 }, { "epoch": 0.44750043771838344, "grad_norm": 0.11975117772817612, "learning_rate": 0.0005, "loss": 2.1219, "step": 117570 }, { "epoch": 0.4475385001865061, "grad_norm": 0.12065289169549942, "learning_rate": 0.0005, "loss": 2.12, "step": 117580 }, { "epoch": 0.44757656265462875, "grad_norm": 0.1413719654083252, "learning_rate": 0.0005, "loss": 2.1155, "step": 117590 }, { "epoch": 0.44761462512275146, "grad_norm": 0.12168973684310913, "learning_rate": 0.0005, "loss": 2.1124, "step": 117600 }, { "epoch": 0.4476526875908741, "grad_norm": 0.1406327337026596, "learning_rate": 0.0005, "loss": 2.119, "step": 117610 }, { "epoch": 0.44769075005899683, "grad_norm": 0.1254739761352539, "learning_rate": 0.0005, "loss": 2.12, "step": 117620 }, { "epoch": 0.4477288125271195, "grad_norm": 0.18341228365898132, "learning_rate": 0.0005, "loss": 2.1163, "step": 117630 }, { "epoch": 0.4477668749952422, "grad_norm": 0.36629945039749146, "learning_rate": 0.0005, "loss": 2.1258, "step": 117640 }, { "epoch": 0.44780493746336486, "grad_norm": 0.1329488456249237, "learning_rate": 0.0005, "loss": 2.1272, "step": 117650 }, { "epoch": 0.4478429999314876, "grad_norm": 0.11533725261688232, "learning_rate": 0.0005, "loss": 2.1146, "step": 117660 }, { "epoch": 0.44788106239961023, "grad_norm": 0.12699326872825623, "learning_rate": 0.0005, "loss": 2.1241, "step": 117670 }, { "epoch": 0.44791912486773294, "grad_norm": 0.1253686547279358, "learning_rate": 0.0005, "loss": 2.1347, "step": 117680 }, { "epoch": 0.4479571873358556, "grad_norm": 0.12520533800125122, "learning_rate": 0.0005, "loss": 2.13, "step": 117690 }, { "epoch": 0.4479952498039783, "grad_norm": 0.12939919531345367, "learning_rate": 0.0005, "loss": 2.1256, "step": 117700 }, { "epoch": 0.44803331227210097, "grad_norm": 0.12653513252735138, "learning_rate": 0.0005, "loss": 2.1313, "step": 117710 }, { "epoch": 0.4480713747402237, "grad_norm": 0.12521785497665405, "learning_rate": 0.0005, "loss": 2.1139, "step": 117720 }, { "epoch": 0.44810943720834634, "grad_norm": 0.1243586465716362, "learning_rate": 0.0005, "loss": 2.1118, "step": 117730 }, { "epoch": 0.448147499676469, "grad_norm": 0.12475720793008804, "learning_rate": 0.0005, "loss": 2.1167, "step": 117740 }, { "epoch": 0.4481855621445917, "grad_norm": 0.12455473840236664, "learning_rate": 0.0005, "loss": 2.1293, "step": 117750 }, { "epoch": 0.44822362461271437, "grad_norm": 0.13083574175834656, "learning_rate": 0.0005, "loss": 2.1083, "step": 117760 }, { "epoch": 0.4482616870808371, "grad_norm": 0.13085252046585083, "learning_rate": 0.0005, "loss": 2.1125, "step": 117770 }, { "epoch": 0.44829974954895974, "grad_norm": 0.12105019390583038, "learning_rate": 0.0005, "loss": 2.1142, "step": 117780 }, { "epoch": 0.44833781201708245, "grad_norm": 0.1281406283378601, "learning_rate": 0.0005, "loss": 2.1238, "step": 117790 }, { "epoch": 0.4483758744852051, "grad_norm": 0.14238356053829193, "learning_rate": 0.0005, "loss": 2.1091, "step": 117800 }, { "epoch": 0.4484139369533278, "grad_norm": 0.13639430701732635, "learning_rate": 0.0005, "loss": 2.1249, "step": 117810 }, { "epoch": 0.4484519994214505, "grad_norm": 0.11508873105049133, "learning_rate": 0.0005, "loss": 2.1225, "step": 117820 }, { "epoch": 0.4484900618895732, "grad_norm": 0.13065040111541748, "learning_rate": 0.0005, "loss": 2.1085, "step": 117830 }, { "epoch": 0.44852812435769585, "grad_norm": 0.1213822215795517, "learning_rate": 0.0005, "loss": 2.1026, "step": 117840 }, { "epoch": 0.44856618682581856, "grad_norm": 0.11827198415994644, "learning_rate": 0.0005, "loss": 2.1222, "step": 117850 }, { "epoch": 0.4486042492939412, "grad_norm": 0.13303664326667786, "learning_rate": 0.0005, "loss": 2.1276, "step": 117860 }, { "epoch": 0.4486423117620639, "grad_norm": 0.12218086421489716, "learning_rate": 0.0005, "loss": 2.1137, "step": 117870 }, { "epoch": 0.4486803742301866, "grad_norm": 0.13076268136501312, "learning_rate": 0.0005, "loss": 2.1098, "step": 117880 }, { "epoch": 0.44871843669830924, "grad_norm": 0.14012432098388672, "learning_rate": 0.0005, "loss": 2.1094, "step": 117890 }, { "epoch": 0.44875649916643195, "grad_norm": 0.12652704119682312, "learning_rate": 0.0005, "loss": 2.1136, "step": 117900 }, { "epoch": 0.4487945616345546, "grad_norm": 0.12594130635261536, "learning_rate": 0.0005, "loss": 2.1208, "step": 117910 }, { "epoch": 0.4488326241026773, "grad_norm": 0.12226948142051697, "learning_rate": 0.0005, "loss": 2.1179, "step": 117920 }, { "epoch": 0.4488706865708, "grad_norm": 0.1363731175661087, "learning_rate": 0.0005, "loss": 2.1141, "step": 117930 }, { "epoch": 0.4489087490389227, "grad_norm": 0.12058139592409134, "learning_rate": 0.0005, "loss": 2.1302, "step": 117940 }, { "epoch": 0.44894681150704535, "grad_norm": 0.12362996488809586, "learning_rate": 0.0005, "loss": 2.1101, "step": 117950 }, { "epoch": 0.44898487397516806, "grad_norm": 0.13131392002105713, "learning_rate": 0.0005, "loss": 2.1137, "step": 117960 }, { "epoch": 0.4490229364432907, "grad_norm": 0.14916491508483887, "learning_rate": 0.0005, "loss": 2.1318, "step": 117970 }, { "epoch": 0.44906099891141343, "grad_norm": 0.14153233170509338, "learning_rate": 0.0005, "loss": 2.1338, "step": 117980 }, { "epoch": 0.4490990613795361, "grad_norm": 0.12029541283845901, "learning_rate": 0.0005, "loss": 2.1119, "step": 117990 }, { "epoch": 0.4491371238476588, "grad_norm": 0.12881696224212646, "learning_rate": 0.0005, "loss": 2.1303, "step": 118000 }, { "epoch": 0.44917518631578146, "grad_norm": 0.11942708492279053, "learning_rate": 0.0005, "loss": 2.1093, "step": 118010 }, { "epoch": 0.4492132487839041, "grad_norm": 0.12929148972034454, "learning_rate": 0.0005, "loss": 2.1192, "step": 118020 }, { "epoch": 0.44925131125202683, "grad_norm": 0.1250161975622177, "learning_rate": 0.0005, "loss": 2.111, "step": 118030 }, { "epoch": 0.4492893737201495, "grad_norm": 0.12096728384494781, "learning_rate": 0.0005, "loss": 2.121, "step": 118040 }, { "epoch": 0.4493274361882722, "grad_norm": 0.12000516802072525, "learning_rate": 0.0005, "loss": 2.1126, "step": 118050 }, { "epoch": 0.44936549865639486, "grad_norm": 0.13474564254283905, "learning_rate": 0.0005, "loss": 2.1173, "step": 118060 }, { "epoch": 0.44940356112451757, "grad_norm": 0.12466412782669067, "learning_rate": 0.0005, "loss": 2.1226, "step": 118070 }, { "epoch": 0.4494416235926402, "grad_norm": 0.12615053355693817, "learning_rate": 0.0005, "loss": 2.1013, "step": 118080 }, { "epoch": 0.44947968606076294, "grad_norm": 0.1310206949710846, "learning_rate": 0.0005, "loss": 2.1129, "step": 118090 }, { "epoch": 0.4495177485288856, "grad_norm": 0.1346968561410904, "learning_rate": 0.0005, "loss": 2.1203, "step": 118100 }, { "epoch": 0.4495558109970083, "grad_norm": 0.1367679089307785, "learning_rate": 0.0005, "loss": 2.1282, "step": 118110 }, { "epoch": 0.44959387346513097, "grad_norm": 0.13630978763103485, "learning_rate": 0.0005, "loss": 2.1162, "step": 118120 }, { "epoch": 0.4496319359332537, "grad_norm": 0.12824587523937225, "learning_rate": 0.0005, "loss": 2.1198, "step": 118130 }, { "epoch": 0.44966999840137634, "grad_norm": 0.13851365447044373, "learning_rate": 0.0005, "loss": 2.1115, "step": 118140 }, { "epoch": 0.44970806086949905, "grad_norm": 0.12221483141183853, "learning_rate": 0.0005, "loss": 2.1203, "step": 118150 }, { "epoch": 0.4497461233376217, "grad_norm": 0.12099538743495941, "learning_rate": 0.0005, "loss": 2.1166, "step": 118160 }, { "epoch": 0.44978418580574436, "grad_norm": 0.133246511220932, "learning_rate": 0.0005, "loss": 2.1255, "step": 118170 }, { "epoch": 0.4498222482738671, "grad_norm": 0.1311062127351761, "learning_rate": 0.0005, "loss": 2.1099, "step": 118180 }, { "epoch": 0.44986031074198973, "grad_norm": 0.12322074174880981, "learning_rate": 0.0005, "loss": 2.127, "step": 118190 }, { "epoch": 0.44989837321011245, "grad_norm": 0.13975688815116882, "learning_rate": 0.0005, "loss": 2.1101, "step": 118200 }, { "epoch": 0.4499364356782351, "grad_norm": 0.11596209555864334, "learning_rate": 0.0005, "loss": 2.1145, "step": 118210 }, { "epoch": 0.4499744981463578, "grad_norm": 0.12691253423690796, "learning_rate": 0.0005, "loss": 2.1084, "step": 118220 }, { "epoch": 0.4500125606144805, "grad_norm": 0.12624835968017578, "learning_rate": 0.0005, "loss": 2.1121, "step": 118230 }, { "epoch": 0.4500506230826032, "grad_norm": 0.1271945983171463, "learning_rate": 0.0005, "loss": 2.1024, "step": 118240 }, { "epoch": 0.45008868555072584, "grad_norm": 0.13511629402637482, "learning_rate": 0.0005, "loss": 2.1372, "step": 118250 }, { "epoch": 0.45012674801884855, "grad_norm": 0.1170211210846901, "learning_rate": 0.0005, "loss": 2.1162, "step": 118260 }, { "epoch": 0.4501648104869712, "grad_norm": 0.12537570297718048, "learning_rate": 0.0005, "loss": 2.1118, "step": 118270 }, { "epoch": 0.4502028729550939, "grad_norm": 0.12275852262973785, "learning_rate": 0.0005, "loss": 2.1251, "step": 118280 }, { "epoch": 0.4502409354232166, "grad_norm": 0.12469831109046936, "learning_rate": 0.0005, "loss": 2.1252, "step": 118290 }, { "epoch": 0.45027899789133924, "grad_norm": 0.11219992488622665, "learning_rate": 0.0005, "loss": 2.116, "step": 118300 }, { "epoch": 0.45031706035946195, "grad_norm": 0.12267205864191055, "learning_rate": 0.0005, "loss": 2.1139, "step": 118310 }, { "epoch": 0.4503551228275846, "grad_norm": 0.1314876824617386, "learning_rate": 0.0005, "loss": 2.1161, "step": 118320 }, { "epoch": 0.4503931852957073, "grad_norm": 0.33807751536369324, "learning_rate": 0.0005, "loss": 2.1232, "step": 118330 }, { "epoch": 0.45043124776383, "grad_norm": 0.1275404542684555, "learning_rate": 0.0005, "loss": 2.1098, "step": 118340 }, { "epoch": 0.4504693102319527, "grad_norm": 0.1214970275759697, "learning_rate": 0.0005, "loss": 2.1164, "step": 118350 }, { "epoch": 0.45050737270007535, "grad_norm": 0.1309918314218521, "learning_rate": 0.0005, "loss": 2.107, "step": 118360 }, { "epoch": 0.45054543516819806, "grad_norm": 0.1250380277633667, "learning_rate": 0.0005, "loss": 2.1178, "step": 118370 }, { "epoch": 0.4505834976363207, "grad_norm": 0.1208469420671463, "learning_rate": 0.0005, "loss": 2.1186, "step": 118380 }, { "epoch": 0.45062156010444343, "grad_norm": 0.11546842753887177, "learning_rate": 0.0005, "loss": 2.134, "step": 118390 }, { "epoch": 0.4506596225725661, "grad_norm": 0.12166312336921692, "learning_rate": 0.0005, "loss": 2.1185, "step": 118400 }, { "epoch": 0.4506976850406888, "grad_norm": 0.12632746994495392, "learning_rate": 0.0005, "loss": 2.1319, "step": 118410 }, { "epoch": 0.45073574750881146, "grad_norm": 0.12581978738307953, "learning_rate": 0.0005, "loss": 2.1167, "step": 118420 }, { "epoch": 0.45077380997693417, "grad_norm": 0.13596107065677643, "learning_rate": 0.0005, "loss": 2.1097, "step": 118430 }, { "epoch": 0.4508118724450568, "grad_norm": 0.12639673054218292, "learning_rate": 0.0005, "loss": 2.1277, "step": 118440 }, { "epoch": 0.4508499349131795, "grad_norm": 0.13983288407325745, "learning_rate": 0.0005, "loss": 2.1065, "step": 118450 }, { "epoch": 0.4508879973813022, "grad_norm": 0.11982744187116623, "learning_rate": 0.0005, "loss": 2.1226, "step": 118460 }, { "epoch": 0.45092605984942485, "grad_norm": 0.1236228197813034, "learning_rate": 0.0005, "loss": 2.1145, "step": 118470 }, { "epoch": 0.45096412231754757, "grad_norm": 0.130223348736763, "learning_rate": 0.0005, "loss": 2.119, "step": 118480 }, { "epoch": 0.4510021847856702, "grad_norm": 0.12643398344516754, "learning_rate": 0.0005, "loss": 2.1252, "step": 118490 }, { "epoch": 0.45104024725379294, "grad_norm": 0.12892575562000275, "learning_rate": 0.0005, "loss": 2.1259, "step": 118500 }, { "epoch": 0.4510783097219156, "grad_norm": 0.12612701952457428, "learning_rate": 0.0005, "loss": 2.1018, "step": 118510 }, { "epoch": 0.4511163721900383, "grad_norm": 0.16297845542430878, "learning_rate": 0.0005, "loss": 2.1259, "step": 118520 }, { "epoch": 0.45115443465816096, "grad_norm": 0.12743675708770752, "learning_rate": 0.0005, "loss": 2.1228, "step": 118530 }, { "epoch": 0.4511924971262837, "grad_norm": 0.12563501298427582, "learning_rate": 0.0005, "loss": 2.126, "step": 118540 }, { "epoch": 0.45123055959440633, "grad_norm": 0.1309593915939331, "learning_rate": 0.0005, "loss": 2.1146, "step": 118550 }, { "epoch": 0.45126862206252905, "grad_norm": 0.1179802417755127, "learning_rate": 0.0005, "loss": 2.1174, "step": 118560 }, { "epoch": 0.4513066845306517, "grad_norm": 0.11997191607952118, "learning_rate": 0.0005, "loss": 2.1132, "step": 118570 }, { "epoch": 0.4513447469987744, "grad_norm": 0.1290757805109024, "learning_rate": 0.0005, "loss": 2.1307, "step": 118580 }, { "epoch": 0.4513828094668971, "grad_norm": 0.12476909905672073, "learning_rate": 0.0005, "loss": 2.107, "step": 118590 }, { "epoch": 0.45142087193501973, "grad_norm": 0.1275877058506012, "learning_rate": 0.0005, "loss": 2.1195, "step": 118600 }, { "epoch": 0.45145893440314244, "grad_norm": 0.1198502779006958, "learning_rate": 0.0005, "loss": 2.1218, "step": 118610 }, { "epoch": 0.4514969968712651, "grad_norm": 0.13268321752548218, "learning_rate": 0.0005, "loss": 2.1085, "step": 118620 }, { "epoch": 0.4515350593393878, "grad_norm": 0.12470264732837677, "learning_rate": 0.0005, "loss": 2.1177, "step": 118630 }, { "epoch": 0.45157312180751047, "grad_norm": 0.12971359491348267, "learning_rate": 0.0005, "loss": 2.1115, "step": 118640 }, { "epoch": 0.4516111842756332, "grad_norm": 0.12227847427129745, "learning_rate": 0.0005, "loss": 2.1138, "step": 118650 }, { "epoch": 0.45164924674375584, "grad_norm": 0.12495315819978714, "learning_rate": 0.0005, "loss": 2.1317, "step": 118660 }, { "epoch": 0.45168730921187855, "grad_norm": 0.12900105118751526, "learning_rate": 0.0005, "loss": 2.1157, "step": 118670 }, { "epoch": 0.4517253716800012, "grad_norm": 0.13668887317180634, "learning_rate": 0.0005, "loss": 2.1263, "step": 118680 }, { "epoch": 0.4517634341481239, "grad_norm": 0.13190940022468567, "learning_rate": 0.0005, "loss": 2.1163, "step": 118690 }, { "epoch": 0.4518014966162466, "grad_norm": 0.1314956396818161, "learning_rate": 0.0005, "loss": 2.1271, "step": 118700 }, { "epoch": 0.4518395590843693, "grad_norm": 0.12434843927621841, "learning_rate": 0.0005, "loss": 2.1177, "step": 118710 }, { "epoch": 0.45187762155249195, "grad_norm": 0.12370478361845016, "learning_rate": 0.0005, "loss": 2.1151, "step": 118720 }, { "epoch": 0.4519156840206146, "grad_norm": 0.11864922940731049, "learning_rate": 0.0005, "loss": 2.1165, "step": 118730 }, { "epoch": 0.4519537464887373, "grad_norm": 0.14261092245578766, "learning_rate": 0.0005, "loss": 2.1046, "step": 118740 }, { "epoch": 0.45199180895686, "grad_norm": 0.1361154317855835, "learning_rate": 0.0005, "loss": 2.1104, "step": 118750 }, { "epoch": 0.4520298714249827, "grad_norm": 0.1354462206363678, "learning_rate": 0.0005, "loss": 2.1041, "step": 118760 }, { "epoch": 0.45206793389310534, "grad_norm": 0.12536115944385529, "learning_rate": 0.0005, "loss": 2.113, "step": 118770 }, { "epoch": 0.45210599636122806, "grad_norm": 0.14076842367649078, "learning_rate": 0.0005, "loss": 2.1154, "step": 118780 }, { "epoch": 0.4521440588293507, "grad_norm": 0.12235935777425766, "learning_rate": 0.0005, "loss": 2.1107, "step": 118790 }, { "epoch": 0.4521821212974734, "grad_norm": 0.13245408236980438, "learning_rate": 0.0005, "loss": 2.1078, "step": 118800 }, { "epoch": 0.4522201837655961, "grad_norm": 0.11906524002552032, "learning_rate": 0.0005, "loss": 2.1334, "step": 118810 }, { "epoch": 0.4522582462337188, "grad_norm": 0.1383557766675949, "learning_rate": 0.0005, "loss": 2.1179, "step": 118820 }, { "epoch": 0.45229630870184145, "grad_norm": 0.1393071711063385, "learning_rate": 0.0005, "loss": 2.1011, "step": 118830 }, { "epoch": 0.45233437116996417, "grad_norm": 0.12187449634075165, "learning_rate": 0.0005, "loss": 2.1076, "step": 118840 }, { "epoch": 0.4523724336380868, "grad_norm": 0.11439521610736847, "learning_rate": 0.0005, "loss": 2.1077, "step": 118850 }, { "epoch": 0.45241049610620954, "grad_norm": 0.12313267588615417, "learning_rate": 0.0005, "loss": 2.1105, "step": 118860 }, { "epoch": 0.4524485585743322, "grad_norm": 0.1316363662481308, "learning_rate": 0.0005, "loss": 2.1034, "step": 118870 }, { "epoch": 0.45248662104245485, "grad_norm": 0.13418705761432648, "learning_rate": 0.0005, "loss": 2.1143, "step": 118880 }, { "epoch": 0.45252468351057756, "grad_norm": 0.12311230599880219, "learning_rate": 0.0005, "loss": 2.1284, "step": 118890 }, { "epoch": 0.4525627459787002, "grad_norm": 0.12272609025239944, "learning_rate": 0.0005, "loss": 2.1248, "step": 118900 }, { "epoch": 0.45260080844682293, "grad_norm": 0.19879673421382904, "learning_rate": 0.0005, "loss": 2.118, "step": 118910 }, { "epoch": 0.4526388709149456, "grad_norm": 0.11573578417301178, "learning_rate": 0.0005, "loss": 2.1189, "step": 118920 }, { "epoch": 0.4526769333830683, "grad_norm": 0.12728090584278107, "learning_rate": 0.0005, "loss": 2.1185, "step": 118930 }, { "epoch": 0.45271499585119096, "grad_norm": 0.1399592161178589, "learning_rate": 0.0005, "loss": 2.1225, "step": 118940 }, { "epoch": 0.4527530583193137, "grad_norm": 0.1357944756746292, "learning_rate": 0.0005, "loss": 2.1164, "step": 118950 }, { "epoch": 0.45279112078743633, "grad_norm": 0.13509675860404968, "learning_rate": 0.0005, "loss": 2.1186, "step": 118960 }, { "epoch": 0.45282918325555904, "grad_norm": 0.13070659339427948, "learning_rate": 0.0005, "loss": 2.1115, "step": 118970 }, { "epoch": 0.4528672457236817, "grad_norm": 0.13457068800926208, "learning_rate": 0.0005, "loss": 2.1273, "step": 118980 }, { "epoch": 0.4529053081918044, "grad_norm": 0.13255858421325684, "learning_rate": 0.0005, "loss": 2.1235, "step": 118990 }, { "epoch": 0.45294337065992707, "grad_norm": 0.1382453590631485, "learning_rate": 0.0005, "loss": 2.1266, "step": 119000 }, { "epoch": 0.4529814331280498, "grad_norm": 0.12324276566505432, "learning_rate": 0.0005, "loss": 2.1109, "step": 119010 }, { "epoch": 0.45301949559617244, "grad_norm": 0.12160609662532806, "learning_rate": 0.0005, "loss": 2.1069, "step": 119020 }, { "epoch": 0.4530575580642951, "grad_norm": 0.13407763838768005, "learning_rate": 0.0005, "loss": 2.0978, "step": 119030 }, { "epoch": 0.4530956205324178, "grad_norm": 0.1184903234243393, "learning_rate": 0.0005, "loss": 2.1136, "step": 119040 }, { "epoch": 0.45313368300054047, "grad_norm": 0.13084222376346588, "learning_rate": 0.0005, "loss": 2.111, "step": 119050 }, { "epoch": 0.4531717454686632, "grad_norm": 0.13515977561473846, "learning_rate": 0.0005, "loss": 2.1111, "step": 119060 }, { "epoch": 0.45320980793678584, "grad_norm": 0.12019907683134079, "learning_rate": 0.0005, "loss": 2.1271, "step": 119070 }, { "epoch": 0.45324787040490855, "grad_norm": 0.13739806413650513, "learning_rate": 0.0005, "loss": 2.1073, "step": 119080 }, { "epoch": 0.4532859328730312, "grad_norm": 0.12706531584262848, "learning_rate": 0.0005, "loss": 2.1061, "step": 119090 }, { "epoch": 0.4533239953411539, "grad_norm": 0.13009122014045715, "learning_rate": 0.0005, "loss": 2.131, "step": 119100 }, { "epoch": 0.4533620578092766, "grad_norm": 0.11646008491516113, "learning_rate": 0.0005, "loss": 2.1205, "step": 119110 }, { "epoch": 0.4534001202773993, "grad_norm": 0.1327752023935318, "learning_rate": 0.0005, "loss": 2.1085, "step": 119120 }, { "epoch": 0.45343818274552194, "grad_norm": 0.12436755001544952, "learning_rate": 0.0005, "loss": 2.1127, "step": 119130 }, { "epoch": 0.45347624521364466, "grad_norm": 0.12094046920537949, "learning_rate": 0.0005, "loss": 2.1278, "step": 119140 }, { "epoch": 0.4535143076817673, "grad_norm": 0.11031058430671692, "learning_rate": 0.0005, "loss": 2.1342, "step": 119150 }, { "epoch": 0.45355237014988997, "grad_norm": 0.11060208082199097, "learning_rate": 0.0005, "loss": 2.1157, "step": 119160 }, { "epoch": 0.4535904326180127, "grad_norm": 0.1298583447933197, "learning_rate": 0.0005, "loss": 2.1148, "step": 119170 }, { "epoch": 0.45362849508613534, "grad_norm": 0.1342618614435196, "learning_rate": 0.0005, "loss": 2.1139, "step": 119180 }, { "epoch": 0.45366655755425805, "grad_norm": 0.12159561365842819, "learning_rate": 0.0005, "loss": 2.119, "step": 119190 }, { "epoch": 0.4537046200223807, "grad_norm": 0.12042984366416931, "learning_rate": 0.0005, "loss": 2.127, "step": 119200 }, { "epoch": 0.4537426824905034, "grad_norm": 0.13506831228733063, "learning_rate": 0.0005, "loss": 2.1248, "step": 119210 }, { "epoch": 0.4537807449586261, "grad_norm": 0.12235353887081146, "learning_rate": 0.0005, "loss": 2.1021, "step": 119220 }, { "epoch": 0.4538188074267488, "grad_norm": 0.12120179831981659, "learning_rate": 0.0005, "loss": 2.11, "step": 119230 }, { "epoch": 0.45385686989487145, "grad_norm": 0.12782564759254456, "learning_rate": 0.0005, "loss": 2.1183, "step": 119240 }, { "epoch": 0.45389493236299416, "grad_norm": 0.12691909074783325, "learning_rate": 0.0005, "loss": 2.1035, "step": 119250 }, { "epoch": 0.4539329948311168, "grad_norm": 0.12976911664009094, "learning_rate": 0.0005, "loss": 2.1298, "step": 119260 }, { "epoch": 0.45397105729923953, "grad_norm": 0.12581056356430054, "learning_rate": 0.0005, "loss": 2.1091, "step": 119270 }, { "epoch": 0.4540091197673622, "grad_norm": 0.11505091190338135, "learning_rate": 0.0005, "loss": 2.1229, "step": 119280 }, { "epoch": 0.4540471822354849, "grad_norm": 0.1203627660870552, "learning_rate": 0.0005, "loss": 2.1116, "step": 119290 }, { "epoch": 0.45408524470360756, "grad_norm": 0.13732272386550903, "learning_rate": 0.0005, "loss": 2.1382, "step": 119300 }, { "epoch": 0.4541233071717302, "grad_norm": 0.12399745732545853, "learning_rate": 0.0005, "loss": 2.118, "step": 119310 }, { "epoch": 0.45416136963985293, "grad_norm": 0.13304689526557922, "learning_rate": 0.0005, "loss": 2.1259, "step": 119320 }, { "epoch": 0.4541994321079756, "grad_norm": 0.12508970499038696, "learning_rate": 0.0005, "loss": 2.1087, "step": 119330 }, { "epoch": 0.4542374945760983, "grad_norm": 0.12920893728733063, "learning_rate": 0.0005, "loss": 2.1241, "step": 119340 }, { "epoch": 0.45427555704422096, "grad_norm": 0.12676669657230377, "learning_rate": 0.0005, "loss": 2.1073, "step": 119350 }, { "epoch": 0.45431361951234367, "grad_norm": 0.1385577917098999, "learning_rate": 0.0005, "loss": 2.1087, "step": 119360 }, { "epoch": 0.4543516819804663, "grad_norm": 0.13354946672916412, "learning_rate": 0.0005, "loss": 2.1165, "step": 119370 }, { "epoch": 0.45438974444858904, "grad_norm": 0.11637165397405624, "learning_rate": 0.0005, "loss": 2.0929, "step": 119380 }, { "epoch": 0.4544278069167117, "grad_norm": 0.12723296880722046, "learning_rate": 0.0005, "loss": 2.1104, "step": 119390 }, { "epoch": 0.4544658693848344, "grad_norm": 0.1271418035030365, "learning_rate": 0.0005, "loss": 2.1048, "step": 119400 }, { "epoch": 0.45450393185295707, "grad_norm": 0.1238766461610794, "learning_rate": 0.0005, "loss": 2.1163, "step": 119410 }, { "epoch": 0.4545419943210798, "grad_norm": 0.12604381144046783, "learning_rate": 0.0005, "loss": 2.1144, "step": 119420 }, { "epoch": 0.45458005678920244, "grad_norm": 0.12242569029331207, "learning_rate": 0.0005, "loss": 2.1164, "step": 119430 }, { "epoch": 0.45461811925732515, "grad_norm": 0.12519843876361847, "learning_rate": 0.0005, "loss": 2.1088, "step": 119440 }, { "epoch": 0.4546561817254478, "grad_norm": 0.1285882592201233, "learning_rate": 0.0005, "loss": 2.0992, "step": 119450 }, { "epoch": 0.45469424419357046, "grad_norm": 0.13689425587654114, "learning_rate": 0.0005, "loss": 2.1074, "step": 119460 }, { "epoch": 0.4547323066616932, "grad_norm": 0.11747127771377563, "learning_rate": 0.0005, "loss": 2.1132, "step": 119470 }, { "epoch": 0.45477036912981583, "grad_norm": 0.1399627923965454, "learning_rate": 0.0005, "loss": 2.1144, "step": 119480 }, { "epoch": 0.45480843159793855, "grad_norm": 0.1311253160238266, "learning_rate": 0.0005, "loss": 2.1077, "step": 119490 }, { "epoch": 0.4548464940660612, "grad_norm": 0.1293565183877945, "learning_rate": 0.0005, "loss": 2.0939, "step": 119500 }, { "epoch": 0.4548845565341839, "grad_norm": 0.13139833509922028, "learning_rate": 0.0005, "loss": 2.1202, "step": 119510 }, { "epoch": 0.45492261900230657, "grad_norm": 0.13386030495166779, "learning_rate": 0.0005, "loss": 2.1082, "step": 119520 }, { "epoch": 0.4549606814704293, "grad_norm": 0.12311594933271408, "learning_rate": 0.0005, "loss": 2.1154, "step": 119530 }, { "epoch": 0.45499874393855194, "grad_norm": 0.11787018179893494, "learning_rate": 0.0005, "loss": 2.12, "step": 119540 }, { "epoch": 0.45503680640667465, "grad_norm": 0.13005182147026062, "learning_rate": 0.0005, "loss": 2.1028, "step": 119550 }, { "epoch": 0.4550748688747973, "grad_norm": 0.13512970507144928, "learning_rate": 0.0005, "loss": 2.1132, "step": 119560 }, { "epoch": 0.45511293134292, "grad_norm": 0.1321474313735962, "learning_rate": 0.0005, "loss": 2.1312, "step": 119570 }, { "epoch": 0.4551509938110427, "grad_norm": 0.12341282516717911, "learning_rate": 0.0005, "loss": 2.1119, "step": 119580 }, { "epoch": 0.4551890562791654, "grad_norm": 0.1225198283791542, "learning_rate": 0.0005, "loss": 2.1229, "step": 119590 }, { "epoch": 0.45522711874728805, "grad_norm": 0.12121542543172836, "learning_rate": 0.0005, "loss": 2.1093, "step": 119600 }, { "epoch": 0.4552651812154107, "grad_norm": 0.12483129650354385, "learning_rate": 0.0005, "loss": 2.1201, "step": 119610 }, { "epoch": 0.4553032436835334, "grad_norm": 0.11805702745914459, "learning_rate": 0.0005, "loss": 2.1139, "step": 119620 }, { "epoch": 0.4553413061516561, "grad_norm": 0.12919245660305023, "learning_rate": 0.0005, "loss": 2.1272, "step": 119630 }, { "epoch": 0.4553793686197788, "grad_norm": 0.14529746770858765, "learning_rate": 0.0005, "loss": 2.1187, "step": 119640 }, { "epoch": 0.45541743108790145, "grad_norm": 0.13631920516490936, "learning_rate": 0.0005, "loss": 2.1215, "step": 119650 }, { "epoch": 0.45545549355602416, "grad_norm": 0.1259685754776001, "learning_rate": 0.0005, "loss": 2.1186, "step": 119660 }, { "epoch": 0.4554935560241468, "grad_norm": 0.12155263125896454, "learning_rate": 0.0005, "loss": 2.1213, "step": 119670 }, { "epoch": 0.45553161849226953, "grad_norm": 0.1261385977268219, "learning_rate": 0.0005, "loss": 2.1167, "step": 119680 }, { "epoch": 0.4555696809603922, "grad_norm": 0.11714118719100952, "learning_rate": 0.0005, "loss": 2.1151, "step": 119690 }, { "epoch": 0.4556077434285149, "grad_norm": 0.12431380152702332, "learning_rate": 0.0005, "loss": 2.1202, "step": 119700 }, { "epoch": 0.45564580589663756, "grad_norm": 0.13070593774318695, "learning_rate": 0.0005, "loss": 2.1225, "step": 119710 }, { "epoch": 0.45568386836476027, "grad_norm": 0.12562495470046997, "learning_rate": 0.0005, "loss": 2.1159, "step": 119720 }, { "epoch": 0.4557219308328829, "grad_norm": 0.11666475981473923, "learning_rate": 0.0005, "loss": 2.1157, "step": 119730 }, { "epoch": 0.4557599933010056, "grad_norm": 0.1263495534658432, "learning_rate": 0.0005, "loss": 2.1256, "step": 119740 }, { "epoch": 0.4557980557691283, "grad_norm": 0.12585152685642242, "learning_rate": 0.0005, "loss": 2.1216, "step": 119750 }, { "epoch": 0.45583611823725095, "grad_norm": 0.14227187633514404, "learning_rate": 0.0005, "loss": 2.1036, "step": 119760 }, { "epoch": 0.45587418070537367, "grad_norm": 0.22283364832401276, "learning_rate": 0.0005, "loss": 2.1074, "step": 119770 }, { "epoch": 0.4559122431734963, "grad_norm": 0.12294365465641022, "learning_rate": 0.0005, "loss": 2.1134, "step": 119780 }, { "epoch": 0.45595030564161904, "grad_norm": 0.11970822513103485, "learning_rate": 0.0005, "loss": 2.1277, "step": 119790 }, { "epoch": 0.4559883681097417, "grad_norm": 0.12036450952291489, "learning_rate": 0.0005, "loss": 2.1103, "step": 119800 }, { "epoch": 0.4560264305778644, "grad_norm": 0.1168990209698677, "learning_rate": 0.0005, "loss": 2.1158, "step": 119810 }, { "epoch": 0.45606449304598706, "grad_norm": 0.12877270579338074, "learning_rate": 0.0005, "loss": 2.1249, "step": 119820 }, { "epoch": 0.4561025555141098, "grad_norm": 0.11663218587636948, "learning_rate": 0.0005, "loss": 2.108, "step": 119830 }, { "epoch": 0.45614061798223243, "grad_norm": 0.12282679975032806, "learning_rate": 0.0005, "loss": 2.1071, "step": 119840 }, { "epoch": 0.45617868045035515, "grad_norm": 0.12231219559907913, "learning_rate": 0.0005, "loss": 2.1183, "step": 119850 }, { "epoch": 0.4562167429184778, "grad_norm": 0.11906345933675766, "learning_rate": 0.0005, "loss": 2.1181, "step": 119860 }, { "epoch": 0.4562548053866005, "grad_norm": 0.1266046166419983, "learning_rate": 0.0005, "loss": 2.1279, "step": 119870 }, { "epoch": 0.45629286785472317, "grad_norm": 0.1269948035478592, "learning_rate": 0.0005, "loss": 2.111, "step": 119880 }, { "epoch": 0.45633093032284583, "grad_norm": 0.12274617701768875, "learning_rate": 0.0005, "loss": 2.1254, "step": 119890 }, { "epoch": 0.45636899279096854, "grad_norm": 0.13784544169902802, "learning_rate": 0.0005, "loss": 2.115, "step": 119900 }, { "epoch": 0.4564070552590912, "grad_norm": 0.1259078085422516, "learning_rate": 0.0005, "loss": 2.1147, "step": 119910 }, { "epoch": 0.4564451177272139, "grad_norm": 0.14160633087158203, "learning_rate": 0.0005, "loss": 2.119, "step": 119920 }, { "epoch": 0.45648318019533657, "grad_norm": 0.12463352829217911, "learning_rate": 0.0005, "loss": 2.1084, "step": 119930 }, { "epoch": 0.4565212426634593, "grad_norm": 0.1392524242401123, "learning_rate": 0.0005, "loss": 2.1304, "step": 119940 }, { "epoch": 0.45655930513158194, "grad_norm": 0.12912337481975555, "learning_rate": 0.0005, "loss": 2.1133, "step": 119950 }, { "epoch": 0.45659736759970465, "grad_norm": 0.15192027390003204, "learning_rate": 0.0005, "loss": 2.1073, "step": 119960 }, { "epoch": 0.4566354300678273, "grad_norm": 0.13262653350830078, "learning_rate": 0.0005, "loss": 2.1253, "step": 119970 }, { "epoch": 0.45667349253595, "grad_norm": 0.12109184265136719, "learning_rate": 0.0005, "loss": 2.1185, "step": 119980 }, { "epoch": 0.4567115550040727, "grad_norm": 0.1148374006152153, "learning_rate": 0.0005, "loss": 2.1103, "step": 119990 }, { "epoch": 0.4567496174721954, "grad_norm": 0.11489134281873703, "learning_rate": 0.0005, "loss": 2.1197, "step": 120000 }, { "epoch": 0.45678767994031805, "grad_norm": 0.11090307682752609, "learning_rate": 0.0005, "loss": 2.1197, "step": 120010 }, { "epoch": 0.45682574240844076, "grad_norm": 0.12963759899139404, "learning_rate": 0.0005, "loss": 2.1202, "step": 120020 }, { "epoch": 0.4568638048765634, "grad_norm": 0.12337867170572281, "learning_rate": 0.0005, "loss": 2.1112, "step": 120030 }, { "epoch": 0.4569018673446861, "grad_norm": 0.12097255140542984, "learning_rate": 0.0005, "loss": 2.1197, "step": 120040 }, { "epoch": 0.4569399298128088, "grad_norm": 0.1328166127204895, "learning_rate": 0.0005, "loss": 2.1085, "step": 120050 }, { "epoch": 0.45697799228093144, "grad_norm": 0.12114717811346054, "learning_rate": 0.0005, "loss": 2.1142, "step": 120060 }, { "epoch": 0.45701605474905416, "grad_norm": 0.13208146393299103, "learning_rate": 0.0005, "loss": 2.1105, "step": 120070 }, { "epoch": 0.4570541172171768, "grad_norm": 0.12861287593841553, "learning_rate": 0.0005, "loss": 2.1236, "step": 120080 }, { "epoch": 0.4570921796852995, "grad_norm": 0.12194353342056274, "learning_rate": 0.0005, "loss": 2.119, "step": 120090 }, { "epoch": 0.4571302421534222, "grad_norm": 0.13183648884296417, "learning_rate": 0.0005, "loss": 2.1132, "step": 120100 }, { "epoch": 0.4571683046215449, "grad_norm": 0.12721547484397888, "learning_rate": 0.0005, "loss": 2.1144, "step": 120110 }, { "epoch": 0.45720636708966755, "grad_norm": 0.11995424330234528, "learning_rate": 0.0005, "loss": 2.1203, "step": 120120 }, { "epoch": 0.45724442955779027, "grad_norm": 0.12486982345581055, "learning_rate": 0.0005, "loss": 2.0977, "step": 120130 }, { "epoch": 0.4572824920259129, "grad_norm": 0.12625457346439362, "learning_rate": 0.0005, "loss": 2.1169, "step": 120140 }, { "epoch": 0.45732055449403564, "grad_norm": 0.1323632448911667, "learning_rate": 0.0005, "loss": 2.1028, "step": 120150 }, { "epoch": 0.4573586169621583, "grad_norm": 0.11913406103849411, "learning_rate": 0.0005, "loss": 2.1155, "step": 120160 }, { "epoch": 0.45739667943028095, "grad_norm": 0.13413356244564056, "learning_rate": 0.0005, "loss": 2.1013, "step": 120170 }, { "epoch": 0.45743474189840366, "grad_norm": 0.14971719682216644, "learning_rate": 0.0005, "loss": 2.1206, "step": 120180 }, { "epoch": 0.4574728043665263, "grad_norm": 0.13183917105197906, "learning_rate": 0.0005, "loss": 2.1204, "step": 120190 }, { "epoch": 0.45751086683464903, "grad_norm": 0.12312076985836029, "learning_rate": 0.0005, "loss": 2.1118, "step": 120200 }, { "epoch": 0.4575489293027717, "grad_norm": 0.11903540045022964, "learning_rate": 0.0005, "loss": 2.13, "step": 120210 }, { "epoch": 0.4575869917708944, "grad_norm": 0.12743575870990753, "learning_rate": 0.0005, "loss": 2.1044, "step": 120220 }, { "epoch": 0.45762505423901706, "grad_norm": 0.11738581955432892, "learning_rate": 0.0005, "loss": 2.1138, "step": 120230 }, { "epoch": 0.45766311670713977, "grad_norm": 0.13653422892093658, "learning_rate": 0.0005, "loss": 2.1281, "step": 120240 }, { "epoch": 0.45770117917526243, "grad_norm": 0.12781554460525513, "learning_rate": 0.0005, "loss": 2.1076, "step": 120250 }, { "epoch": 0.45773924164338514, "grad_norm": 0.14034056663513184, "learning_rate": 0.0005, "loss": 2.1209, "step": 120260 }, { "epoch": 0.4577773041115078, "grad_norm": 0.12680433690547943, "learning_rate": 0.0005, "loss": 2.1181, "step": 120270 }, { "epoch": 0.4578153665796305, "grad_norm": 0.13826939463615417, "learning_rate": 0.0005, "loss": 2.1348, "step": 120280 }, { "epoch": 0.45785342904775317, "grad_norm": 0.1185794547200203, "learning_rate": 0.0005, "loss": 2.1201, "step": 120290 }, { "epoch": 0.4578914915158759, "grad_norm": 0.12192360311746597, "learning_rate": 0.0005, "loss": 2.0962, "step": 120300 }, { "epoch": 0.45792955398399854, "grad_norm": 0.14247040450572968, "learning_rate": 0.0005, "loss": 2.1049, "step": 120310 }, { "epoch": 0.4579676164521212, "grad_norm": 0.1364268809556961, "learning_rate": 0.0005, "loss": 2.1167, "step": 120320 }, { "epoch": 0.4580056789202439, "grad_norm": 0.13730765879154205, "learning_rate": 0.0005, "loss": 2.1307, "step": 120330 }, { "epoch": 0.45804374138836657, "grad_norm": 0.12975889444351196, "learning_rate": 0.0005, "loss": 2.1199, "step": 120340 }, { "epoch": 0.4580818038564893, "grad_norm": 0.12218843400478363, "learning_rate": 0.0005, "loss": 2.1178, "step": 120350 }, { "epoch": 0.45811986632461194, "grad_norm": 0.11997194588184357, "learning_rate": 0.0005, "loss": 2.119, "step": 120360 }, { "epoch": 0.45815792879273465, "grad_norm": 0.1225312352180481, "learning_rate": 0.0005, "loss": 2.1224, "step": 120370 }, { "epoch": 0.4581959912608573, "grad_norm": 0.1350684016942978, "learning_rate": 0.0005, "loss": 2.1216, "step": 120380 }, { "epoch": 0.45823405372898, "grad_norm": 0.11670040339231491, "learning_rate": 0.0005, "loss": 2.1154, "step": 120390 }, { "epoch": 0.4582721161971027, "grad_norm": 0.1220269724726677, "learning_rate": 0.0005, "loss": 2.1154, "step": 120400 }, { "epoch": 0.4583101786652254, "grad_norm": 0.13793805241584778, "learning_rate": 0.0005, "loss": 2.1157, "step": 120410 }, { "epoch": 0.45834824113334804, "grad_norm": 0.130221426486969, "learning_rate": 0.0005, "loss": 2.124, "step": 120420 }, { "epoch": 0.45838630360147076, "grad_norm": 0.12842504680156708, "learning_rate": 0.0005, "loss": 2.1187, "step": 120430 }, { "epoch": 0.4584243660695934, "grad_norm": 0.133062481880188, "learning_rate": 0.0005, "loss": 2.116, "step": 120440 }, { "epoch": 0.4584624285377161, "grad_norm": 0.12013937532901764, "learning_rate": 0.0005, "loss": 2.1046, "step": 120450 }, { "epoch": 0.4585004910058388, "grad_norm": 0.11916226148605347, "learning_rate": 0.0005, "loss": 2.1153, "step": 120460 }, { "epoch": 0.45853855347396144, "grad_norm": 0.11697036027908325, "learning_rate": 0.0005, "loss": 2.1095, "step": 120470 }, { "epoch": 0.45857661594208415, "grad_norm": 0.11921066790819168, "learning_rate": 0.0005, "loss": 2.1225, "step": 120480 }, { "epoch": 0.4586146784102068, "grad_norm": 0.12394513189792633, "learning_rate": 0.0005, "loss": 2.1161, "step": 120490 }, { "epoch": 0.4586527408783295, "grad_norm": 0.11950384080410004, "learning_rate": 0.0005, "loss": 2.1303, "step": 120500 }, { "epoch": 0.4586908033464522, "grad_norm": 0.133575439453125, "learning_rate": 0.0005, "loss": 2.1226, "step": 120510 }, { "epoch": 0.4587288658145749, "grad_norm": 0.12195932120084763, "learning_rate": 0.0005, "loss": 2.1346, "step": 120520 }, { "epoch": 0.45876692828269755, "grad_norm": 0.13978615403175354, "learning_rate": 0.0005, "loss": 2.1056, "step": 120530 }, { "epoch": 0.45880499075082026, "grad_norm": 0.11702223867177963, "learning_rate": 0.0005, "loss": 2.1159, "step": 120540 }, { "epoch": 0.4588430532189429, "grad_norm": 0.15952906012535095, "learning_rate": 0.0005, "loss": 2.1251, "step": 120550 }, { "epoch": 0.45888111568706563, "grad_norm": 0.1334410309791565, "learning_rate": 0.0005, "loss": 2.119, "step": 120560 }, { "epoch": 0.4589191781551883, "grad_norm": 0.13073650002479553, "learning_rate": 0.0005, "loss": 2.1201, "step": 120570 }, { "epoch": 0.458957240623311, "grad_norm": 0.12061751633882523, "learning_rate": 0.0005, "loss": 2.1277, "step": 120580 }, { "epoch": 0.45899530309143366, "grad_norm": 0.12560215592384338, "learning_rate": 0.0005, "loss": 2.1225, "step": 120590 }, { "epoch": 0.4590333655595563, "grad_norm": 0.13219398260116577, "learning_rate": 0.0005, "loss": 2.1368, "step": 120600 }, { "epoch": 0.45907142802767903, "grad_norm": 0.13976603746414185, "learning_rate": 0.0005, "loss": 2.1078, "step": 120610 }, { "epoch": 0.4591094904958017, "grad_norm": 0.12265753746032715, "learning_rate": 0.0005, "loss": 2.1231, "step": 120620 }, { "epoch": 0.4591475529639244, "grad_norm": 0.1313551366329193, "learning_rate": 0.0005, "loss": 2.1221, "step": 120630 }, { "epoch": 0.45918561543204706, "grad_norm": 0.11972445994615555, "learning_rate": 0.0005, "loss": 2.1282, "step": 120640 }, { "epoch": 0.45922367790016977, "grad_norm": 0.13018518686294556, "learning_rate": 0.0005, "loss": 2.1129, "step": 120650 }, { "epoch": 0.4592617403682924, "grad_norm": 0.14095982909202576, "learning_rate": 0.0005, "loss": 2.1156, "step": 120660 }, { "epoch": 0.45929980283641514, "grad_norm": 0.11395607143640518, "learning_rate": 0.0005, "loss": 2.1099, "step": 120670 }, { "epoch": 0.4593378653045378, "grad_norm": 0.1700298935174942, "learning_rate": 0.0005, "loss": 2.1121, "step": 120680 }, { "epoch": 0.4593759277726605, "grad_norm": 0.1294533908367157, "learning_rate": 0.0005, "loss": 2.1306, "step": 120690 }, { "epoch": 0.45941399024078317, "grad_norm": 0.12129295617341995, "learning_rate": 0.0005, "loss": 2.1136, "step": 120700 }, { "epoch": 0.4594520527089059, "grad_norm": 0.12236347049474716, "learning_rate": 0.0005, "loss": 2.1094, "step": 120710 }, { "epoch": 0.45949011517702854, "grad_norm": 0.12837907671928406, "learning_rate": 0.0005, "loss": 2.125, "step": 120720 }, { "epoch": 0.45952817764515125, "grad_norm": 0.12483696639537811, "learning_rate": 0.0005, "loss": 2.1091, "step": 120730 }, { "epoch": 0.4595662401132739, "grad_norm": 0.13650977611541748, "learning_rate": 0.0005, "loss": 2.1193, "step": 120740 }, { "epoch": 0.45960430258139656, "grad_norm": 0.13617606461048126, "learning_rate": 0.0005, "loss": 2.1161, "step": 120750 }, { "epoch": 0.4596423650495193, "grad_norm": 0.13370659947395325, "learning_rate": 0.0005, "loss": 2.1221, "step": 120760 }, { "epoch": 0.45968042751764193, "grad_norm": 0.1324436515569687, "learning_rate": 0.0005, "loss": 2.1221, "step": 120770 }, { "epoch": 0.45971848998576464, "grad_norm": 0.11624017357826233, "learning_rate": 0.0005, "loss": 2.1194, "step": 120780 }, { "epoch": 0.4597565524538873, "grad_norm": 0.12417466193437576, "learning_rate": 0.0005, "loss": 2.1165, "step": 120790 }, { "epoch": 0.45979461492201, "grad_norm": 0.13375669717788696, "learning_rate": 0.0005, "loss": 2.1043, "step": 120800 }, { "epoch": 0.45983267739013267, "grad_norm": 0.12934353947639465, "learning_rate": 0.0005, "loss": 2.1103, "step": 120810 }, { "epoch": 0.4598707398582554, "grad_norm": 0.12061566859483719, "learning_rate": 0.0005, "loss": 2.1115, "step": 120820 }, { "epoch": 0.45990880232637804, "grad_norm": 0.1487627923488617, "learning_rate": 0.0005, "loss": 2.1233, "step": 120830 }, { "epoch": 0.45994686479450075, "grad_norm": 0.1293063908815384, "learning_rate": 0.0005, "loss": 2.1134, "step": 120840 }, { "epoch": 0.4599849272626234, "grad_norm": 0.12316817790269852, "learning_rate": 0.0005, "loss": 2.1197, "step": 120850 }, { "epoch": 0.4600229897307461, "grad_norm": 0.128492072224617, "learning_rate": 0.0005, "loss": 2.1029, "step": 120860 }, { "epoch": 0.4600610521988688, "grad_norm": 0.13701295852661133, "learning_rate": 0.0005, "loss": 2.1328, "step": 120870 }, { "epoch": 0.4600991146669915, "grad_norm": 0.12807445228099823, "learning_rate": 0.0005, "loss": 2.1154, "step": 120880 }, { "epoch": 0.46013717713511415, "grad_norm": 0.12884552776813507, "learning_rate": 0.0005, "loss": 2.1232, "step": 120890 }, { "epoch": 0.4601752396032368, "grad_norm": 0.13927030563354492, "learning_rate": 0.0005, "loss": 2.1101, "step": 120900 }, { "epoch": 0.4602133020713595, "grad_norm": 0.14055320620536804, "learning_rate": 0.0005, "loss": 2.1189, "step": 120910 }, { "epoch": 0.4602513645394822, "grad_norm": 0.1349189579486847, "learning_rate": 0.0005, "loss": 2.1145, "step": 120920 }, { "epoch": 0.4602894270076049, "grad_norm": 0.1196657121181488, "learning_rate": 0.0005, "loss": 2.1164, "step": 120930 }, { "epoch": 0.46032748947572755, "grad_norm": 0.11339619010686874, "learning_rate": 0.0005, "loss": 2.0867, "step": 120940 }, { "epoch": 0.46036555194385026, "grad_norm": 0.13684843480587006, "learning_rate": 0.0005, "loss": 2.1166, "step": 120950 }, { "epoch": 0.4604036144119729, "grad_norm": 0.11891072243452072, "learning_rate": 0.0005, "loss": 2.1209, "step": 120960 }, { "epoch": 0.46044167688009563, "grad_norm": 0.1228720098733902, "learning_rate": 0.0005, "loss": 2.1127, "step": 120970 }, { "epoch": 0.4604797393482183, "grad_norm": 0.11489354074001312, "learning_rate": 0.0005, "loss": 2.1115, "step": 120980 }, { "epoch": 0.460517801816341, "grad_norm": 0.1338176280260086, "learning_rate": 0.0005, "loss": 2.1298, "step": 120990 }, { "epoch": 0.46055586428446366, "grad_norm": 0.12991927564144135, "learning_rate": 0.0005, "loss": 2.1131, "step": 121000 }, { "epoch": 0.46059392675258637, "grad_norm": 0.13514523208141327, "learning_rate": 0.0005, "loss": 2.1103, "step": 121010 }, { "epoch": 0.460631989220709, "grad_norm": 0.12413940578699112, "learning_rate": 0.0005, "loss": 2.1236, "step": 121020 }, { "epoch": 0.4606700516888317, "grad_norm": 0.12618406116962433, "learning_rate": 0.0005, "loss": 2.1131, "step": 121030 }, { "epoch": 0.4607081141569544, "grad_norm": 0.11842025816440582, "learning_rate": 0.0005, "loss": 2.1104, "step": 121040 }, { "epoch": 0.46074617662507705, "grad_norm": 0.13303187489509583, "learning_rate": 0.0005, "loss": 2.118, "step": 121050 }, { "epoch": 0.46078423909319977, "grad_norm": 0.14305202662944794, "learning_rate": 0.0005, "loss": 2.116, "step": 121060 }, { "epoch": 0.4608223015613224, "grad_norm": 0.1284281462430954, "learning_rate": 0.0005, "loss": 2.1108, "step": 121070 }, { "epoch": 0.46086036402944514, "grad_norm": 0.13640108704566956, "learning_rate": 0.0005, "loss": 2.1268, "step": 121080 }, { "epoch": 0.4608984264975678, "grad_norm": 0.11827525496482849, "learning_rate": 0.0005, "loss": 2.1326, "step": 121090 }, { "epoch": 0.4609364889656905, "grad_norm": 0.1367952823638916, "learning_rate": 0.0005, "loss": 2.1127, "step": 121100 }, { "epoch": 0.46097455143381316, "grad_norm": 0.12315177172422409, "learning_rate": 0.0005, "loss": 2.1078, "step": 121110 }, { "epoch": 0.4610126139019359, "grad_norm": 0.12004311382770538, "learning_rate": 0.0005, "loss": 2.1285, "step": 121120 }, { "epoch": 0.46105067637005853, "grad_norm": 0.11687660217285156, "learning_rate": 0.0005, "loss": 2.1229, "step": 121130 }, { "epoch": 0.46108873883818124, "grad_norm": 0.11443885415792465, "learning_rate": 0.0005, "loss": 2.1167, "step": 121140 }, { "epoch": 0.4611268013063039, "grad_norm": 0.13030986487865448, "learning_rate": 0.0005, "loss": 2.1228, "step": 121150 }, { "epoch": 0.4611648637744266, "grad_norm": 0.1401316374540329, "learning_rate": 0.0005, "loss": 2.1213, "step": 121160 }, { "epoch": 0.46120292624254927, "grad_norm": 0.12074064463376999, "learning_rate": 0.0005, "loss": 2.1009, "step": 121170 }, { "epoch": 0.46124098871067193, "grad_norm": 0.11939749121665955, "learning_rate": 0.0005, "loss": 2.1166, "step": 121180 }, { "epoch": 0.46127905117879464, "grad_norm": 0.123379185795784, "learning_rate": 0.0005, "loss": 2.1167, "step": 121190 }, { "epoch": 0.4613171136469173, "grad_norm": 0.1268387883901596, "learning_rate": 0.0005, "loss": 2.1187, "step": 121200 }, { "epoch": 0.46135517611504, "grad_norm": 0.14271396398544312, "learning_rate": 0.0005, "loss": 2.1183, "step": 121210 }, { "epoch": 0.46139323858316267, "grad_norm": 0.11226682364940643, "learning_rate": 0.0005, "loss": 2.1031, "step": 121220 }, { "epoch": 0.4614313010512854, "grad_norm": 0.12411001324653625, "learning_rate": 0.0005, "loss": 2.1098, "step": 121230 }, { "epoch": 0.46146936351940804, "grad_norm": 0.12751705944538116, "learning_rate": 0.0005, "loss": 2.1295, "step": 121240 }, { "epoch": 0.46150742598753075, "grad_norm": 0.11743474751710892, "learning_rate": 0.0005, "loss": 2.1015, "step": 121250 }, { "epoch": 0.4615454884556534, "grad_norm": 0.1255132555961609, "learning_rate": 0.0005, "loss": 2.1376, "step": 121260 }, { "epoch": 0.4615835509237761, "grad_norm": 0.12986207008361816, "learning_rate": 0.0005, "loss": 2.131, "step": 121270 }, { "epoch": 0.4616216133918988, "grad_norm": 0.12577657401561737, "learning_rate": 0.0005, "loss": 2.1254, "step": 121280 }, { "epoch": 0.4616596758600215, "grad_norm": 0.15010912716388702, "learning_rate": 0.0005, "loss": 2.1391, "step": 121290 }, { "epoch": 0.46169773832814415, "grad_norm": 0.13126231729984283, "learning_rate": 0.0005, "loss": 2.1174, "step": 121300 }, { "epoch": 0.46173580079626686, "grad_norm": 0.12573876976966858, "learning_rate": 0.0005, "loss": 2.1231, "step": 121310 }, { "epoch": 0.4617738632643895, "grad_norm": 0.12809449434280396, "learning_rate": 0.0005, "loss": 2.136, "step": 121320 }, { "epoch": 0.4618119257325122, "grad_norm": 0.11522986739873886, "learning_rate": 0.0005, "loss": 2.1138, "step": 121330 }, { "epoch": 0.4618499882006349, "grad_norm": 0.1317320615053177, "learning_rate": 0.0005, "loss": 2.1177, "step": 121340 }, { "epoch": 0.46188805066875754, "grad_norm": 0.1397280991077423, "learning_rate": 0.0005, "loss": 2.1222, "step": 121350 }, { "epoch": 0.46192611313688026, "grad_norm": 0.12934623658657074, "learning_rate": 0.0005, "loss": 2.1243, "step": 121360 }, { "epoch": 0.4619641756050029, "grad_norm": 0.12387774139642715, "learning_rate": 0.0005, "loss": 2.1227, "step": 121370 }, { "epoch": 0.4620022380731256, "grad_norm": 0.12394656985998154, "learning_rate": 0.0005, "loss": 2.1277, "step": 121380 }, { "epoch": 0.4620403005412483, "grad_norm": 0.12481960654258728, "learning_rate": 0.0005, "loss": 2.1177, "step": 121390 }, { "epoch": 0.462078363009371, "grad_norm": 0.12520074844360352, "learning_rate": 0.0005, "loss": 2.1117, "step": 121400 }, { "epoch": 0.46211642547749365, "grad_norm": 0.12304385006427765, "learning_rate": 0.0005, "loss": 2.1243, "step": 121410 }, { "epoch": 0.46215448794561637, "grad_norm": 0.1355585902929306, "learning_rate": 0.0005, "loss": 2.1046, "step": 121420 }, { "epoch": 0.462192550413739, "grad_norm": 0.11432173103094101, "learning_rate": 0.0005, "loss": 2.1312, "step": 121430 }, { "epoch": 0.46223061288186174, "grad_norm": 0.1187356635928154, "learning_rate": 0.0005, "loss": 2.1207, "step": 121440 }, { "epoch": 0.4622686753499844, "grad_norm": 0.1240907609462738, "learning_rate": 0.0005, "loss": 2.1039, "step": 121450 }, { "epoch": 0.46230673781810705, "grad_norm": 0.12461048364639282, "learning_rate": 0.0005, "loss": 2.1136, "step": 121460 }, { "epoch": 0.46234480028622976, "grad_norm": 0.13123613595962524, "learning_rate": 0.0005, "loss": 2.1203, "step": 121470 }, { "epoch": 0.4623828627543524, "grad_norm": 0.13103175163269043, "learning_rate": 0.0005, "loss": 2.0967, "step": 121480 }, { "epoch": 0.46242092522247513, "grad_norm": 0.12875773012638092, "learning_rate": 0.0005, "loss": 2.1181, "step": 121490 }, { "epoch": 0.4624589876905978, "grad_norm": 0.13313820958137512, "learning_rate": 0.0005, "loss": 2.101, "step": 121500 }, { "epoch": 0.4624970501587205, "grad_norm": 0.11926737427711487, "learning_rate": 0.0005, "loss": 2.1087, "step": 121510 }, { "epoch": 0.46253511262684316, "grad_norm": 0.11945221573114395, "learning_rate": 0.0005, "loss": 2.1375, "step": 121520 }, { "epoch": 0.46257317509496587, "grad_norm": 0.13894334435462952, "learning_rate": 0.0005, "loss": 2.0995, "step": 121530 }, { "epoch": 0.46261123756308853, "grad_norm": 0.13981805741786957, "learning_rate": 0.0005, "loss": 2.1305, "step": 121540 }, { "epoch": 0.46264930003121124, "grad_norm": 0.12328756600618362, "learning_rate": 0.0005, "loss": 2.122, "step": 121550 }, { "epoch": 0.4626873624993339, "grad_norm": 0.1285262405872345, "learning_rate": 0.0005, "loss": 2.1071, "step": 121560 }, { "epoch": 0.4627254249674566, "grad_norm": 0.1343710571527481, "learning_rate": 0.0005, "loss": 2.1144, "step": 121570 }, { "epoch": 0.46276348743557927, "grad_norm": 0.1505940854549408, "learning_rate": 0.0005, "loss": 2.1147, "step": 121580 }, { "epoch": 0.462801549903702, "grad_norm": 0.12722337245941162, "learning_rate": 0.0005, "loss": 2.1097, "step": 121590 }, { "epoch": 0.46283961237182464, "grad_norm": 0.11916808038949966, "learning_rate": 0.0005, "loss": 2.1166, "step": 121600 }, { "epoch": 0.4628776748399473, "grad_norm": 0.12827543914318085, "learning_rate": 0.0005, "loss": 2.1185, "step": 121610 }, { "epoch": 0.46291573730807, "grad_norm": 0.11692959070205688, "learning_rate": 0.0005, "loss": 2.115, "step": 121620 }, { "epoch": 0.46295379977619266, "grad_norm": 0.12335637211799622, "learning_rate": 0.0005, "loss": 2.1126, "step": 121630 }, { "epoch": 0.4629918622443154, "grad_norm": 0.11940497905015945, "learning_rate": 0.0005, "loss": 2.123, "step": 121640 }, { "epoch": 0.46302992471243803, "grad_norm": 0.11708685755729675, "learning_rate": 0.0005, "loss": 2.1152, "step": 121650 }, { "epoch": 0.46306798718056075, "grad_norm": 0.1382119208574295, "learning_rate": 0.0005, "loss": 2.1198, "step": 121660 }, { "epoch": 0.4631060496486834, "grad_norm": 0.14125585556030273, "learning_rate": 0.0005, "loss": 2.1168, "step": 121670 }, { "epoch": 0.4631441121168061, "grad_norm": 0.11189690232276917, "learning_rate": 0.0005, "loss": 2.1202, "step": 121680 }, { "epoch": 0.4631821745849288, "grad_norm": 0.1348118931055069, "learning_rate": 0.0005, "loss": 2.1145, "step": 121690 }, { "epoch": 0.4632202370530515, "grad_norm": 0.11401959508657455, "learning_rate": 0.0005, "loss": 2.1344, "step": 121700 }, { "epoch": 0.46325829952117414, "grad_norm": 0.12618707120418549, "learning_rate": 0.0005, "loss": 2.1083, "step": 121710 }, { "epoch": 0.46329636198929686, "grad_norm": 0.13240477442741394, "learning_rate": 0.0005, "loss": 2.1179, "step": 121720 }, { "epoch": 0.4633344244574195, "grad_norm": 0.12720882892608643, "learning_rate": 0.0005, "loss": 2.1117, "step": 121730 }, { "epoch": 0.4633724869255422, "grad_norm": 0.11353076249361038, "learning_rate": 0.0005, "loss": 2.1131, "step": 121740 }, { "epoch": 0.4634105493936649, "grad_norm": 0.1258583813905716, "learning_rate": 0.0005, "loss": 2.0998, "step": 121750 }, { "epoch": 0.46344861186178754, "grad_norm": 0.12236791104078293, "learning_rate": 0.0005, "loss": 2.1258, "step": 121760 }, { "epoch": 0.46348667432991025, "grad_norm": 0.12761059403419495, "learning_rate": 0.0005, "loss": 2.0967, "step": 121770 }, { "epoch": 0.4635247367980329, "grad_norm": 0.12125248461961746, "learning_rate": 0.0005, "loss": 2.1058, "step": 121780 }, { "epoch": 0.4635627992661556, "grad_norm": 0.1282951980829239, "learning_rate": 0.0005, "loss": 2.114, "step": 121790 }, { "epoch": 0.4636008617342783, "grad_norm": 0.12211728096008301, "learning_rate": 0.0005, "loss": 2.1154, "step": 121800 }, { "epoch": 0.463638924202401, "grad_norm": 0.11740932613611221, "learning_rate": 0.0005, "loss": 2.0923, "step": 121810 }, { "epoch": 0.46367698667052365, "grad_norm": 0.11825814098119736, "learning_rate": 0.0005, "loss": 2.123, "step": 121820 }, { "epoch": 0.46371504913864636, "grad_norm": 0.1204843744635582, "learning_rate": 0.0005, "loss": 2.1114, "step": 121830 }, { "epoch": 0.463753111606769, "grad_norm": 0.12917695939540863, "learning_rate": 0.0005, "loss": 2.1208, "step": 121840 }, { "epoch": 0.46379117407489173, "grad_norm": 0.12288330495357513, "learning_rate": 0.0005, "loss": 2.1301, "step": 121850 }, { "epoch": 0.4638292365430144, "grad_norm": 0.11910853534936905, "learning_rate": 0.0005, "loss": 2.1019, "step": 121860 }, { "epoch": 0.4638672990111371, "grad_norm": 0.11725097894668579, "learning_rate": 0.0005, "loss": 2.1337, "step": 121870 }, { "epoch": 0.46390536147925976, "grad_norm": 0.11722494661808014, "learning_rate": 0.0005, "loss": 2.1131, "step": 121880 }, { "epoch": 0.46394342394738247, "grad_norm": 0.12103597819805145, "learning_rate": 0.0005, "loss": 2.1378, "step": 121890 }, { "epoch": 0.46398148641550513, "grad_norm": 0.12656059861183167, "learning_rate": 0.0005, "loss": 2.1105, "step": 121900 }, { "epoch": 0.4640195488836278, "grad_norm": 0.1225254163146019, "learning_rate": 0.0005, "loss": 2.1077, "step": 121910 }, { "epoch": 0.4640576113517505, "grad_norm": 0.12554019689559937, "learning_rate": 0.0005, "loss": 2.096, "step": 121920 }, { "epoch": 0.46409567381987316, "grad_norm": 0.1267976462841034, "learning_rate": 0.0005, "loss": 2.1107, "step": 121930 }, { "epoch": 0.46413373628799587, "grad_norm": 0.13665032386779785, "learning_rate": 0.0005, "loss": 2.1266, "step": 121940 }, { "epoch": 0.4641717987561185, "grad_norm": 0.1262006163597107, "learning_rate": 0.0005, "loss": 2.1343, "step": 121950 }, { "epoch": 0.46420986122424124, "grad_norm": 0.12373677641153336, "learning_rate": 0.0005, "loss": 2.1147, "step": 121960 }, { "epoch": 0.4642479236923639, "grad_norm": 1.3493014574050903, "learning_rate": 0.0005, "loss": 2.1216, "step": 121970 }, { "epoch": 0.4642859861604866, "grad_norm": 0.1411971002817154, "learning_rate": 0.0005, "loss": 2.1234, "step": 121980 }, { "epoch": 0.46432404862860926, "grad_norm": 0.1318981945514679, "learning_rate": 0.0005, "loss": 2.1246, "step": 121990 }, { "epoch": 0.464362111096732, "grad_norm": 0.11528051644563675, "learning_rate": 0.0005, "loss": 2.1155, "step": 122000 }, { "epoch": 0.46440017356485463, "grad_norm": 0.1222640722990036, "learning_rate": 0.0005, "loss": 2.1077, "step": 122010 }, { "epoch": 0.46443823603297735, "grad_norm": 0.12968085706233978, "learning_rate": 0.0005, "loss": 2.1212, "step": 122020 }, { "epoch": 0.4644762985011, "grad_norm": 0.12960520386695862, "learning_rate": 0.0005, "loss": 2.1166, "step": 122030 }, { "epoch": 0.46451436096922266, "grad_norm": 0.12939879298210144, "learning_rate": 0.0005, "loss": 2.1089, "step": 122040 }, { "epoch": 0.4645524234373454, "grad_norm": 0.12176186591386795, "learning_rate": 0.0005, "loss": 2.1126, "step": 122050 }, { "epoch": 0.46459048590546803, "grad_norm": 0.11837892979383469, "learning_rate": 0.0005, "loss": 2.1155, "step": 122060 }, { "epoch": 0.46462854837359074, "grad_norm": 0.12619103491306305, "learning_rate": 0.0005, "loss": 2.1297, "step": 122070 }, { "epoch": 0.4646666108417134, "grad_norm": 0.12157522141933441, "learning_rate": 0.0005, "loss": 2.1067, "step": 122080 }, { "epoch": 0.4647046733098361, "grad_norm": 0.11906483769416809, "learning_rate": 0.0005, "loss": 2.1261, "step": 122090 }, { "epoch": 0.46474273577795877, "grad_norm": 0.13369664549827576, "learning_rate": 0.0005, "loss": 2.1142, "step": 122100 }, { "epoch": 0.4647807982460815, "grad_norm": 0.1242845430970192, "learning_rate": 0.0005, "loss": 2.1106, "step": 122110 }, { "epoch": 0.46481886071420414, "grad_norm": 0.12459714710712433, "learning_rate": 0.0005, "loss": 2.1179, "step": 122120 }, { "epoch": 0.46485692318232685, "grad_norm": 0.12096191197633743, "learning_rate": 0.0005, "loss": 2.114, "step": 122130 }, { "epoch": 0.4648949856504495, "grad_norm": 0.12581023573875427, "learning_rate": 0.0005, "loss": 2.1066, "step": 122140 }, { "epoch": 0.4649330481185722, "grad_norm": 0.12309510260820389, "learning_rate": 0.0005, "loss": 2.1011, "step": 122150 }, { "epoch": 0.4649711105866949, "grad_norm": 0.12692727148532867, "learning_rate": 0.0005, "loss": 2.0989, "step": 122160 }, { "epoch": 0.4650091730548176, "grad_norm": 0.12823912501335144, "learning_rate": 0.0005, "loss": 2.1239, "step": 122170 }, { "epoch": 0.46504723552294025, "grad_norm": 0.12171664834022522, "learning_rate": 0.0005, "loss": 2.1131, "step": 122180 }, { "epoch": 0.4650852979910629, "grad_norm": 0.14432282745838165, "learning_rate": 0.0005, "loss": 2.1219, "step": 122190 }, { "epoch": 0.4651233604591856, "grad_norm": 0.11886001378297806, "learning_rate": 0.0005, "loss": 2.1157, "step": 122200 }, { "epoch": 0.4651614229273083, "grad_norm": 0.12576742470264435, "learning_rate": 0.0005, "loss": 2.1086, "step": 122210 }, { "epoch": 0.465199485395431, "grad_norm": 0.14525540173053741, "learning_rate": 0.0005, "loss": 2.114, "step": 122220 }, { "epoch": 0.46523754786355365, "grad_norm": 0.1189890205860138, "learning_rate": 0.0005, "loss": 2.1267, "step": 122230 }, { "epoch": 0.46527561033167636, "grad_norm": 0.12696944177150726, "learning_rate": 0.0005, "loss": 2.1105, "step": 122240 }, { "epoch": 0.465313672799799, "grad_norm": 0.11398856341838837, "learning_rate": 0.0005, "loss": 2.1038, "step": 122250 }, { "epoch": 0.46535173526792173, "grad_norm": 0.11955248564481735, "learning_rate": 0.0005, "loss": 2.1102, "step": 122260 }, { "epoch": 0.4653897977360444, "grad_norm": 0.13834500312805176, "learning_rate": 0.0005, "loss": 2.1245, "step": 122270 }, { "epoch": 0.4654278602041671, "grad_norm": 0.12638892233371735, "learning_rate": 0.0005, "loss": 2.1097, "step": 122280 }, { "epoch": 0.46546592267228976, "grad_norm": 0.12643951177597046, "learning_rate": 0.0005, "loss": 2.1095, "step": 122290 }, { "epoch": 0.46550398514041247, "grad_norm": 0.13313770294189453, "learning_rate": 0.0005, "loss": 2.1169, "step": 122300 }, { "epoch": 0.4655420476085351, "grad_norm": 0.13501465320587158, "learning_rate": 0.0005, "loss": 2.1271, "step": 122310 }, { "epoch": 0.46558011007665784, "grad_norm": 0.13232651352882385, "learning_rate": 0.0005, "loss": 2.1093, "step": 122320 }, { "epoch": 0.4656181725447805, "grad_norm": 0.12018989026546478, "learning_rate": 0.0005, "loss": 2.1167, "step": 122330 }, { "epoch": 0.46565623501290315, "grad_norm": 0.1271781027317047, "learning_rate": 0.0005, "loss": 2.1066, "step": 122340 }, { "epoch": 0.46569429748102587, "grad_norm": 0.12841512262821198, "learning_rate": 0.0005, "loss": 2.1259, "step": 122350 }, { "epoch": 0.4657323599491485, "grad_norm": 0.11983626335859299, "learning_rate": 0.0005, "loss": 2.11, "step": 122360 }, { "epoch": 0.46577042241727123, "grad_norm": 0.13224050402641296, "learning_rate": 0.0005, "loss": 2.11, "step": 122370 }, { "epoch": 0.4658084848853939, "grad_norm": 0.12928161025047302, "learning_rate": 0.0005, "loss": 2.12, "step": 122380 }, { "epoch": 0.4658465473535166, "grad_norm": 0.11840779334306717, "learning_rate": 0.0005, "loss": 2.1225, "step": 122390 }, { "epoch": 0.46588460982163926, "grad_norm": 0.12242060899734497, "learning_rate": 0.0005, "loss": 2.1153, "step": 122400 }, { "epoch": 0.465922672289762, "grad_norm": 0.15045621991157532, "learning_rate": 0.0005, "loss": 2.1179, "step": 122410 }, { "epoch": 0.46596073475788463, "grad_norm": 0.1329570859670639, "learning_rate": 0.0005, "loss": 2.1331, "step": 122420 }, { "epoch": 0.46599879722600734, "grad_norm": 0.12627650797367096, "learning_rate": 0.0005, "loss": 2.1426, "step": 122430 }, { "epoch": 0.46603685969413, "grad_norm": 0.12650640308856964, "learning_rate": 0.0005, "loss": 2.1091, "step": 122440 }, { "epoch": 0.4660749221622527, "grad_norm": 0.12126204371452332, "learning_rate": 0.0005, "loss": 2.114, "step": 122450 }, { "epoch": 0.46611298463037537, "grad_norm": 0.12744243443012238, "learning_rate": 0.0005, "loss": 2.1213, "step": 122460 }, { "epoch": 0.46615104709849803, "grad_norm": 0.11687210947275162, "learning_rate": 0.0005, "loss": 2.0983, "step": 122470 }, { "epoch": 0.46618910956662074, "grad_norm": 0.12865188717842102, "learning_rate": 0.0005, "loss": 2.109, "step": 122480 }, { "epoch": 0.4662271720347434, "grad_norm": 0.14424051344394684, "learning_rate": 0.0005, "loss": 2.1133, "step": 122490 }, { "epoch": 0.4662652345028661, "grad_norm": 0.12110546231269836, "learning_rate": 0.0005, "loss": 2.1045, "step": 122500 }, { "epoch": 0.46630329697098877, "grad_norm": 0.13489185273647308, "learning_rate": 0.0005, "loss": 2.1138, "step": 122510 }, { "epoch": 0.4663413594391115, "grad_norm": 0.1381700485944748, "learning_rate": 0.0005, "loss": 2.0987, "step": 122520 }, { "epoch": 0.46637942190723414, "grad_norm": 0.13211920857429504, "learning_rate": 0.0005, "loss": 2.1118, "step": 122530 }, { "epoch": 0.46641748437535685, "grad_norm": 0.132435604929924, "learning_rate": 0.0005, "loss": 2.1254, "step": 122540 }, { "epoch": 0.4664555468434795, "grad_norm": 0.12245769053697586, "learning_rate": 0.0005, "loss": 2.1185, "step": 122550 }, { "epoch": 0.4664936093116022, "grad_norm": 0.1318317949771881, "learning_rate": 0.0005, "loss": 2.125, "step": 122560 }, { "epoch": 0.4665316717797249, "grad_norm": 0.13142701983451843, "learning_rate": 0.0005, "loss": 2.1081, "step": 122570 }, { "epoch": 0.4665697342478476, "grad_norm": 0.1299506574869156, "learning_rate": 0.0005, "loss": 2.1167, "step": 122580 }, { "epoch": 0.46660779671597025, "grad_norm": 0.14254699647426605, "learning_rate": 0.0005, "loss": 2.1167, "step": 122590 }, { "epoch": 0.46664585918409296, "grad_norm": 0.11915218085050583, "learning_rate": 0.0005, "loss": 2.1262, "step": 122600 }, { "epoch": 0.4666839216522156, "grad_norm": 0.11730083078145981, "learning_rate": 0.0005, "loss": 2.1073, "step": 122610 }, { "epoch": 0.4667219841203383, "grad_norm": 0.14593356847763062, "learning_rate": 0.0005, "loss": 2.1228, "step": 122620 }, { "epoch": 0.466760046588461, "grad_norm": 0.1312982439994812, "learning_rate": 0.0005, "loss": 2.1221, "step": 122630 }, { "epoch": 0.46679810905658364, "grad_norm": 0.12263674288988113, "learning_rate": 0.0005, "loss": 2.1116, "step": 122640 }, { "epoch": 0.46683617152470636, "grad_norm": 0.13540539145469666, "learning_rate": 0.0005, "loss": 2.1103, "step": 122650 }, { "epoch": 0.466874233992829, "grad_norm": 0.12395121902227402, "learning_rate": 0.0005, "loss": 2.1226, "step": 122660 }, { "epoch": 0.4669122964609517, "grad_norm": 0.14132188260555267, "learning_rate": 0.0005, "loss": 2.1308, "step": 122670 }, { "epoch": 0.4669503589290744, "grad_norm": 0.11930973082780838, "learning_rate": 0.0005, "loss": 2.1092, "step": 122680 }, { "epoch": 0.4669884213971971, "grad_norm": 0.13231684267520905, "learning_rate": 0.0005, "loss": 2.1126, "step": 122690 }, { "epoch": 0.46702648386531975, "grad_norm": 0.14038299024105072, "learning_rate": 0.0005, "loss": 2.114, "step": 122700 }, { "epoch": 0.46706454633344247, "grad_norm": 0.1272704005241394, "learning_rate": 0.0005, "loss": 2.1253, "step": 122710 }, { "epoch": 0.4671026088015651, "grad_norm": 0.1259326934814453, "learning_rate": 0.0005, "loss": 2.1144, "step": 122720 }, { "epoch": 0.46714067126968783, "grad_norm": 0.11703534424304962, "learning_rate": 0.0005, "loss": 2.108, "step": 122730 }, { "epoch": 0.4671787337378105, "grad_norm": 0.11842364817857742, "learning_rate": 0.0005, "loss": 2.111, "step": 122740 }, { "epoch": 0.4672167962059332, "grad_norm": 0.14716917276382446, "learning_rate": 0.0005, "loss": 2.1143, "step": 122750 }, { "epoch": 0.46725485867405586, "grad_norm": 0.12898138165473938, "learning_rate": 0.0005, "loss": 2.1131, "step": 122760 }, { "epoch": 0.4672929211421785, "grad_norm": 0.11034823209047318, "learning_rate": 0.0005, "loss": 2.1226, "step": 122770 }, { "epoch": 0.46733098361030123, "grad_norm": 0.11895115673542023, "learning_rate": 0.0005, "loss": 2.1238, "step": 122780 }, { "epoch": 0.4673690460784239, "grad_norm": 0.12958012521266937, "learning_rate": 0.0005, "loss": 2.1037, "step": 122790 }, { "epoch": 0.4674071085465466, "grad_norm": 0.12227039039134979, "learning_rate": 0.0005, "loss": 2.1191, "step": 122800 }, { "epoch": 0.46744517101466926, "grad_norm": 0.12844082713127136, "learning_rate": 0.0005, "loss": 2.105, "step": 122810 }, { "epoch": 0.46748323348279197, "grad_norm": 0.11611893773078918, "learning_rate": 0.0005, "loss": 2.1162, "step": 122820 }, { "epoch": 0.46752129595091463, "grad_norm": 0.16009820997714996, "learning_rate": 0.0005, "loss": 2.1242, "step": 122830 }, { "epoch": 0.46755935841903734, "grad_norm": 0.1324053853750229, "learning_rate": 0.0005, "loss": 2.1222, "step": 122840 }, { "epoch": 0.46759742088716, "grad_norm": 0.129500612616539, "learning_rate": 0.0005, "loss": 2.0992, "step": 122850 }, { "epoch": 0.4676354833552827, "grad_norm": 0.1253543645143509, "learning_rate": 0.0005, "loss": 2.1264, "step": 122860 }, { "epoch": 0.46767354582340537, "grad_norm": 0.12207604199647903, "learning_rate": 0.0005, "loss": 2.1067, "step": 122870 }, { "epoch": 0.4677116082915281, "grad_norm": 0.13183759152889252, "learning_rate": 0.0005, "loss": 2.1045, "step": 122880 }, { "epoch": 0.46774967075965074, "grad_norm": 0.13390865921974182, "learning_rate": 0.0005, "loss": 2.1117, "step": 122890 }, { "epoch": 0.4677877332277734, "grad_norm": 0.12353968620300293, "learning_rate": 0.0005, "loss": 2.1153, "step": 122900 }, { "epoch": 0.4678257956958961, "grad_norm": 0.1235695481300354, "learning_rate": 0.0005, "loss": 2.1238, "step": 122910 }, { "epoch": 0.46786385816401876, "grad_norm": 0.1252755969762802, "learning_rate": 0.0005, "loss": 2.1164, "step": 122920 }, { "epoch": 0.4679019206321415, "grad_norm": 0.12756647169589996, "learning_rate": 0.0005, "loss": 2.1145, "step": 122930 }, { "epoch": 0.46793998310026413, "grad_norm": 0.13085618615150452, "learning_rate": 0.0005, "loss": 2.1121, "step": 122940 }, { "epoch": 0.46797804556838685, "grad_norm": 0.12621687352657318, "learning_rate": 0.0005, "loss": 2.1255, "step": 122950 }, { "epoch": 0.4680161080365095, "grad_norm": 0.119481660425663, "learning_rate": 0.0005, "loss": 2.1171, "step": 122960 }, { "epoch": 0.4680541705046322, "grad_norm": 0.12664827704429626, "learning_rate": 0.0005, "loss": 2.1226, "step": 122970 }, { "epoch": 0.4680922329727549, "grad_norm": 0.12512001395225525, "learning_rate": 0.0005, "loss": 2.1193, "step": 122980 }, { "epoch": 0.4681302954408776, "grad_norm": 0.13938689231872559, "learning_rate": 0.0005, "loss": 2.1243, "step": 122990 }, { "epoch": 0.46816835790900024, "grad_norm": 0.1311841458082199, "learning_rate": 0.0005, "loss": 2.103, "step": 123000 }, { "epoch": 0.46820642037712296, "grad_norm": 0.12570905685424805, "learning_rate": 0.0005, "loss": 2.1309, "step": 123010 }, { "epoch": 0.4682444828452456, "grad_norm": 0.11313197016716003, "learning_rate": 0.0005, "loss": 2.1123, "step": 123020 }, { "epoch": 0.4682825453133683, "grad_norm": 0.11979328840970993, "learning_rate": 0.0005, "loss": 2.1145, "step": 123030 }, { "epoch": 0.468320607781491, "grad_norm": 0.1246039941906929, "learning_rate": 0.0005, "loss": 2.1067, "step": 123040 }, { "epoch": 0.46835867024961364, "grad_norm": 0.11751657724380493, "learning_rate": 0.0005, "loss": 2.1159, "step": 123050 }, { "epoch": 0.46839673271773635, "grad_norm": 0.13147424161434174, "learning_rate": 0.0005, "loss": 2.1187, "step": 123060 }, { "epoch": 0.468434795185859, "grad_norm": 0.119588203728199, "learning_rate": 0.0005, "loss": 2.0929, "step": 123070 }, { "epoch": 0.4684728576539817, "grad_norm": 0.12306036800146103, "learning_rate": 0.0005, "loss": 2.1061, "step": 123080 }, { "epoch": 0.4685109201221044, "grad_norm": 0.1368551254272461, "learning_rate": 0.0005, "loss": 2.1084, "step": 123090 }, { "epoch": 0.4685489825902271, "grad_norm": 0.14483638107776642, "learning_rate": 0.0005, "loss": 2.1163, "step": 123100 }, { "epoch": 0.46858704505834975, "grad_norm": 0.11744410544633865, "learning_rate": 0.0005, "loss": 2.1054, "step": 123110 }, { "epoch": 0.46862510752647246, "grad_norm": 0.1234697699546814, "learning_rate": 0.0005, "loss": 2.0992, "step": 123120 }, { "epoch": 0.4686631699945951, "grad_norm": 0.14475607872009277, "learning_rate": 0.0005, "loss": 2.1077, "step": 123130 }, { "epoch": 0.46870123246271783, "grad_norm": 0.11726272106170654, "learning_rate": 0.0005, "loss": 2.1249, "step": 123140 }, { "epoch": 0.4687392949308405, "grad_norm": 0.11984838545322418, "learning_rate": 0.0005, "loss": 2.1236, "step": 123150 }, { "epoch": 0.4687773573989632, "grad_norm": 0.1178346648812294, "learning_rate": 0.0005, "loss": 2.1276, "step": 123160 }, { "epoch": 0.46881541986708586, "grad_norm": 0.1334547996520996, "learning_rate": 0.0005, "loss": 2.104, "step": 123170 }, { "epoch": 0.46885348233520857, "grad_norm": 0.13188724219799042, "learning_rate": 0.0005, "loss": 2.109, "step": 123180 }, { "epoch": 0.46889154480333123, "grad_norm": 0.12499808520078659, "learning_rate": 0.0005, "loss": 2.1175, "step": 123190 }, { "epoch": 0.4689296072714539, "grad_norm": 0.12053222954273224, "learning_rate": 0.0005, "loss": 2.1081, "step": 123200 }, { "epoch": 0.4689676697395766, "grad_norm": 0.12222561240196228, "learning_rate": 0.0005, "loss": 2.0979, "step": 123210 }, { "epoch": 0.46900573220769926, "grad_norm": 0.12280338257551193, "learning_rate": 0.0005, "loss": 2.1064, "step": 123220 }, { "epoch": 0.46904379467582197, "grad_norm": 0.12783880531787872, "learning_rate": 0.0005, "loss": 2.0973, "step": 123230 }, { "epoch": 0.4690818571439446, "grad_norm": 0.15136320888996124, "learning_rate": 0.0005, "loss": 2.1169, "step": 123240 }, { "epoch": 0.46911991961206734, "grad_norm": 0.1327112317085266, "learning_rate": 0.0005, "loss": 2.1285, "step": 123250 }, { "epoch": 0.46915798208019, "grad_norm": 0.11947908252477646, "learning_rate": 0.0005, "loss": 2.1058, "step": 123260 }, { "epoch": 0.4691960445483127, "grad_norm": 0.12162414193153381, "learning_rate": 0.0005, "loss": 2.1161, "step": 123270 }, { "epoch": 0.46923410701643536, "grad_norm": 0.13324034214019775, "learning_rate": 0.0005, "loss": 2.1155, "step": 123280 }, { "epoch": 0.4692721694845581, "grad_norm": 0.12378178536891937, "learning_rate": 0.0005, "loss": 2.1272, "step": 123290 }, { "epoch": 0.46931023195268073, "grad_norm": 0.13062675297260284, "learning_rate": 0.0005, "loss": 2.1183, "step": 123300 }, { "epoch": 0.46934829442080345, "grad_norm": 0.1286676824092865, "learning_rate": 0.0005, "loss": 2.1348, "step": 123310 }, { "epoch": 0.4693863568889261, "grad_norm": 0.11752732843160629, "learning_rate": 0.0005, "loss": 2.105, "step": 123320 }, { "epoch": 0.46942441935704876, "grad_norm": 0.14866508543491364, "learning_rate": 0.0005, "loss": 2.1245, "step": 123330 }, { "epoch": 0.4694624818251715, "grad_norm": 0.11668729037046432, "learning_rate": 0.0005, "loss": 2.124, "step": 123340 }, { "epoch": 0.46950054429329413, "grad_norm": 0.13470356166362762, "learning_rate": 0.0005, "loss": 2.1128, "step": 123350 }, { "epoch": 0.46953860676141684, "grad_norm": 0.14405706524848938, "learning_rate": 0.0005, "loss": 2.096, "step": 123360 }, { "epoch": 0.4695766692295395, "grad_norm": 0.128583624958992, "learning_rate": 0.0005, "loss": 2.1242, "step": 123370 }, { "epoch": 0.4696147316976622, "grad_norm": 0.1338459551334381, "learning_rate": 0.0005, "loss": 2.1377, "step": 123380 }, { "epoch": 0.46965279416578487, "grad_norm": 0.12829987704753876, "learning_rate": 0.0005, "loss": 2.1189, "step": 123390 }, { "epoch": 0.4696908566339076, "grad_norm": 0.12520089745521545, "learning_rate": 0.0005, "loss": 2.1264, "step": 123400 }, { "epoch": 0.46972891910203024, "grad_norm": 0.12828919291496277, "learning_rate": 0.0005, "loss": 2.1308, "step": 123410 }, { "epoch": 0.46976698157015295, "grad_norm": 0.12368378788232803, "learning_rate": 0.0005, "loss": 2.1316, "step": 123420 }, { "epoch": 0.4698050440382756, "grad_norm": 0.12954358756542206, "learning_rate": 0.0005, "loss": 2.1148, "step": 123430 }, { "epoch": 0.4698431065063983, "grad_norm": 0.12930402159690857, "learning_rate": 0.0005, "loss": 2.1125, "step": 123440 }, { "epoch": 0.469881168974521, "grad_norm": 0.12339557707309723, "learning_rate": 0.0005, "loss": 2.1158, "step": 123450 }, { "epoch": 0.4699192314426437, "grad_norm": 0.11649967730045319, "learning_rate": 0.0005, "loss": 2.1189, "step": 123460 }, { "epoch": 0.46995729391076635, "grad_norm": 0.1173417717218399, "learning_rate": 0.0005, "loss": 2.1199, "step": 123470 }, { "epoch": 0.469995356378889, "grad_norm": 0.1417657881975174, "learning_rate": 0.0005, "loss": 2.1122, "step": 123480 }, { "epoch": 0.4700334188470117, "grad_norm": 0.13985687494277954, "learning_rate": 0.0005, "loss": 2.119, "step": 123490 }, { "epoch": 0.4700714813151344, "grad_norm": 0.12331917881965637, "learning_rate": 0.0005, "loss": 2.1324, "step": 123500 }, { "epoch": 0.4701095437832571, "grad_norm": 0.12005551159381866, "learning_rate": 0.0005, "loss": 2.1065, "step": 123510 }, { "epoch": 0.47014760625137975, "grad_norm": 0.11983375996351242, "learning_rate": 0.0005, "loss": 2.1075, "step": 123520 }, { "epoch": 0.47018566871950246, "grad_norm": 0.11790003627538681, "learning_rate": 0.0005, "loss": 2.1085, "step": 123530 }, { "epoch": 0.4702237311876251, "grad_norm": 0.14322248101234436, "learning_rate": 0.0005, "loss": 2.1121, "step": 123540 }, { "epoch": 0.47026179365574783, "grad_norm": 0.12420692294836044, "learning_rate": 0.0005, "loss": 2.1243, "step": 123550 }, { "epoch": 0.4702998561238705, "grad_norm": 0.12818403542041779, "learning_rate": 0.0005, "loss": 2.1292, "step": 123560 }, { "epoch": 0.4703379185919932, "grad_norm": 0.13269901275634766, "learning_rate": 0.0005, "loss": 2.099, "step": 123570 }, { "epoch": 0.47037598106011586, "grad_norm": 0.12523117661476135, "learning_rate": 0.0005, "loss": 2.1202, "step": 123580 }, { "epoch": 0.47041404352823857, "grad_norm": 0.11941179633140564, "learning_rate": 0.0005, "loss": 2.123, "step": 123590 }, { "epoch": 0.4704521059963612, "grad_norm": 0.12068691849708557, "learning_rate": 0.0005, "loss": 2.1343, "step": 123600 }, { "epoch": 0.47049016846448394, "grad_norm": 0.12186926603317261, "learning_rate": 0.0005, "loss": 2.1231, "step": 123610 }, { "epoch": 0.4705282309326066, "grad_norm": 0.12164727598428726, "learning_rate": 0.0005, "loss": 2.1206, "step": 123620 }, { "epoch": 0.47056629340072925, "grad_norm": 0.11695064604282379, "learning_rate": 0.0005, "loss": 2.1191, "step": 123630 }, { "epoch": 0.47060435586885196, "grad_norm": 0.12620823085308075, "learning_rate": 0.0005, "loss": 2.1061, "step": 123640 }, { "epoch": 0.4706424183369746, "grad_norm": 0.11972874402999878, "learning_rate": 0.0005, "loss": 2.1192, "step": 123650 }, { "epoch": 0.47068048080509733, "grad_norm": 0.1494438350200653, "learning_rate": 0.0005, "loss": 2.1259, "step": 123660 }, { "epoch": 0.47071854327322, "grad_norm": 0.13811999559402466, "learning_rate": 0.0005, "loss": 2.1183, "step": 123670 }, { "epoch": 0.4707566057413427, "grad_norm": 0.12858720123767853, "learning_rate": 0.0005, "loss": 2.1082, "step": 123680 }, { "epoch": 0.47079466820946536, "grad_norm": 0.12714563310146332, "learning_rate": 0.0005, "loss": 2.1033, "step": 123690 }, { "epoch": 0.4708327306775881, "grad_norm": 0.11671023070812225, "learning_rate": 0.0005, "loss": 2.125, "step": 123700 }, { "epoch": 0.47087079314571073, "grad_norm": 0.13238412141799927, "learning_rate": 0.0005, "loss": 2.1074, "step": 123710 }, { "epoch": 0.47090885561383344, "grad_norm": 0.11716995388269424, "learning_rate": 0.0005, "loss": 2.1301, "step": 123720 }, { "epoch": 0.4709469180819561, "grad_norm": 0.1207890436053276, "learning_rate": 0.0005, "loss": 2.1117, "step": 123730 }, { "epoch": 0.4709849805500788, "grad_norm": 0.12667891383171082, "learning_rate": 0.0005, "loss": 2.1067, "step": 123740 }, { "epoch": 0.47102304301820147, "grad_norm": 0.12619231641292572, "learning_rate": 0.0005, "loss": 2.1242, "step": 123750 }, { "epoch": 0.4710611054863241, "grad_norm": 0.11664183437824249, "learning_rate": 0.0005, "loss": 2.112, "step": 123760 }, { "epoch": 0.47109916795444684, "grad_norm": 0.11987679451704025, "learning_rate": 0.0005, "loss": 2.1028, "step": 123770 }, { "epoch": 0.4711372304225695, "grad_norm": 0.13168591260910034, "learning_rate": 0.0005, "loss": 2.1244, "step": 123780 }, { "epoch": 0.4711752928906922, "grad_norm": 0.11626217514276505, "learning_rate": 0.0005, "loss": 2.1219, "step": 123790 }, { "epoch": 0.47121335535881487, "grad_norm": 0.1286042183637619, "learning_rate": 0.0005, "loss": 2.1153, "step": 123800 }, { "epoch": 0.4712514178269376, "grad_norm": 0.12957172095775604, "learning_rate": 0.0005, "loss": 2.1099, "step": 123810 }, { "epoch": 0.47128948029506024, "grad_norm": 0.12456455081701279, "learning_rate": 0.0005, "loss": 2.1208, "step": 123820 }, { "epoch": 0.47132754276318295, "grad_norm": 0.1218956708908081, "learning_rate": 0.0005, "loss": 2.1306, "step": 123830 }, { "epoch": 0.4713656052313056, "grad_norm": 0.11381538212299347, "learning_rate": 0.0005, "loss": 2.1111, "step": 123840 }, { "epoch": 0.4714036676994283, "grad_norm": 0.11608819663524628, "learning_rate": 0.0005, "loss": 2.1168, "step": 123850 }, { "epoch": 0.471441730167551, "grad_norm": 0.13417969644069672, "learning_rate": 0.0005, "loss": 2.0979, "step": 123860 }, { "epoch": 0.4714797926356737, "grad_norm": 0.12660396099090576, "learning_rate": 0.0005, "loss": 2.1265, "step": 123870 }, { "epoch": 0.47151785510379635, "grad_norm": 0.1264629364013672, "learning_rate": 0.0005, "loss": 2.1063, "step": 123880 }, { "epoch": 0.47155591757191906, "grad_norm": 0.1252276450395584, "learning_rate": 0.0005, "loss": 2.1121, "step": 123890 }, { "epoch": 0.4715939800400417, "grad_norm": 0.14274397492408752, "learning_rate": 0.0005, "loss": 2.1191, "step": 123900 }, { "epoch": 0.4716320425081644, "grad_norm": 0.12475555390119553, "learning_rate": 0.0005, "loss": 2.1091, "step": 123910 }, { "epoch": 0.4716701049762871, "grad_norm": 0.12727521359920502, "learning_rate": 0.0005, "loss": 2.1222, "step": 123920 }, { "epoch": 0.47170816744440974, "grad_norm": 0.13684184849262238, "learning_rate": 0.0005, "loss": 2.1299, "step": 123930 }, { "epoch": 0.47174622991253246, "grad_norm": 0.13249535858631134, "learning_rate": 0.0005, "loss": 2.121, "step": 123940 }, { "epoch": 0.4717842923806551, "grad_norm": 0.1293686330318451, "learning_rate": 0.0005, "loss": 2.1283, "step": 123950 }, { "epoch": 0.4718223548487778, "grad_norm": 0.14200104773044586, "learning_rate": 0.0005, "loss": 2.1111, "step": 123960 }, { "epoch": 0.4718604173169005, "grad_norm": 0.12920936942100525, "learning_rate": 0.0005, "loss": 2.1242, "step": 123970 }, { "epoch": 0.4718984797850232, "grad_norm": 0.1317531168460846, "learning_rate": 0.0005, "loss": 2.1146, "step": 123980 }, { "epoch": 0.47193654225314585, "grad_norm": 0.13654613494873047, "learning_rate": 0.0005, "loss": 2.125, "step": 123990 }, { "epoch": 0.47197460472126856, "grad_norm": 0.12663967907428741, "learning_rate": 0.0005, "loss": 2.1163, "step": 124000 }, { "epoch": 0.4720126671893912, "grad_norm": 0.12163762748241425, "learning_rate": 0.0005, "loss": 2.1048, "step": 124010 }, { "epoch": 0.47205072965751393, "grad_norm": 0.13067223131656647, "learning_rate": 0.0005, "loss": 2.1282, "step": 124020 }, { "epoch": 0.4720887921256366, "grad_norm": 0.11829733103513718, "learning_rate": 0.0005, "loss": 2.1197, "step": 124030 }, { "epoch": 0.4721268545937593, "grad_norm": 0.12859944999217987, "learning_rate": 0.0005, "loss": 2.1184, "step": 124040 }, { "epoch": 0.47216491706188196, "grad_norm": 0.1161130964756012, "learning_rate": 0.0005, "loss": 2.1194, "step": 124050 }, { "epoch": 0.4722029795300046, "grad_norm": 0.1277155727148056, "learning_rate": 0.0005, "loss": 2.1151, "step": 124060 }, { "epoch": 0.47224104199812733, "grad_norm": 0.10972582548856735, "learning_rate": 0.0005, "loss": 2.1189, "step": 124070 }, { "epoch": 0.47227910446625, "grad_norm": 0.12672588229179382, "learning_rate": 0.0005, "loss": 2.1149, "step": 124080 }, { "epoch": 0.4723171669343727, "grad_norm": 0.14097382128238678, "learning_rate": 0.0005, "loss": 2.1248, "step": 124090 }, { "epoch": 0.47235522940249536, "grad_norm": 0.12108251452445984, "learning_rate": 0.0005, "loss": 2.1029, "step": 124100 }, { "epoch": 0.47239329187061807, "grad_norm": 0.1314038187265396, "learning_rate": 0.0005, "loss": 2.1242, "step": 124110 }, { "epoch": 0.4724313543387407, "grad_norm": 0.13684318959712982, "learning_rate": 0.0005, "loss": 2.1134, "step": 124120 }, { "epoch": 0.47246941680686344, "grad_norm": 0.12464044988155365, "learning_rate": 0.0005, "loss": 2.12, "step": 124130 }, { "epoch": 0.4725074792749861, "grad_norm": 0.13032224774360657, "learning_rate": 0.0005, "loss": 2.122, "step": 124140 }, { "epoch": 0.4725455417431088, "grad_norm": 0.1219305470585823, "learning_rate": 0.0005, "loss": 2.1277, "step": 124150 }, { "epoch": 0.47258360421123147, "grad_norm": 0.12953796982765198, "learning_rate": 0.0005, "loss": 2.1195, "step": 124160 }, { "epoch": 0.4726216666793542, "grad_norm": 0.1365276426076889, "learning_rate": 0.0005, "loss": 2.1198, "step": 124170 }, { "epoch": 0.47265972914747684, "grad_norm": 0.12354233115911484, "learning_rate": 0.0005, "loss": 2.1277, "step": 124180 }, { "epoch": 0.47269779161559955, "grad_norm": 0.13295651972293854, "learning_rate": 0.0005, "loss": 2.0998, "step": 124190 }, { "epoch": 0.4727358540837222, "grad_norm": 0.1274101585149765, "learning_rate": 0.0005, "loss": 2.1094, "step": 124200 }, { "epoch": 0.47277391655184486, "grad_norm": 0.12417597323656082, "learning_rate": 0.0005, "loss": 2.1262, "step": 124210 }, { "epoch": 0.4728119790199676, "grad_norm": 0.12661489844322205, "learning_rate": 0.0005, "loss": 2.1261, "step": 124220 }, { "epoch": 0.47285004148809023, "grad_norm": 0.10787620395421982, "learning_rate": 0.0005, "loss": 2.1041, "step": 124230 }, { "epoch": 0.47288810395621295, "grad_norm": 0.12663552165031433, "learning_rate": 0.0005, "loss": 2.1089, "step": 124240 }, { "epoch": 0.4729261664243356, "grad_norm": 0.13825610280036926, "learning_rate": 0.0005, "loss": 2.1002, "step": 124250 }, { "epoch": 0.4729642288924583, "grad_norm": 0.12444397807121277, "learning_rate": 0.0005, "loss": 2.1146, "step": 124260 }, { "epoch": 0.473002291360581, "grad_norm": 0.11867283284664154, "learning_rate": 0.0005, "loss": 2.1309, "step": 124270 }, { "epoch": 0.4730403538287037, "grad_norm": 0.13824884593486786, "learning_rate": 0.0005, "loss": 2.1192, "step": 124280 }, { "epoch": 0.47307841629682634, "grad_norm": 0.1373097002506256, "learning_rate": 0.0005, "loss": 2.1163, "step": 124290 }, { "epoch": 0.47311647876494906, "grad_norm": 0.11826524883508682, "learning_rate": 0.0005, "loss": 2.1209, "step": 124300 }, { "epoch": 0.4731545412330717, "grad_norm": 0.11999563872814178, "learning_rate": 0.0005, "loss": 2.1075, "step": 124310 }, { "epoch": 0.4731926037011944, "grad_norm": 0.11594437062740326, "learning_rate": 0.0005, "loss": 2.1136, "step": 124320 }, { "epoch": 0.4732306661693171, "grad_norm": 0.11913499981164932, "learning_rate": 0.0005, "loss": 2.1061, "step": 124330 }, { "epoch": 0.47326872863743974, "grad_norm": 0.12613172829151154, "learning_rate": 0.0005, "loss": 2.1104, "step": 124340 }, { "epoch": 0.47330679110556245, "grad_norm": 0.11476528644561768, "learning_rate": 0.0005, "loss": 2.1134, "step": 124350 }, { "epoch": 0.4733448535736851, "grad_norm": 0.12161380052566528, "learning_rate": 0.0005, "loss": 2.1123, "step": 124360 }, { "epoch": 0.4733829160418078, "grad_norm": 0.12207907438278198, "learning_rate": 0.0005, "loss": 2.1156, "step": 124370 }, { "epoch": 0.4734209785099305, "grad_norm": 0.129538431763649, "learning_rate": 0.0005, "loss": 2.1181, "step": 124380 }, { "epoch": 0.4734590409780532, "grad_norm": 0.12141788005828857, "learning_rate": 0.0005, "loss": 2.1121, "step": 124390 }, { "epoch": 0.47349710344617585, "grad_norm": 0.13145986199378967, "learning_rate": 0.0005, "loss": 2.1046, "step": 124400 }, { "epoch": 0.47353516591429856, "grad_norm": 0.12737277150154114, "learning_rate": 0.0005, "loss": 2.1202, "step": 124410 }, { "epoch": 0.4735732283824212, "grad_norm": 0.13328434526920319, "learning_rate": 0.0005, "loss": 2.119, "step": 124420 }, { "epoch": 0.47361129085054393, "grad_norm": 0.1177564188838005, "learning_rate": 0.0005, "loss": 2.0978, "step": 124430 }, { "epoch": 0.4736493533186666, "grad_norm": 0.12407038360834122, "learning_rate": 0.0005, "loss": 2.1101, "step": 124440 }, { "epoch": 0.4736874157867893, "grad_norm": 0.12505443394184113, "learning_rate": 0.0005, "loss": 2.1424, "step": 124450 }, { "epoch": 0.47372547825491196, "grad_norm": 0.11660947650671005, "learning_rate": 0.0005, "loss": 2.1264, "step": 124460 }, { "epoch": 0.47376354072303467, "grad_norm": 0.1413010209798813, "learning_rate": 0.0005, "loss": 2.1143, "step": 124470 }, { "epoch": 0.47380160319115733, "grad_norm": 0.1457766890525818, "learning_rate": 0.0005, "loss": 2.1034, "step": 124480 }, { "epoch": 0.47383966565928, "grad_norm": 0.11343556642532349, "learning_rate": 0.0005, "loss": 2.1209, "step": 124490 }, { "epoch": 0.4738777281274027, "grad_norm": 0.13399618864059448, "learning_rate": 0.0005, "loss": 2.1319, "step": 124500 }, { "epoch": 0.47391579059552535, "grad_norm": 0.1370176076889038, "learning_rate": 0.0005, "loss": 2.1042, "step": 124510 }, { "epoch": 0.47395385306364807, "grad_norm": 0.1271154284477234, "learning_rate": 0.0005, "loss": 2.1328, "step": 124520 }, { "epoch": 0.4739919155317707, "grad_norm": 0.12623514235019684, "learning_rate": 0.0005, "loss": 2.1275, "step": 124530 }, { "epoch": 0.47402997799989344, "grad_norm": 0.15138548612594604, "learning_rate": 0.0005, "loss": 2.1142, "step": 124540 }, { "epoch": 0.4740680404680161, "grad_norm": 0.1311340481042862, "learning_rate": 0.0005, "loss": 2.1165, "step": 124550 }, { "epoch": 0.4741061029361388, "grad_norm": 0.12341172248125076, "learning_rate": 0.0005, "loss": 2.1225, "step": 124560 }, { "epoch": 0.47414416540426146, "grad_norm": 0.12557727098464966, "learning_rate": 0.0005, "loss": 2.114, "step": 124570 }, { "epoch": 0.4741822278723842, "grad_norm": 0.12559503316879272, "learning_rate": 0.0005, "loss": 2.1133, "step": 124580 }, { "epoch": 0.47422029034050683, "grad_norm": 0.1338992416858673, "learning_rate": 0.0005, "loss": 2.1134, "step": 124590 }, { "epoch": 0.47425835280862955, "grad_norm": 0.12634088099002838, "learning_rate": 0.0005, "loss": 2.1197, "step": 124600 }, { "epoch": 0.4742964152767522, "grad_norm": 0.12977583706378937, "learning_rate": 0.0005, "loss": 2.1015, "step": 124610 }, { "epoch": 0.4743344777448749, "grad_norm": 0.11903408169746399, "learning_rate": 0.0005, "loss": 2.121, "step": 124620 }, { "epoch": 0.4743725402129976, "grad_norm": 0.12493615597486496, "learning_rate": 0.0005, "loss": 2.1136, "step": 124630 }, { "epoch": 0.47441060268112023, "grad_norm": 0.12668555974960327, "learning_rate": 0.0005, "loss": 2.1357, "step": 124640 }, { "epoch": 0.47444866514924294, "grad_norm": 0.11522762477397919, "learning_rate": 0.0005, "loss": 2.1331, "step": 124650 }, { "epoch": 0.4744867276173656, "grad_norm": 0.12461698055267334, "learning_rate": 0.0005, "loss": 2.1259, "step": 124660 }, { "epoch": 0.4745247900854883, "grad_norm": 0.11919340491294861, "learning_rate": 0.0005, "loss": 2.1119, "step": 124670 }, { "epoch": 0.47456285255361097, "grad_norm": 0.1142156720161438, "learning_rate": 0.0005, "loss": 2.1139, "step": 124680 }, { "epoch": 0.4746009150217337, "grad_norm": 0.1265660524368286, "learning_rate": 0.0005, "loss": 2.1302, "step": 124690 }, { "epoch": 0.47463897748985634, "grad_norm": 0.1387627124786377, "learning_rate": 0.0005, "loss": 2.1175, "step": 124700 }, { "epoch": 0.47467703995797905, "grad_norm": 0.14065468311309814, "learning_rate": 0.0005, "loss": 2.1212, "step": 124710 }, { "epoch": 0.4747151024261017, "grad_norm": 0.1256401389837265, "learning_rate": 0.0005, "loss": 2.1181, "step": 124720 }, { "epoch": 0.4747531648942244, "grad_norm": 0.12581953406333923, "learning_rate": 0.0005, "loss": 2.1075, "step": 124730 }, { "epoch": 0.4747912273623471, "grad_norm": 0.13003399968147278, "learning_rate": 0.0005, "loss": 2.1065, "step": 124740 }, { "epoch": 0.4748292898304698, "grad_norm": 0.12539975345134735, "learning_rate": 0.0005, "loss": 2.0997, "step": 124750 }, { "epoch": 0.47486735229859245, "grad_norm": 0.1365066021680832, "learning_rate": 0.0005, "loss": 2.1016, "step": 124760 }, { "epoch": 0.4749054147667151, "grad_norm": 0.1297515630722046, "learning_rate": 0.0005, "loss": 2.1059, "step": 124770 }, { "epoch": 0.4749434772348378, "grad_norm": 0.12623704969882965, "learning_rate": 0.0005, "loss": 2.0995, "step": 124780 }, { "epoch": 0.4749815397029605, "grad_norm": 0.12066211551427841, "learning_rate": 0.0005, "loss": 2.1141, "step": 124790 }, { "epoch": 0.4750196021710832, "grad_norm": 0.11945539712905884, "learning_rate": 0.0005, "loss": 2.1175, "step": 124800 }, { "epoch": 0.47505766463920585, "grad_norm": 0.13884441554546356, "learning_rate": 0.0005, "loss": 2.1189, "step": 124810 }, { "epoch": 0.47509572710732856, "grad_norm": 0.1486128270626068, "learning_rate": 0.0005, "loss": 2.1082, "step": 124820 }, { "epoch": 0.4751337895754512, "grad_norm": 0.12276052683591843, "learning_rate": 0.0005, "loss": 2.1217, "step": 124830 }, { "epoch": 0.47517185204357393, "grad_norm": 0.13106083869934082, "learning_rate": 0.0005, "loss": 2.1033, "step": 124840 }, { "epoch": 0.4752099145116966, "grad_norm": 0.1291654109954834, "learning_rate": 0.0005, "loss": 2.1133, "step": 124850 }, { "epoch": 0.4752479769798193, "grad_norm": 0.1254255324602127, "learning_rate": 0.0005, "loss": 2.1024, "step": 124860 }, { "epoch": 0.47528603944794195, "grad_norm": 0.12424609810113907, "learning_rate": 0.0005, "loss": 2.1022, "step": 124870 }, { "epoch": 0.47532410191606467, "grad_norm": 0.1316959261894226, "learning_rate": 0.0005, "loss": 2.1235, "step": 124880 }, { "epoch": 0.4753621643841873, "grad_norm": 0.13111689686775208, "learning_rate": 0.0005, "loss": 2.1236, "step": 124890 }, { "epoch": 0.47540022685231004, "grad_norm": 0.11223283410072327, "learning_rate": 0.0005, "loss": 2.1234, "step": 124900 }, { "epoch": 0.4754382893204327, "grad_norm": 0.1185784563422203, "learning_rate": 0.0005, "loss": 2.105, "step": 124910 }, { "epoch": 0.47547635178855535, "grad_norm": 0.1251397579908371, "learning_rate": 0.0005, "loss": 2.1217, "step": 124920 }, { "epoch": 0.47551441425667806, "grad_norm": 0.13898153603076935, "learning_rate": 0.0005, "loss": 2.1043, "step": 124930 }, { "epoch": 0.4755524767248007, "grad_norm": 0.12897156178951263, "learning_rate": 0.0005, "loss": 2.1219, "step": 124940 }, { "epoch": 0.47559053919292343, "grad_norm": 0.1365572065114975, "learning_rate": 0.0005, "loss": 2.1083, "step": 124950 }, { "epoch": 0.4756286016610461, "grad_norm": 0.12999938428401947, "learning_rate": 0.0005, "loss": 2.1164, "step": 124960 }, { "epoch": 0.4756666641291688, "grad_norm": 0.14317527413368225, "learning_rate": 0.0005, "loss": 2.1135, "step": 124970 }, { "epoch": 0.47570472659729146, "grad_norm": 0.13054832816123962, "learning_rate": 0.0005, "loss": 2.1094, "step": 124980 }, { "epoch": 0.4757427890654142, "grad_norm": 0.12040127068758011, "learning_rate": 0.0005, "loss": 2.1048, "step": 124990 }, { "epoch": 0.47578085153353683, "grad_norm": 0.15106411278247833, "learning_rate": 0.0005, "loss": 2.1274, "step": 125000 }, { "epoch": 0.47581891400165954, "grad_norm": 0.12881748378276825, "learning_rate": 0.0005, "loss": 2.1257, "step": 125010 }, { "epoch": 0.4758569764697822, "grad_norm": 0.1211240142583847, "learning_rate": 0.0005, "loss": 2.1295, "step": 125020 }, { "epoch": 0.4758950389379049, "grad_norm": 0.1255410760641098, "learning_rate": 0.0005, "loss": 2.108, "step": 125030 }, { "epoch": 0.47593310140602757, "grad_norm": 0.13183090090751648, "learning_rate": 0.0005, "loss": 2.1284, "step": 125040 }, { "epoch": 0.4759711638741503, "grad_norm": 0.4035550653934479, "learning_rate": 0.0005, "loss": 2.108, "step": 125050 }, { "epoch": 0.47600922634227294, "grad_norm": 0.12410563975572586, "learning_rate": 0.0005, "loss": 2.1095, "step": 125060 }, { "epoch": 0.4760472888103956, "grad_norm": 0.12635254859924316, "learning_rate": 0.0005, "loss": 2.122, "step": 125070 }, { "epoch": 0.4760853512785183, "grad_norm": 0.13341419398784637, "learning_rate": 0.0005, "loss": 2.1194, "step": 125080 }, { "epoch": 0.47612341374664097, "grad_norm": 0.13298030197620392, "learning_rate": 0.0005, "loss": 2.1291, "step": 125090 }, { "epoch": 0.4761614762147637, "grad_norm": 0.11619975417852402, "learning_rate": 0.0005, "loss": 2.1256, "step": 125100 }, { "epoch": 0.47619953868288634, "grad_norm": 0.12509651482105255, "learning_rate": 0.0005, "loss": 2.1013, "step": 125110 }, { "epoch": 0.47623760115100905, "grad_norm": 0.1121395081281662, "learning_rate": 0.0005, "loss": 2.1219, "step": 125120 }, { "epoch": 0.4762756636191317, "grad_norm": 0.12973745167255402, "learning_rate": 0.0005, "loss": 2.1234, "step": 125130 }, { "epoch": 0.4763137260872544, "grad_norm": 0.13184334337711334, "learning_rate": 0.0005, "loss": 2.1223, "step": 125140 }, { "epoch": 0.4763517885553771, "grad_norm": 0.11461430788040161, "learning_rate": 0.0005, "loss": 2.112, "step": 125150 }, { "epoch": 0.4763898510234998, "grad_norm": 0.13077722489833832, "learning_rate": 0.0005, "loss": 2.1089, "step": 125160 }, { "epoch": 0.47642791349162245, "grad_norm": 0.14497019350528717, "learning_rate": 0.0005, "loss": 2.1101, "step": 125170 }, { "epoch": 0.47646597595974516, "grad_norm": 0.12146943807601929, "learning_rate": 0.0005, "loss": 2.1194, "step": 125180 }, { "epoch": 0.4765040384278678, "grad_norm": 0.11859641224145889, "learning_rate": 0.0005, "loss": 2.115, "step": 125190 }, { "epoch": 0.4765421008959905, "grad_norm": 0.13055294752120972, "learning_rate": 0.0005, "loss": 2.1108, "step": 125200 }, { "epoch": 0.4765801633641132, "grad_norm": 0.1231132373213768, "learning_rate": 0.0005, "loss": 2.1049, "step": 125210 }, { "epoch": 0.47661822583223584, "grad_norm": 0.1298208385705948, "learning_rate": 0.0005, "loss": 2.103, "step": 125220 }, { "epoch": 0.47665628830035855, "grad_norm": 0.11626437306404114, "learning_rate": 0.0005, "loss": 2.1017, "step": 125230 }, { "epoch": 0.4766943507684812, "grad_norm": 0.13338100910186768, "learning_rate": 0.0005, "loss": 2.104, "step": 125240 }, { "epoch": 0.4767324132366039, "grad_norm": 0.14445482194423676, "learning_rate": 0.0005, "loss": 2.1222, "step": 125250 }, { "epoch": 0.4767704757047266, "grad_norm": 0.12296000868082047, "learning_rate": 0.0005, "loss": 2.1064, "step": 125260 }, { "epoch": 0.4768085381728493, "grad_norm": 0.11614549905061722, "learning_rate": 0.0005, "loss": 2.0973, "step": 125270 }, { "epoch": 0.47684660064097195, "grad_norm": 0.11743879318237305, "learning_rate": 0.0005, "loss": 2.1106, "step": 125280 }, { "epoch": 0.47688466310909466, "grad_norm": 0.12904691696166992, "learning_rate": 0.0005, "loss": 2.1148, "step": 125290 }, { "epoch": 0.4769227255772173, "grad_norm": 0.13546623289585114, "learning_rate": 0.0005, "loss": 2.1202, "step": 125300 }, { "epoch": 0.47696078804534003, "grad_norm": 0.13174700736999512, "learning_rate": 0.0005, "loss": 2.1116, "step": 125310 }, { "epoch": 0.4769988505134627, "grad_norm": 0.128945454955101, "learning_rate": 0.0005, "loss": 2.103, "step": 125320 }, { "epoch": 0.4770369129815854, "grad_norm": 0.1436336636543274, "learning_rate": 0.0005, "loss": 2.1139, "step": 125330 }, { "epoch": 0.47707497544970806, "grad_norm": 0.12894588708877563, "learning_rate": 0.0005, "loss": 2.0977, "step": 125340 }, { "epoch": 0.4771130379178307, "grad_norm": 0.1292628049850464, "learning_rate": 0.0005, "loss": 2.1095, "step": 125350 }, { "epoch": 0.47715110038595343, "grad_norm": 0.12030140310525894, "learning_rate": 0.0005, "loss": 2.1235, "step": 125360 }, { "epoch": 0.4771891628540761, "grad_norm": 0.12619850039482117, "learning_rate": 0.0005, "loss": 2.1162, "step": 125370 }, { "epoch": 0.4772272253221988, "grad_norm": 0.3717951774597168, "learning_rate": 0.0005, "loss": 2.1019, "step": 125380 }, { "epoch": 0.47726528779032146, "grad_norm": 0.13587363064289093, "learning_rate": 0.0005, "loss": 2.1217, "step": 125390 }, { "epoch": 0.47730335025844417, "grad_norm": 0.15112850069999695, "learning_rate": 0.0005, "loss": 2.1025, "step": 125400 }, { "epoch": 0.4773414127265668, "grad_norm": 0.12029213458299637, "learning_rate": 0.0005, "loss": 2.121, "step": 125410 }, { "epoch": 0.47737947519468954, "grad_norm": 0.1370517760515213, "learning_rate": 0.0005, "loss": 2.1155, "step": 125420 }, { "epoch": 0.4774175376628122, "grad_norm": 0.12078516185283661, "learning_rate": 0.0005, "loss": 2.119, "step": 125430 }, { "epoch": 0.4774556001309349, "grad_norm": 0.117357537150383, "learning_rate": 0.0005, "loss": 2.112, "step": 125440 }, { "epoch": 0.47749366259905757, "grad_norm": 0.11920157074928284, "learning_rate": 0.0005, "loss": 2.1055, "step": 125450 }, { "epoch": 0.4775317250671803, "grad_norm": 0.1200534850358963, "learning_rate": 0.0005, "loss": 2.1197, "step": 125460 }, { "epoch": 0.47756978753530294, "grad_norm": 0.13385429978370667, "learning_rate": 0.0005, "loss": 2.124, "step": 125470 }, { "epoch": 0.47760785000342565, "grad_norm": 0.11925206333398819, "learning_rate": 0.0005, "loss": 2.1041, "step": 125480 }, { "epoch": 0.4776459124715483, "grad_norm": 0.13422226905822754, "learning_rate": 0.0005, "loss": 2.1191, "step": 125490 }, { "epoch": 0.47768397493967096, "grad_norm": 0.12674109637737274, "learning_rate": 0.0005, "loss": 2.1105, "step": 125500 }, { "epoch": 0.4777220374077937, "grad_norm": 0.1292978972196579, "learning_rate": 0.0005, "loss": 2.12, "step": 125510 }, { "epoch": 0.47776009987591633, "grad_norm": 0.14307229220867157, "learning_rate": 0.0005, "loss": 2.1186, "step": 125520 }, { "epoch": 0.47779816234403905, "grad_norm": 0.13327215611934662, "learning_rate": 0.0005, "loss": 2.1066, "step": 125530 }, { "epoch": 0.4778362248121617, "grad_norm": 0.13076509535312653, "learning_rate": 0.0005, "loss": 2.1122, "step": 125540 }, { "epoch": 0.4778742872802844, "grad_norm": 0.11930997669696808, "learning_rate": 0.0005, "loss": 2.1135, "step": 125550 }, { "epoch": 0.4779123497484071, "grad_norm": 0.12345302850008011, "learning_rate": 0.0005, "loss": 2.1083, "step": 125560 }, { "epoch": 0.4779504122165298, "grad_norm": 0.12322073429822922, "learning_rate": 0.0005, "loss": 2.1254, "step": 125570 }, { "epoch": 0.47798847468465244, "grad_norm": 0.12302183359861374, "learning_rate": 0.0005, "loss": 2.1049, "step": 125580 }, { "epoch": 0.47802653715277516, "grad_norm": 0.1490904539823532, "learning_rate": 0.0005, "loss": 2.1061, "step": 125590 }, { "epoch": 0.4780645996208978, "grad_norm": 0.13050857186317444, "learning_rate": 0.0005, "loss": 2.1132, "step": 125600 }, { "epoch": 0.4781026620890205, "grad_norm": 0.13035564124584198, "learning_rate": 0.0005, "loss": 2.1017, "step": 125610 }, { "epoch": 0.4781407245571432, "grad_norm": 0.13813672959804535, "learning_rate": 0.0005, "loss": 2.1209, "step": 125620 }, { "epoch": 0.47817878702526584, "grad_norm": 0.1336815059185028, "learning_rate": 0.0005, "loss": 2.1271, "step": 125630 }, { "epoch": 0.47821684949338855, "grad_norm": 0.12296368181705475, "learning_rate": 0.0005, "loss": 2.1239, "step": 125640 }, { "epoch": 0.4782549119615112, "grad_norm": 0.12488293647766113, "learning_rate": 0.0005, "loss": 2.1162, "step": 125650 }, { "epoch": 0.4782929744296339, "grad_norm": 0.13063012063503265, "learning_rate": 0.0005, "loss": 2.0934, "step": 125660 }, { "epoch": 0.4783310368977566, "grad_norm": 0.1249491274356842, "learning_rate": 0.0005, "loss": 2.109, "step": 125670 }, { "epoch": 0.4783690993658793, "grad_norm": 0.2116761952638626, "learning_rate": 0.0005, "loss": 2.1249, "step": 125680 }, { "epoch": 0.47840716183400195, "grad_norm": 0.12892261147499084, "learning_rate": 0.0005, "loss": 2.1092, "step": 125690 }, { "epoch": 0.47844522430212466, "grad_norm": 0.13267821073532104, "learning_rate": 0.0005, "loss": 2.0941, "step": 125700 }, { "epoch": 0.4784832867702473, "grad_norm": 0.1281917840242386, "learning_rate": 0.0005, "loss": 2.115, "step": 125710 }, { "epoch": 0.47852134923837003, "grad_norm": 0.12860079109668732, "learning_rate": 0.0005, "loss": 2.0996, "step": 125720 }, { "epoch": 0.4785594117064927, "grad_norm": 0.13791748881340027, "learning_rate": 0.0005, "loss": 2.1176, "step": 125730 }, { "epoch": 0.4785974741746154, "grad_norm": 0.12474928796291351, "learning_rate": 0.0005, "loss": 2.1222, "step": 125740 }, { "epoch": 0.47863553664273806, "grad_norm": 0.1367192417383194, "learning_rate": 0.0005, "loss": 2.1154, "step": 125750 }, { "epoch": 0.47867359911086077, "grad_norm": 0.12175612151622772, "learning_rate": 0.0005, "loss": 2.1152, "step": 125760 }, { "epoch": 0.4787116615789834, "grad_norm": 0.12259689718484879, "learning_rate": 0.0005, "loss": 2.1023, "step": 125770 }, { "epoch": 0.4787497240471061, "grad_norm": 0.12758487462997437, "learning_rate": 0.0005, "loss": 2.1144, "step": 125780 }, { "epoch": 0.4787877865152288, "grad_norm": 0.13025754690170288, "learning_rate": 0.0005, "loss": 2.1017, "step": 125790 }, { "epoch": 0.47882584898335145, "grad_norm": 0.13321420550346375, "learning_rate": 0.0005, "loss": 2.1182, "step": 125800 }, { "epoch": 0.47886391145147417, "grad_norm": 0.12373219430446625, "learning_rate": 0.0005, "loss": 2.1197, "step": 125810 }, { "epoch": 0.4789019739195968, "grad_norm": 0.14295180141925812, "learning_rate": 0.0005, "loss": 2.1202, "step": 125820 }, { "epoch": 0.47894003638771954, "grad_norm": 0.12274914234876633, "learning_rate": 0.0005, "loss": 2.105, "step": 125830 }, { "epoch": 0.4789780988558422, "grad_norm": 0.12903110682964325, "learning_rate": 0.0005, "loss": 2.1133, "step": 125840 }, { "epoch": 0.4790161613239649, "grad_norm": 0.1290123164653778, "learning_rate": 0.0005, "loss": 2.1174, "step": 125850 }, { "epoch": 0.47905422379208756, "grad_norm": 0.12053056061267853, "learning_rate": 0.0005, "loss": 2.1077, "step": 125860 }, { "epoch": 0.4790922862602103, "grad_norm": 0.13491888344287872, "learning_rate": 0.0005, "loss": 2.1158, "step": 125870 }, { "epoch": 0.47913034872833293, "grad_norm": 0.12733201682567596, "learning_rate": 0.0005, "loss": 2.1259, "step": 125880 }, { "epoch": 0.47916841119645565, "grad_norm": 0.12354981154203415, "learning_rate": 0.0005, "loss": 2.1175, "step": 125890 }, { "epoch": 0.4792064736645783, "grad_norm": 0.12990646064281464, "learning_rate": 0.0005, "loss": 2.104, "step": 125900 }, { "epoch": 0.479244536132701, "grad_norm": 0.11663618683815002, "learning_rate": 0.0005, "loss": 2.1255, "step": 125910 }, { "epoch": 0.4792825986008237, "grad_norm": 0.1272948831319809, "learning_rate": 0.0005, "loss": 2.1147, "step": 125920 }, { "epoch": 0.47932066106894633, "grad_norm": 0.12534311413764954, "learning_rate": 0.0005, "loss": 2.1176, "step": 125930 }, { "epoch": 0.47935872353706904, "grad_norm": 0.1488637626171112, "learning_rate": 0.0005, "loss": 2.1064, "step": 125940 }, { "epoch": 0.4793967860051917, "grad_norm": 0.13527126610279083, "learning_rate": 0.0005, "loss": 2.1226, "step": 125950 }, { "epoch": 0.4794348484733144, "grad_norm": 0.1611463725566864, "learning_rate": 0.0005, "loss": 2.1102, "step": 125960 }, { "epoch": 0.47947291094143707, "grad_norm": 0.11967337131500244, "learning_rate": 0.0005, "loss": 2.1231, "step": 125970 }, { "epoch": 0.4795109734095598, "grad_norm": 0.13560950756072998, "learning_rate": 0.0005, "loss": 2.1181, "step": 125980 }, { "epoch": 0.47954903587768244, "grad_norm": 0.1264272928237915, "learning_rate": 0.0005, "loss": 2.0995, "step": 125990 }, { "epoch": 0.47958709834580515, "grad_norm": 0.1372808814048767, "learning_rate": 0.0005, "loss": 2.1172, "step": 126000 }, { "epoch": 0.4796251608139278, "grad_norm": 0.13430854678153992, "learning_rate": 0.0005, "loss": 2.1226, "step": 126010 }, { "epoch": 0.4796632232820505, "grad_norm": 0.14079371094703674, "learning_rate": 0.0005, "loss": 2.107, "step": 126020 }, { "epoch": 0.4797012857501732, "grad_norm": 0.13879041373729706, "learning_rate": 0.0005, "loss": 2.1306, "step": 126030 }, { "epoch": 0.4797393482182959, "grad_norm": 0.12993349134922028, "learning_rate": 0.0005, "loss": 2.1263, "step": 126040 }, { "epoch": 0.47977741068641855, "grad_norm": 0.11303086578845978, "learning_rate": 0.0005, "loss": 2.1166, "step": 126050 }, { "epoch": 0.4798154731545412, "grad_norm": 0.11333756148815155, "learning_rate": 0.0005, "loss": 2.1208, "step": 126060 }, { "epoch": 0.4798535356226639, "grad_norm": 0.11820187419652939, "learning_rate": 0.0005, "loss": 2.1164, "step": 126070 }, { "epoch": 0.4798915980907866, "grad_norm": 0.14163796603679657, "learning_rate": 0.0005, "loss": 2.1045, "step": 126080 }, { "epoch": 0.4799296605589093, "grad_norm": 0.13567952811717987, "learning_rate": 0.0005, "loss": 2.1366, "step": 126090 }, { "epoch": 0.47996772302703195, "grad_norm": 0.12436114996671677, "learning_rate": 0.0005, "loss": 2.1199, "step": 126100 }, { "epoch": 0.48000578549515466, "grad_norm": 0.12871521711349487, "learning_rate": 0.0005, "loss": 2.0973, "step": 126110 }, { "epoch": 0.4800438479632773, "grad_norm": 0.12559446692466736, "learning_rate": 0.0005, "loss": 2.1202, "step": 126120 }, { "epoch": 0.4800819104314, "grad_norm": 0.11897538602352142, "learning_rate": 0.0005, "loss": 2.1066, "step": 126130 }, { "epoch": 0.4801199728995227, "grad_norm": 0.13987573981285095, "learning_rate": 0.0005, "loss": 2.1149, "step": 126140 }, { "epoch": 0.4801580353676454, "grad_norm": 0.13216280937194824, "learning_rate": 0.0005, "loss": 2.1139, "step": 126150 }, { "epoch": 0.48019609783576805, "grad_norm": 0.11237651854753494, "learning_rate": 0.0005, "loss": 2.1121, "step": 126160 }, { "epoch": 0.48023416030389077, "grad_norm": 0.12070129811763763, "learning_rate": 0.0005, "loss": 2.111, "step": 126170 }, { "epoch": 0.4802722227720134, "grad_norm": 0.12533554434776306, "learning_rate": 0.0005, "loss": 2.1118, "step": 126180 }, { "epoch": 0.48031028524013614, "grad_norm": 0.12822876870632172, "learning_rate": 0.0005, "loss": 2.1155, "step": 126190 }, { "epoch": 0.4803483477082588, "grad_norm": 0.12372897565364838, "learning_rate": 0.0005, "loss": 2.1323, "step": 126200 }, { "epoch": 0.48038641017638145, "grad_norm": 0.1299714893102646, "learning_rate": 0.0005, "loss": 2.1109, "step": 126210 }, { "epoch": 0.48042447264450416, "grad_norm": 0.14231672883033752, "learning_rate": 0.0005, "loss": 2.1231, "step": 126220 }, { "epoch": 0.4804625351126268, "grad_norm": 0.12293112277984619, "learning_rate": 0.0005, "loss": 2.1162, "step": 126230 }, { "epoch": 0.48050059758074953, "grad_norm": 0.12258761376142502, "learning_rate": 0.0005, "loss": 2.1109, "step": 126240 }, { "epoch": 0.4805386600488722, "grad_norm": 0.12533219158649445, "learning_rate": 0.0005, "loss": 2.1122, "step": 126250 }, { "epoch": 0.4805767225169949, "grad_norm": 0.30654239654541016, "learning_rate": 0.0005, "loss": 2.1239, "step": 126260 }, { "epoch": 0.48061478498511756, "grad_norm": 0.13637426495552063, "learning_rate": 0.0005, "loss": 2.1195, "step": 126270 }, { "epoch": 0.4806528474532403, "grad_norm": 0.13394691050052643, "learning_rate": 0.0005, "loss": 2.1014, "step": 126280 }, { "epoch": 0.48069090992136293, "grad_norm": 0.12441105395555496, "learning_rate": 0.0005, "loss": 2.134, "step": 126290 }, { "epoch": 0.48072897238948564, "grad_norm": 0.11978069692850113, "learning_rate": 0.0005, "loss": 2.1201, "step": 126300 }, { "epoch": 0.4807670348576083, "grad_norm": 0.14189858734607697, "learning_rate": 0.0005, "loss": 2.1236, "step": 126310 }, { "epoch": 0.480805097325731, "grad_norm": 0.1284215748310089, "learning_rate": 0.0005, "loss": 2.1233, "step": 126320 }, { "epoch": 0.48084315979385367, "grad_norm": 0.13876661658287048, "learning_rate": 0.0005, "loss": 2.1058, "step": 126330 }, { "epoch": 0.4808812222619764, "grad_norm": 0.1364854872226715, "learning_rate": 0.0005, "loss": 2.1154, "step": 126340 }, { "epoch": 0.48091928473009904, "grad_norm": 0.13453872501850128, "learning_rate": 0.0005, "loss": 2.1059, "step": 126350 }, { "epoch": 0.4809573471982217, "grad_norm": 0.12328676879405975, "learning_rate": 0.0005, "loss": 2.0941, "step": 126360 }, { "epoch": 0.4809954096663444, "grad_norm": 0.12543216347694397, "learning_rate": 0.0005, "loss": 2.1337, "step": 126370 }, { "epoch": 0.48103347213446707, "grad_norm": 0.1285954713821411, "learning_rate": 0.0005, "loss": 2.1096, "step": 126380 }, { "epoch": 0.4810715346025898, "grad_norm": 0.13536769151687622, "learning_rate": 0.0005, "loss": 2.1081, "step": 126390 }, { "epoch": 0.48110959707071244, "grad_norm": 0.12796925008296967, "learning_rate": 0.0005, "loss": 2.1159, "step": 126400 }, { "epoch": 0.48114765953883515, "grad_norm": 0.13553667068481445, "learning_rate": 0.0005, "loss": 2.1119, "step": 126410 }, { "epoch": 0.4811857220069578, "grad_norm": 0.12393445521593094, "learning_rate": 0.0005, "loss": 2.121, "step": 126420 }, { "epoch": 0.4812237844750805, "grad_norm": 0.138578861951828, "learning_rate": 0.0005, "loss": 2.1104, "step": 126430 }, { "epoch": 0.4812618469432032, "grad_norm": 0.13088813424110413, "learning_rate": 0.0005, "loss": 2.1218, "step": 126440 }, { "epoch": 0.4812999094113259, "grad_norm": 0.12858811020851135, "learning_rate": 0.0005, "loss": 2.1123, "step": 126450 }, { "epoch": 0.48133797187944855, "grad_norm": 0.11651439219713211, "learning_rate": 0.0005, "loss": 2.1133, "step": 126460 }, { "epoch": 0.48137603434757126, "grad_norm": 0.1188209056854248, "learning_rate": 0.0005, "loss": 2.1307, "step": 126470 }, { "epoch": 0.4814140968156939, "grad_norm": 0.119675412774086, "learning_rate": 0.0005, "loss": 2.1088, "step": 126480 }, { "epoch": 0.4814521592838166, "grad_norm": 0.13475286960601807, "learning_rate": 0.0005, "loss": 2.1216, "step": 126490 }, { "epoch": 0.4814902217519393, "grad_norm": 0.12447642534971237, "learning_rate": 0.0005, "loss": 2.1156, "step": 126500 }, { "epoch": 0.48152828422006194, "grad_norm": 0.12670594453811646, "learning_rate": 0.0005, "loss": 2.1143, "step": 126510 }, { "epoch": 0.48156634668818465, "grad_norm": 0.12636840343475342, "learning_rate": 0.0005, "loss": 2.1181, "step": 126520 }, { "epoch": 0.4816044091563073, "grad_norm": 0.13792423903942108, "learning_rate": 0.0005, "loss": 2.1281, "step": 126530 }, { "epoch": 0.48164247162443, "grad_norm": 0.13417412340641022, "learning_rate": 0.0005, "loss": 2.1171, "step": 126540 }, { "epoch": 0.4816805340925527, "grad_norm": 0.14853769540786743, "learning_rate": 0.0005, "loss": 2.1272, "step": 126550 }, { "epoch": 0.4817185965606754, "grad_norm": 0.12657450139522552, "learning_rate": 0.0005, "loss": 2.1296, "step": 126560 }, { "epoch": 0.48175665902879805, "grad_norm": 0.16439884901046753, "learning_rate": 0.0005, "loss": 2.1096, "step": 126570 }, { "epoch": 0.48179472149692076, "grad_norm": 0.1257026195526123, "learning_rate": 0.0005, "loss": 2.1068, "step": 126580 }, { "epoch": 0.4818327839650434, "grad_norm": 0.12009840458631516, "learning_rate": 0.0005, "loss": 2.1114, "step": 126590 }, { "epoch": 0.48187084643316613, "grad_norm": 0.12126778811216354, "learning_rate": 0.0005, "loss": 2.1023, "step": 126600 }, { "epoch": 0.4819089089012888, "grad_norm": 0.13086417317390442, "learning_rate": 0.0005, "loss": 2.1121, "step": 126610 }, { "epoch": 0.4819469713694115, "grad_norm": 0.13769873976707458, "learning_rate": 0.0005, "loss": 2.1085, "step": 126620 }, { "epoch": 0.48198503383753416, "grad_norm": 0.14477433264255524, "learning_rate": 0.0005, "loss": 2.1143, "step": 126630 }, { "epoch": 0.4820230963056568, "grad_norm": 0.12043140828609467, "learning_rate": 0.0005, "loss": 2.1036, "step": 126640 }, { "epoch": 0.48206115877377953, "grad_norm": 0.12772685289382935, "learning_rate": 0.0005, "loss": 2.122, "step": 126650 }, { "epoch": 0.4820992212419022, "grad_norm": 0.12375368177890778, "learning_rate": 0.0005, "loss": 2.1081, "step": 126660 }, { "epoch": 0.4821372837100249, "grad_norm": 0.12025720626115799, "learning_rate": 0.0005, "loss": 2.114, "step": 126670 }, { "epoch": 0.48217534617814756, "grad_norm": 0.1275482028722763, "learning_rate": 0.0005, "loss": 2.1159, "step": 126680 }, { "epoch": 0.48221340864627027, "grad_norm": 0.12488540261983871, "learning_rate": 0.0005, "loss": 2.1218, "step": 126690 }, { "epoch": 0.4822514711143929, "grad_norm": 0.12372930347919464, "learning_rate": 0.0005, "loss": 2.1129, "step": 126700 }, { "epoch": 0.48228953358251564, "grad_norm": 0.13978280127048492, "learning_rate": 0.0005, "loss": 2.1121, "step": 126710 }, { "epoch": 0.4823275960506383, "grad_norm": 0.13022901117801666, "learning_rate": 0.0005, "loss": 2.1325, "step": 126720 }, { "epoch": 0.482365658518761, "grad_norm": 0.12196984887123108, "learning_rate": 0.0005, "loss": 2.1153, "step": 126730 }, { "epoch": 0.48240372098688367, "grad_norm": 0.127203106880188, "learning_rate": 0.0005, "loss": 2.1203, "step": 126740 }, { "epoch": 0.4824417834550064, "grad_norm": 0.1250368058681488, "learning_rate": 0.0005, "loss": 2.1219, "step": 126750 }, { "epoch": 0.48247984592312904, "grad_norm": 0.11303237825632095, "learning_rate": 0.0005, "loss": 2.1185, "step": 126760 }, { "epoch": 0.48251790839125175, "grad_norm": 0.12539029121398926, "learning_rate": 0.0005, "loss": 2.1106, "step": 126770 }, { "epoch": 0.4825559708593744, "grad_norm": 0.12309008091688156, "learning_rate": 0.0005, "loss": 2.1068, "step": 126780 }, { "epoch": 0.48259403332749706, "grad_norm": 0.1341366022825241, "learning_rate": 0.0005, "loss": 2.0961, "step": 126790 }, { "epoch": 0.4826320957956198, "grad_norm": 0.12358230352401733, "learning_rate": 0.0005, "loss": 2.1069, "step": 126800 }, { "epoch": 0.48267015826374243, "grad_norm": 0.11779564619064331, "learning_rate": 0.0005, "loss": 2.1219, "step": 126810 }, { "epoch": 0.48270822073186515, "grad_norm": 0.13021689653396606, "learning_rate": 0.0005, "loss": 2.1203, "step": 126820 }, { "epoch": 0.4827462831999878, "grad_norm": 0.12274055927991867, "learning_rate": 0.0005, "loss": 2.1245, "step": 126830 }, { "epoch": 0.4827843456681105, "grad_norm": 0.12138562649488449, "learning_rate": 0.0005, "loss": 2.1148, "step": 126840 }, { "epoch": 0.48282240813623317, "grad_norm": 0.11908987909555435, "learning_rate": 0.0005, "loss": 2.128, "step": 126850 }, { "epoch": 0.4828604706043559, "grad_norm": 0.12997077405452728, "learning_rate": 0.0005, "loss": 2.107, "step": 126860 }, { "epoch": 0.48289853307247854, "grad_norm": 0.11264729499816895, "learning_rate": 0.0005, "loss": 2.1336, "step": 126870 }, { "epoch": 0.48293659554060125, "grad_norm": 0.12156654894351959, "learning_rate": 0.0005, "loss": 2.133, "step": 126880 }, { "epoch": 0.4829746580087239, "grad_norm": 0.12737615406513214, "learning_rate": 0.0005, "loss": 2.1281, "step": 126890 }, { "epoch": 0.4830127204768466, "grad_norm": 0.11980323493480682, "learning_rate": 0.0005, "loss": 2.1119, "step": 126900 }, { "epoch": 0.4830507829449693, "grad_norm": 0.13023775815963745, "learning_rate": 0.0005, "loss": 2.1352, "step": 126910 }, { "epoch": 0.483088845413092, "grad_norm": 0.12090695649385452, "learning_rate": 0.0005, "loss": 2.1149, "step": 126920 }, { "epoch": 0.48312690788121465, "grad_norm": 0.13404586911201477, "learning_rate": 0.0005, "loss": 2.1112, "step": 126930 }, { "epoch": 0.4831649703493373, "grad_norm": 0.12173371016979218, "learning_rate": 0.0005, "loss": 2.1247, "step": 126940 }, { "epoch": 0.48320303281746, "grad_norm": 0.13910695910453796, "learning_rate": 0.0005, "loss": 2.1005, "step": 126950 }, { "epoch": 0.4832410952855827, "grad_norm": 0.12336437404155731, "learning_rate": 0.0005, "loss": 2.1001, "step": 126960 }, { "epoch": 0.4832791577537054, "grad_norm": 0.12722080945968628, "learning_rate": 0.0005, "loss": 2.1193, "step": 126970 }, { "epoch": 0.48331722022182805, "grad_norm": 0.11827809363603592, "learning_rate": 0.0005, "loss": 2.1212, "step": 126980 }, { "epoch": 0.48335528268995076, "grad_norm": 0.13443544507026672, "learning_rate": 0.0005, "loss": 2.1082, "step": 126990 }, { "epoch": 0.4833933451580734, "grad_norm": 0.11773506551980972, "learning_rate": 0.0005, "loss": 2.1156, "step": 127000 }, { "epoch": 0.48343140762619613, "grad_norm": 0.13921083509922028, "learning_rate": 0.0005, "loss": 2.1086, "step": 127010 }, { "epoch": 0.4834694700943188, "grad_norm": 0.1361696422100067, "learning_rate": 0.0005, "loss": 2.1195, "step": 127020 }, { "epoch": 0.4835075325624415, "grad_norm": 0.15075117349624634, "learning_rate": 0.0005, "loss": 2.1008, "step": 127030 }, { "epoch": 0.48354559503056416, "grad_norm": 0.11024720221757889, "learning_rate": 0.0005, "loss": 2.1061, "step": 127040 }, { "epoch": 0.48358365749868687, "grad_norm": 0.11815572530031204, "learning_rate": 0.0005, "loss": 2.1054, "step": 127050 }, { "epoch": 0.4836217199668095, "grad_norm": 0.13710758090019226, "learning_rate": 0.0005, "loss": 2.1147, "step": 127060 }, { "epoch": 0.4836597824349322, "grad_norm": 0.13314233720302582, "learning_rate": 0.0005, "loss": 2.1168, "step": 127070 }, { "epoch": 0.4836978449030549, "grad_norm": 0.12891244888305664, "learning_rate": 0.0005, "loss": 2.1149, "step": 127080 }, { "epoch": 0.48373590737117755, "grad_norm": 0.14799803495407104, "learning_rate": 0.0005, "loss": 2.1097, "step": 127090 }, { "epoch": 0.48377396983930027, "grad_norm": 0.1257518082857132, "learning_rate": 0.0005, "loss": 2.1098, "step": 127100 }, { "epoch": 0.4838120323074229, "grad_norm": 0.12361308932304382, "learning_rate": 0.0005, "loss": 2.1016, "step": 127110 }, { "epoch": 0.48385009477554564, "grad_norm": 0.1231234073638916, "learning_rate": 0.0005, "loss": 2.1278, "step": 127120 }, { "epoch": 0.4838881572436683, "grad_norm": 0.1312589794397354, "learning_rate": 0.0005, "loss": 2.1111, "step": 127130 }, { "epoch": 0.483926219711791, "grad_norm": 0.13112683594226837, "learning_rate": 0.0005, "loss": 2.1141, "step": 127140 }, { "epoch": 0.48396428217991366, "grad_norm": 0.11283516138792038, "learning_rate": 0.0005, "loss": 2.1177, "step": 127150 }, { "epoch": 0.4840023446480364, "grad_norm": 0.12356321513652802, "learning_rate": 0.0005, "loss": 2.1238, "step": 127160 }, { "epoch": 0.48404040711615903, "grad_norm": 0.12498544901609421, "learning_rate": 0.0005, "loss": 2.1032, "step": 127170 }, { "epoch": 0.48407846958428175, "grad_norm": 0.12278742343187332, "learning_rate": 0.0005, "loss": 2.1108, "step": 127180 }, { "epoch": 0.4841165320524044, "grad_norm": 0.12722855806350708, "learning_rate": 0.0005, "loss": 2.1141, "step": 127190 }, { "epoch": 0.4841545945205271, "grad_norm": 0.11609535664319992, "learning_rate": 0.0005, "loss": 2.1104, "step": 127200 }, { "epoch": 0.48419265698864977, "grad_norm": 0.1401669681072235, "learning_rate": 0.0005, "loss": 2.1235, "step": 127210 }, { "epoch": 0.48423071945677243, "grad_norm": 0.13876022398471832, "learning_rate": 0.0005, "loss": 2.1179, "step": 127220 }, { "epoch": 0.48426878192489514, "grad_norm": 0.13365916907787323, "learning_rate": 0.0005, "loss": 2.1145, "step": 127230 }, { "epoch": 0.4843068443930178, "grad_norm": 0.12094181030988693, "learning_rate": 0.0005, "loss": 2.1313, "step": 127240 }, { "epoch": 0.4843449068611405, "grad_norm": 0.1237097904086113, "learning_rate": 0.0005, "loss": 2.1259, "step": 127250 }, { "epoch": 0.48438296932926317, "grad_norm": 0.1223485916852951, "learning_rate": 0.0005, "loss": 2.1086, "step": 127260 }, { "epoch": 0.4844210317973859, "grad_norm": 0.12210199981927872, "learning_rate": 0.0005, "loss": 2.1139, "step": 127270 }, { "epoch": 0.48445909426550854, "grad_norm": 0.12790727615356445, "learning_rate": 0.0005, "loss": 2.1034, "step": 127280 }, { "epoch": 0.48449715673363125, "grad_norm": 0.11236969381570816, "learning_rate": 0.0005, "loss": 2.1203, "step": 127290 }, { "epoch": 0.4845352192017539, "grad_norm": 0.12637045979499817, "learning_rate": 0.0005, "loss": 2.1272, "step": 127300 }, { "epoch": 0.4845732816698766, "grad_norm": 0.12643814086914062, "learning_rate": 0.0005, "loss": 2.111, "step": 127310 }, { "epoch": 0.4846113441379993, "grad_norm": 0.11725429445505142, "learning_rate": 0.0005, "loss": 2.1163, "step": 127320 }, { "epoch": 0.484649406606122, "grad_norm": 0.1281515210866928, "learning_rate": 0.0005, "loss": 2.1154, "step": 127330 }, { "epoch": 0.48468746907424465, "grad_norm": 0.15110932290554047, "learning_rate": 0.0005, "loss": 2.1026, "step": 127340 }, { "epoch": 0.48472553154236736, "grad_norm": 0.12391675263643265, "learning_rate": 0.0005, "loss": 2.1192, "step": 127350 }, { "epoch": 0.48476359401049, "grad_norm": 0.13406608998775482, "learning_rate": 0.0005, "loss": 2.1367, "step": 127360 }, { "epoch": 0.4848016564786127, "grad_norm": 0.11705932766199112, "learning_rate": 0.0005, "loss": 2.1229, "step": 127370 }, { "epoch": 0.4848397189467354, "grad_norm": 0.1269107609987259, "learning_rate": 0.0005, "loss": 2.1021, "step": 127380 }, { "epoch": 0.48487778141485804, "grad_norm": 0.11953911930322647, "learning_rate": 0.0005, "loss": 2.0996, "step": 127390 }, { "epoch": 0.48491584388298076, "grad_norm": 0.12773080170154572, "learning_rate": 0.0005, "loss": 2.1162, "step": 127400 }, { "epoch": 0.4849539063511034, "grad_norm": 0.12607593834400177, "learning_rate": 0.0005, "loss": 2.0954, "step": 127410 }, { "epoch": 0.4849919688192261, "grad_norm": 0.1383269876241684, "learning_rate": 0.0005, "loss": 2.1225, "step": 127420 }, { "epoch": 0.4850300312873488, "grad_norm": 0.13885319232940674, "learning_rate": 0.0005, "loss": 2.1084, "step": 127430 }, { "epoch": 0.4850680937554715, "grad_norm": 0.12093721330165863, "learning_rate": 0.0005, "loss": 2.0981, "step": 127440 }, { "epoch": 0.48510615622359415, "grad_norm": 0.11831363290548325, "learning_rate": 0.0005, "loss": 2.1117, "step": 127450 }, { "epoch": 0.48514421869171687, "grad_norm": 0.11542569100856781, "learning_rate": 0.0005, "loss": 2.1203, "step": 127460 }, { "epoch": 0.4851822811598395, "grad_norm": 0.12869244813919067, "learning_rate": 0.0005, "loss": 2.1289, "step": 127470 }, { "epoch": 0.48522034362796224, "grad_norm": 0.1293071061372757, "learning_rate": 0.0005, "loss": 2.124, "step": 127480 }, { "epoch": 0.4852584060960849, "grad_norm": 0.13619841635227203, "learning_rate": 0.0005, "loss": 2.1225, "step": 127490 }, { "epoch": 0.48529646856420755, "grad_norm": 0.12476862967014313, "learning_rate": 0.0005, "loss": 2.1248, "step": 127500 }, { "epoch": 0.48533453103233026, "grad_norm": 0.12852947413921356, "learning_rate": 0.0005, "loss": 2.1114, "step": 127510 }, { "epoch": 0.4853725935004529, "grad_norm": 0.11751388758420944, "learning_rate": 0.0005, "loss": 2.1074, "step": 127520 }, { "epoch": 0.48541065596857563, "grad_norm": 0.12054309993982315, "learning_rate": 0.0005, "loss": 2.1018, "step": 127530 }, { "epoch": 0.4854487184366983, "grad_norm": 0.13147751986980438, "learning_rate": 0.0005, "loss": 2.1085, "step": 127540 }, { "epoch": 0.485486780904821, "grad_norm": 0.13924241065979004, "learning_rate": 0.0005, "loss": 2.1269, "step": 127550 }, { "epoch": 0.48552484337294366, "grad_norm": 0.12083495408296585, "learning_rate": 0.0005, "loss": 2.1139, "step": 127560 }, { "epoch": 0.4855629058410664, "grad_norm": 0.12078743427991867, "learning_rate": 0.0005, "loss": 2.1146, "step": 127570 }, { "epoch": 0.48560096830918903, "grad_norm": 0.13089805841445923, "learning_rate": 0.0005, "loss": 2.0991, "step": 127580 }, { "epoch": 0.48563903077731174, "grad_norm": 0.14108452200889587, "learning_rate": 0.0005, "loss": 2.114, "step": 127590 }, { "epoch": 0.4856770932454344, "grad_norm": 0.1272251456975937, "learning_rate": 0.0005, "loss": 2.1201, "step": 127600 }, { "epoch": 0.4857151557135571, "grad_norm": 0.12551584839820862, "learning_rate": 0.0005, "loss": 2.128, "step": 127610 }, { "epoch": 0.48575321818167977, "grad_norm": 0.12183475494384766, "learning_rate": 0.0005, "loss": 2.1192, "step": 127620 }, { "epoch": 0.4857912806498025, "grad_norm": 0.13344140350818634, "learning_rate": 0.0005, "loss": 2.1136, "step": 127630 }, { "epoch": 0.48582934311792514, "grad_norm": 0.12324704229831696, "learning_rate": 0.0005, "loss": 2.1091, "step": 127640 }, { "epoch": 0.4858674055860478, "grad_norm": 0.11987955123186111, "learning_rate": 0.0005, "loss": 2.1152, "step": 127650 }, { "epoch": 0.4859054680541705, "grad_norm": 0.11302775144577026, "learning_rate": 0.0005, "loss": 2.116, "step": 127660 }, { "epoch": 0.48594353052229317, "grad_norm": 0.13847026228904724, "learning_rate": 0.0005, "loss": 2.1262, "step": 127670 }, { "epoch": 0.4859815929904159, "grad_norm": 0.1276199072599411, "learning_rate": 0.0005, "loss": 2.1142, "step": 127680 }, { "epoch": 0.48601965545853854, "grad_norm": 0.14437644183635712, "learning_rate": 0.0005, "loss": 2.129, "step": 127690 }, { "epoch": 0.48605771792666125, "grad_norm": 0.12203813344240189, "learning_rate": 0.0005, "loss": 2.1187, "step": 127700 }, { "epoch": 0.4860957803947839, "grad_norm": 0.135308176279068, "learning_rate": 0.0005, "loss": 2.1216, "step": 127710 }, { "epoch": 0.4861338428629066, "grad_norm": 0.11932894587516785, "learning_rate": 0.0005, "loss": 2.1322, "step": 127720 }, { "epoch": 0.4861719053310293, "grad_norm": 0.12615463137626648, "learning_rate": 0.0005, "loss": 2.1149, "step": 127730 }, { "epoch": 0.486209967799152, "grad_norm": 0.12288632243871689, "learning_rate": 0.0005, "loss": 2.1004, "step": 127740 }, { "epoch": 0.48624803026727464, "grad_norm": 0.12118787318468094, "learning_rate": 0.0005, "loss": 2.1226, "step": 127750 }, { "epoch": 0.48628609273539736, "grad_norm": 0.12299742549657822, "learning_rate": 0.0005, "loss": 2.131, "step": 127760 }, { "epoch": 0.48632415520352, "grad_norm": 0.11897924542427063, "learning_rate": 0.0005, "loss": 2.1038, "step": 127770 }, { "epoch": 0.4863622176716427, "grad_norm": 0.1315123736858368, "learning_rate": 0.0005, "loss": 2.1218, "step": 127780 }, { "epoch": 0.4864002801397654, "grad_norm": 0.1309213489294052, "learning_rate": 0.0005, "loss": 2.1286, "step": 127790 }, { "epoch": 0.48643834260788804, "grad_norm": 0.13837243616580963, "learning_rate": 0.0005, "loss": 2.1282, "step": 127800 }, { "epoch": 0.48647640507601075, "grad_norm": 0.13022461533546448, "learning_rate": 0.0005, "loss": 2.1248, "step": 127810 }, { "epoch": 0.4865144675441334, "grad_norm": 0.1236427053809166, "learning_rate": 0.0005, "loss": 2.1202, "step": 127820 }, { "epoch": 0.4865525300122561, "grad_norm": 0.11672331392765045, "learning_rate": 0.0005, "loss": 2.1109, "step": 127830 }, { "epoch": 0.4865905924803788, "grad_norm": 0.13600587844848633, "learning_rate": 0.0005, "loss": 2.1184, "step": 127840 }, { "epoch": 0.4866286549485015, "grad_norm": 0.13283072412014008, "learning_rate": 0.0005, "loss": 2.1071, "step": 127850 }, { "epoch": 0.48666671741662415, "grad_norm": 0.12012767791748047, "learning_rate": 0.0005, "loss": 2.1145, "step": 127860 }, { "epoch": 0.48670477988474686, "grad_norm": 0.12542325258255005, "learning_rate": 0.0005, "loss": 2.1024, "step": 127870 }, { "epoch": 0.4867428423528695, "grad_norm": 0.13662007451057434, "learning_rate": 0.0005, "loss": 2.1132, "step": 127880 }, { "epoch": 0.48678090482099223, "grad_norm": 0.12781007587909698, "learning_rate": 0.0005, "loss": 2.105, "step": 127890 }, { "epoch": 0.4868189672891149, "grad_norm": 0.14764262735843658, "learning_rate": 0.0005, "loss": 2.1249, "step": 127900 }, { "epoch": 0.4868570297572376, "grad_norm": 0.11196591705083847, "learning_rate": 0.0005, "loss": 2.1014, "step": 127910 }, { "epoch": 0.48689509222536026, "grad_norm": 0.11736451089382172, "learning_rate": 0.0005, "loss": 2.1055, "step": 127920 }, { "epoch": 0.4869331546934829, "grad_norm": 0.1281765252351761, "learning_rate": 0.0005, "loss": 2.1252, "step": 127930 }, { "epoch": 0.48697121716160563, "grad_norm": 0.12472829222679138, "learning_rate": 0.0005, "loss": 2.1193, "step": 127940 }, { "epoch": 0.4870092796297283, "grad_norm": 0.12662485241889954, "learning_rate": 0.0005, "loss": 2.1036, "step": 127950 }, { "epoch": 0.487047342097851, "grad_norm": 0.1277189701795578, "learning_rate": 0.0005, "loss": 2.1277, "step": 127960 }, { "epoch": 0.48708540456597366, "grad_norm": 0.14531460404396057, "learning_rate": 0.0005, "loss": 2.1161, "step": 127970 }, { "epoch": 0.48712346703409637, "grad_norm": 0.15048803389072418, "learning_rate": 0.0005, "loss": 2.1174, "step": 127980 }, { "epoch": 0.487161529502219, "grad_norm": 0.12176571786403656, "learning_rate": 0.0005, "loss": 2.1228, "step": 127990 }, { "epoch": 0.48719959197034174, "grad_norm": 0.12032541632652283, "learning_rate": 0.0005, "loss": 2.1234, "step": 128000 }, { "epoch": 0.4872376544384644, "grad_norm": 0.1318209022283554, "learning_rate": 0.0005, "loss": 2.1299, "step": 128010 }, { "epoch": 0.4872757169065871, "grad_norm": 0.13124960660934448, "learning_rate": 0.0005, "loss": 2.1309, "step": 128020 }, { "epoch": 0.48731377937470977, "grad_norm": 0.12284164130687714, "learning_rate": 0.0005, "loss": 2.0958, "step": 128030 }, { "epoch": 0.4873518418428325, "grad_norm": 0.12062957137823105, "learning_rate": 0.0005, "loss": 2.1136, "step": 128040 }, { "epoch": 0.48738990431095514, "grad_norm": 0.1256282776594162, "learning_rate": 0.0005, "loss": 2.115, "step": 128050 }, { "epoch": 0.48742796677907785, "grad_norm": 0.12683241069316864, "learning_rate": 0.0005, "loss": 2.1256, "step": 128060 }, { "epoch": 0.4874660292472005, "grad_norm": 0.11736001819372177, "learning_rate": 0.0005, "loss": 2.1087, "step": 128070 }, { "epoch": 0.48750409171532316, "grad_norm": 0.11980387568473816, "learning_rate": 0.0005, "loss": 2.1232, "step": 128080 }, { "epoch": 0.4875421541834459, "grad_norm": 0.13046181201934814, "learning_rate": 0.0005, "loss": 2.1146, "step": 128090 }, { "epoch": 0.48758021665156853, "grad_norm": 0.1274123191833496, "learning_rate": 0.0005, "loss": 2.1122, "step": 128100 }, { "epoch": 0.48761827911969124, "grad_norm": 0.11976911127567291, "learning_rate": 0.0005, "loss": 2.1274, "step": 128110 }, { "epoch": 0.4876563415878139, "grad_norm": 0.12375515699386597, "learning_rate": 0.0005, "loss": 2.121, "step": 128120 }, { "epoch": 0.4876944040559366, "grad_norm": 0.12907162308692932, "learning_rate": 0.0005, "loss": 2.1052, "step": 128130 }, { "epoch": 0.48773246652405927, "grad_norm": 0.13195452094078064, "learning_rate": 0.0005, "loss": 2.1216, "step": 128140 }, { "epoch": 0.487770528992182, "grad_norm": 0.13561616837978363, "learning_rate": 0.0005, "loss": 2.1111, "step": 128150 }, { "epoch": 0.48780859146030464, "grad_norm": 0.12271276116371155, "learning_rate": 0.0005, "loss": 2.1214, "step": 128160 }, { "epoch": 0.48784665392842735, "grad_norm": 0.13250857591629028, "learning_rate": 0.0005, "loss": 2.1071, "step": 128170 }, { "epoch": 0.48788471639655, "grad_norm": 0.1365223228931427, "learning_rate": 0.0005, "loss": 2.1152, "step": 128180 }, { "epoch": 0.4879227788646727, "grad_norm": 0.127644881606102, "learning_rate": 0.0005, "loss": 2.1077, "step": 128190 }, { "epoch": 0.4879608413327954, "grad_norm": 0.11915410310029984, "learning_rate": 0.0005, "loss": 2.1246, "step": 128200 }, { "epoch": 0.4879989038009181, "grad_norm": 0.12355806678533554, "learning_rate": 0.0005, "loss": 2.1158, "step": 128210 }, { "epoch": 0.48803696626904075, "grad_norm": 0.15161676704883575, "learning_rate": 0.0005, "loss": 2.1131, "step": 128220 }, { "epoch": 0.4880750287371634, "grad_norm": 0.12143923342227936, "learning_rate": 0.0005, "loss": 2.11, "step": 128230 }, { "epoch": 0.4881130912052861, "grad_norm": 0.13301809132099152, "learning_rate": 0.0005, "loss": 2.112, "step": 128240 }, { "epoch": 0.4881511536734088, "grad_norm": 0.12631773948669434, "learning_rate": 0.0005, "loss": 2.1248, "step": 128250 }, { "epoch": 0.4881892161415315, "grad_norm": 0.1465943306684494, "learning_rate": 0.0005, "loss": 2.1139, "step": 128260 }, { "epoch": 0.48822727860965415, "grad_norm": 0.12848037481307983, "learning_rate": 0.0005, "loss": 2.1166, "step": 128270 }, { "epoch": 0.48826534107777686, "grad_norm": 0.11996082216501236, "learning_rate": 0.0005, "loss": 2.1147, "step": 128280 }, { "epoch": 0.4883034035458995, "grad_norm": 0.1305745393037796, "learning_rate": 0.0005, "loss": 2.1007, "step": 128290 }, { "epoch": 0.48834146601402223, "grad_norm": 0.12072660028934479, "learning_rate": 0.0005, "loss": 2.1219, "step": 128300 }, { "epoch": 0.4883795284821449, "grad_norm": 0.11993353813886642, "learning_rate": 0.0005, "loss": 2.1048, "step": 128310 }, { "epoch": 0.4884175909502676, "grad_norm": 0.12914374470710754, "learning_rate": 0.0005, "loss": 2.107, "step": 128320 }, { "epoch": 0.48845565341839026, "grad_norm": 0.12362520396709442, "learning_rate": 0.0005, "loss": 2.1332, "step": 128330 }, { "epoch": 0.48849371588651297, "grad_norm": 0.12493337690830231, "learning_rate": 0.0005, "loss": 2.115, "step": 128340 }, { "epoch": 0.4885317783546356, "grad_norm": 0.12441351264715195, "learning_rate": 0.0005, "loss": 2.1034, "step": 128350 }, { "epoch": 0.4885698408227583, "grad_norm": 0.12130783498287201, "learning_rate": 0.0005, "loss": 2.1162, "step": 128360 }, { "epoch": 0.488607903290881, "grad_norm": 0.11701652407646179, "learning_rate": 0.0005, "loss": 2.1199, "step": 128370 }, { "epoch": 0.48864596575900365, "grad_norm": 0.11885587126016617, "learning_rate": 0.0005, "loss": 2.107, "step": 128380 }, { "epoch": 0.48868402822712637, "grad_norm": 0.14552637934684753, "learning_rate": 0.0005, "loss": 2.1037, "step": 128390 }, { "epoch": 0.488722090695249, "grad_norm": 0.12319986522197723, "learning_rate": 0.0005, "loss": 2.0929, "step": 128400 }, { "epoch": 0.48876015316337174, "grad_norm": 0.12676241993904114, "learning_rate": 0.0005, "loss": 2.1202, "step": 128410 }, { "epoch": 0.4887982156314944, "grad_norm": 0.12602174282073975, "learning_rate": 0.0005, "loss": 2.1076, "step": 128420 }, { "epoch": 0.4888362780996171, "grad_norm": 0.12732820212841034, "learning_rate": 0.0005, "loss": 2.1118, "step": 128430 }, { "epoch": 0.48887434056773976, "grad_norm": 0.13058944046497345, "learning_rate": 0.0005, "loss": 2.1108, "step": 128440 }, { "epoch": 0.4889124030358625, "grad_norm": 0.12957490980625153, "learning_rate": 0.0005, "loss": 2.101, "step": 128450 }, { "epoch": 0.48895046550398513, "grad_norm": 0.1237308606505394, "learning_rate": 0.0005, "loss": 2.1149, "step": 128460 }, { "epoch": 0.48898852797210784, "grad_norm": 0.13554799556732178, "learning_rate": 0.0005, "loss": 2.1225, "step": 128470 }, { "epoch": 0.4890265904402305, "grad_norm": 0.11221189796924591, "learning_rate": 0.0005, "loss": 2.1156, "step": 128480 }, { "epoch": 0.4890646529083532, "grad_norm": 0.1298692524433136, "learning_rate": 0.0005, "loss": 2.1262, "step": 128490 }, { "epoch": 0.48910271537647587, "grad_norm": 0.12430565059185028, "learning_rate": 0.0005, "loss": 2.1037, "step": 128500 }, { "epoch": 0.48914077784459853, "grad_norm": 0.15557394921779633, "learning_rate": 0.0005, "loss": 2.1014, "step": 128510 }, { "epoch": 0.48917884031272124, "grad_norm": 0.11919406801462173, "learning_rate": 0.0005, "loss": 2.1122, "step": 128520 }, { "epoch": 0.4892169027808439, "grad_norm": 0.1355024129152298, "learning_rate": 0.0005, "loss": 2.1216, "step": 128530 }, { "epoch": 0.4892549652489666, "grad_norm": 0.11714503169059753, "learning_rate": 0.0005, "loss": 2.1087, "step": 128540 }, { "epoch": 0.48929302771708927, "grad_norm": 0.13108661770820618, "learning_rate": 0.0005, "loss": 2.1171, "step": 128550 }, { "epoch": 0.489331090185212, "grad_norm": 0.13541683554649353, "learning_rate": 0.0005, "loss": 2.1256, "step": 128560 }, { "epoch": 0.48936915265333464, "grad_norm": 0.12678135931491852, "learning_rate": 0.0005, "loss": 2.1253, "step": 128570 }, { "epoch": 0.48940721512145735, "grad_norm": 0.1365925818681717, "learning_rate": 0.0005, "loss": 2.128, "step": 128580 }, { "epoch": 0.48944527758958, "grad_norm": 0.12762218713760376, "learning_rate": 0.0005, "loss": 2.106, "step": 128590 }, { "epoch": 0.4894833400577027, "grad_norm": 0.1352071613073349, "learning_rate": 0.0005, "loss": 2.1136, "step": 128600 }, { "epoch": 0.4895214025258254, "grad_norm": 0.12248429656028748, "learning_rate": 0.0005, "loss": 2.1096, "step": 128610 }, { "epoch": 0.4895594649939481, "grad_norm": 0.12541040778160095, "learning_rate": 0.0005, "loss": 2.1097, "step": 128620 }, { "epoch": 0.48959752746207075, "grad_norm": 0.12017042189836502, "learning_rate": 0.0005, "loss": 2.1061, "step": 128630 }, { "epoch": 0.48963558993019346, "grad_norm": 0.11116807907819748, "learning_rate": 0.0005, "loss": 2.1151, "step": 128640 }, { "epoch": 0.4896736523983161, "grad_norm": 0.13902676105499268, "learning_rate": 0.0005, "loss": 2.1133, "step": 128650 }, { "epoch": 0.4897117148664388, "grad_norm": 0.12121377885341644, "learning_rate": 0.0005, "loss": 2.0942, "step": 128660 }, { "epoch": 0.4897497773345615, "grad_norm": 0.13029231131076813, "learning_rate": 0.0005, "loss": 2.1206, "step": 128670 }, { "epoch": 0.48978783980268414, "grad_norm": 0.13663001358509064, "learning_rate": 0.0005, "loss": 2.1201, "step": 128680 }, { "epoch": 0.48982590227080686, "grad_norm": 0.13909859955310822, "learning_rate": 0.0005, "loss": 2.1297, "step": 128690 }, { "epoch": 0.4898639647389295, "grad_norm": 0.12316319346427917, "learning_rate": 0.0005, "loss": 2.114, "step": 128700 }, { "epoch": 0.4899020272070522, "grad_norm": 0.13498586416244507, "learning_rate": 0.0005, "loss": 2.1028, "step": 128710 }, { "epoch": 0.4899400896751749, "grad_norm": 0.1561654806137085, "learning_rate": 0.0005, "loss": 2.1221, "step": 128720 }, { "epoch": 0.4899781521432976, "grad_norm": 0.12787999212741852, "learning_rate": 0.0005, "loss": 2.0921, "step": 128730 }, { "epoch": 0.49001621461142025, "grad_norm": 0.13014093041419983, "learning_rate": 0.0005, "loss": 2.1079, "step": 128740 }, { "epoch": 0.49005427707954297, "grad_norm": 0.11951977759599686, "learning_rate": 0.0005, "loss": 2.1202, "step": 128750 }, { "epoch": 0.4900923395476656, "grad_norm": 0.11888326704502106, "learning_rate": 0.0005, "loss": 2.1011, "step": 128760 }, { "epoch": 0.49013040201578834, "grad_norm": 0.12961071729660034, "learning_rate": 0.0005, "loss": 2.1033, "step": 128770 }, { "epoch": 0.490168464483911, "grad_norm": 0.13857637345790863, "learning_rate": 0.0005, "loss": 2.1341, "step": 128780 }, { "epoch": 0.4902065269520337, "grad_norm": 0.1179923564195633, "learning_rate": 0.0005, "loss": 2.1192, "step": 128790 }, { "epoch": 0.49024458942015636, "grad_norm": 0.12373822182416916, "learning_rate": 0.0005, "loss": 2.0984, "step": 128800 }, { "epoch": 0.490282651888279, "grad_norm": 0.12144889682531357, "learning_rate": 0.0005, "loss": 2.1112, "step": 128810 }, { "epoch": 0.49032071435640173, "grad_norm": 0.11663493514060974, "learning_rate": 0.0005, "loss": 2.1057, "step": 128820 }, { "epoch": 0.4903587768245244, "grad_norm": 0.13361524045467377, "learning_rate": 0.0005, "loss": 2.1196, "step": 128830 }, { "epoch": 0.4903968392926471, "grad_norm": 0.15043866634368896, "learning_rate": 0.0005, "loss": 2.1303, "step": 128840 }, { "epoch": 0.49043490176076976, "grad_norm": 0.12512058019638062, "learning_rate": 0.0005, "loss": 2.1411, "step": 128850 }, { "epoch": 0.49047296422889247, "grad_norm": 0.12376557290554047, "learning_rate": 0.0005, "loss": 2.1178, "step": 128860 }, { "epoch": 0.49051102669701513, "grad_norm": 0.14460743963718414, "learning_rate": 0.0005, "loss": 2.1266, "step": 128870 }, { "epoch": 0.49054908916513784, "grad_norm": 0.11526907235383987, "learning_rate": 0.0005, "loss": 2.122, "step": 128880 }, { "epoch": 0.4905871516332605, "grad_norm": 0.1400173157453537, "learning_rate": 0.0005, "loss": 2.1041, "step": 128890 }, { "epoch": 0.4906252141013832, "grad_norm": 0.12534202635288239, "learning_rate": 0.0005, "loss": 2.1131, "step": 128900 }, { "epoch": 0.49066327656950587, "grad_norm": 0.12229157984256744, "learning_rate": 0.0005, "loss": 2.1276, "step": 128910 }, { "epoch": 0.4907013390376286, "grad_norm": 0.12299957126379013, "learning_rate": 0.0005, "loss": 2.1208, "step": 128920 }, { "epoch": 0.49073940150575124, "grad_norm": 0.12647582590579987, "learning_rate": 0.0005, "loss": 2.1285, "step": 128930 }, { "epoch": 0.4907774639738739, "grad_norm": 0.1152181401848793, "learning_rate": 0.0005, "loss": 2.1205, "step": 128940 }, { "epoch": 0.4908155264419966, "grad_norm": 0.12402541190385818, "learning_rate": 0.0005, "loss": 2.1222, "step": 128950 }, { "epoch": 0.49085358891011927, "grad_norm": 0.11969681084156036, "learning_rate": 0.0005, "loss": 2.1055, "step": 128960 }, { "epoch": 0.490891651378242, "grad_norm": 0.12540394067764282, "learning_rate": 0.0005, "loss": 2.1258, "step": 128970 }, { "epoch": 0.49092971384636463, "grad_norm": 0.12889902293682098, "learning_rate": 0.0005, "loss": 2.1242, "step": 128980 }, { "epoch": 0.49096777631448735, "grad_norm": 0.13155750930309296, "learning_rate": 0.0005, "loss": 2.1256, "step": 128990 }, { "epoch": 0.49100583878261, "grad_norm": 0.130380779504776, "learning_rate": 0.0005, "loss": 2.12, "step": 129000 }, { "epoch": 0.4910439012507327, "grad_norm": 0.12233448028564453, "learning_rate": 0.0005, "loss": 2.113, "step": 129010 }, { "epoch": 0.4910819637188554, "grad_norm": 0.13260172307491302, "learning_rate": 0.0005, "loss": 2.1116, "step": 129020 }, { "epoch": 0.4911200261869781, "grad_norm": 0.13506576418876648, "learning_rate": 0.0005, "loss": 2.1149, "step": 129030 }, { "epoch": 0.49115808865510074, "grad_norm": 0.12151715159416199, "learning_rate": 0.0005, "loss": 2.1062, "step": 129040 }, { "epoch": 0.49119615112322346, "grad_norm": 0.13031908869743347, "learning_rate": 0.0005, "loss": 2.1086, "step": 129050 }, { "epoch": 0.4912342135913461, "grad_norm": 0.14683033525943756, "learning_rate": 0.0005, "loss": 2.109, "step": 129060 }, { "epoch": 0.4912722760594688, "grad_norm": 0.1929064840078354, "learning_rate": 0.0005, "loss": 2.1215, "step": 129070 }, { "epoch": 0.4913103385275915, "grad_norm": 0.14430083334445953, "learning_rate": 0.0005, "loss": 2.1004, "step": 129080 }, { "epoch": 0.49134840099571414, "grad_norm": 0.12323372066020966, "learning_rate": 0.0005, "loss": 2.1039, "step": 129090 }, { "epoch": 0.49138646346383685, "grad_norm": 0.13349588215351105, "learning_rate": 0.0005, "loss": 2.1204, "step": 129100 }, { "epoch": 0.4914245259319595, "grad_norm": 0.12230733036994934, "learning_rate": 0.0005, "loss": 2.1101, "step": 129110 }, { "epoch": 0.4914625884000822, "grad_norm": 0.13006240129470825, "learning_rate": 0.0005, "loss": 2.1118, "step": 129120 }, { "epoch": 0.4915006508682049, "grad_norm": 0.1338682621717453, "learning_rate": 0.0005, "loss": 2.1092, "step": 129130 }, { "epoch": 0.4915387133363276, "grad_norm": 0.1181456446647644, "learning_rate": 0.0005, "loss": 2.1329, "step": 129140 }, { "epoch": 0.49157677580445025, "grad_norm": 0.12192094326019287, "learning_rate": 0.0005, "loss": 2.123, "step": 129150 }, { "epoch": 0.49161483827257296, "grad_norm": 0.12347403168678284, "learning_rate": 0.0005, "loss": 2.1132, "step": 129160 }, { "epoch": 0.4916529007406956, "grad_norm": 0.11497198045253754, "learning_rate": 0.0005, "loss": 2.1138, "step": 129170 }, { "epoch": 0.49169096320881833, "grad_norm": 0.12021651864051819, "learning_rate": 0.0005, "loss": 2.1046, "step": 129180 }, { "epoch": 0.491729025676941, "grad_norm": 0.1412685215473175, "learning_rate": 0.0005, "loss": 2.1189, "step": 129190 }, { "epoch": 0.4917670881450637, "grad_norm": 0.12241291999816895, "learning_rate": 0.0005, "loss": 2.1224, "step": 129200 }, { "epoch": 0.49180515061318636, "grad_norm": 0.12812742590904236, "learning_rate": 0.0005, "loss": 2.1263, "step": 129210 }, { "epoch": 0.49184321308130907, "grad_norm": 0.12787936627864838, "learning_rate": 0.0005, "loss": 2.1094, "step": 129220 }, { "epoch": 0.49188127554943173, "grad_norm": 0.12003834545612335, "learning_rate": 0.0005, "loss": 2.1298, "step": 129230 }, { "epoch": 0.4919193380175544, "grad_norm": 0.13243554532527924, "learning_rate": 0.0005, "loss": 2.1154, "step": 129240 }, { "epoch": 0.4919574004856771, "grad_norm": 0.13243474066257477, "learning_rate": 0.0005, "loss": 2.1143, "step": 129250 }, { "epoch": 0.49199546295379976, "grad_norm": 0.12219058722257614, "learning_rate": 0.0005, "loss": 2.1134, "step": 129260 }, { "epoch": 0.49203352542192247, "grad_norm": 0.11902420222759247, "learning_rate": 0.0005, "loss": 2.1166, "step": 129270 }, { "epoch": 0.4920715878900451, "grad_norm": 0.12722794711589813, "learning_rate": 0.0005, "loss": 2.1208, "step": 129280 }, { "epoch": 0.49210965035816784, "grad_norm": 0.18392205238342285, "learning_rate": 0.0005, "loss": 2.1206, "step": 129290 }, { "epoch": 0.4921477128262905, "grad_norm": 0.11841941624879837, "learning_rate": 0.0005, "loss": 2.1192, "step": 129300 }, { "epoch": 0.4921857752944132, "grad_norm": 0.11622878164052963, "learning_rate": 0.0005, "loss": 2.1141, "step": 129310 }, { "epoch": 0.49222383776253587, "grad_norm": 0.1269485205411911, "learning_rate": 0.0005, "loss": 2.1127, "step": 129320 }, { "epoch": 0.4922619002306586, "grad_norm": 0.11990734934806824, "learning_rate": 0.0005, "loss": 2.0991, "step": 129330 }, { "epoch": 0.49229996269878123, "grad_norm": 0.12024495750665665, "learning_rate": 0.0005, "loss": 2.1374, "step": 129340 }, { "epoch": 0.49233802516690395, "grad_norm": 0.1395251601934433, "learning_rate": 0.0005, "loss": 2.1286, "step": 129350 }, { "epoch": 0.4923760876350266, "grad_norm": 0.1414317935705185, "learning_rate": 0.0005, "loss": 2.1171, "step": 129360 }, { "epoch": 0.49241415010314926, "grad_norm": 0.13144022226333618, "learning_rate": 0.0005, "loss": 2.1117, "step": 129370 }, { "epoch": 0.492452212571272, "grad_norm": 0.1271926462650299, "learning_rate": 0.0005, "loss": 2.1147, "step": 129380 }, { "epoch": 0.49249027503939463, "grad_norm": 0.13372786343097687, "learning_rate": 0.0005, "loss": 2.0996, "step": 129390 }, { "epoch": 0.49252833750751734, "grad_norm": 0.12830625474452972, "learning_rate": 0.0005, "loss": 2.1093, "step": 129400 }, { "epoch": 0.49256639997564, "grad_norm": 0.13309486210346222, "learning_rate": 0.0005, "loss": 2.1167, "step": 129410 }, { "epoch": 0.4926044624437627, "grad_norm": 0.12286410480737686, "learning_rate": 0.0005, "loss": 2.121, "step": 129420 }, { "epoch": 0.49264252491188537, "grad_norm": 0.12522216141223907, "learning_rate": 0.0005, "loss": 2.1298, "step": 129430 }, { "epoch": 0.4926805873800081, "grad_norm": 0.14131148159503937, "learning_rate": 0.0005, "loss": 2.1059, "step": 129440 }, { "epoch": 0.49271864984813074, "grad_norm": 0.12129399180412292, "learning_rate": 0.0005, "loss": 2.1211, "step": 129450 }, { "epoch": 0.49275671231625345, "grad_norm": 0.12903055548667908, "learning_rate": 0.0005, "loss": 2.1121, "step": 129460 }, { "epoch": 0.4927947747843761, "grad_norm": 0.1280837506055832, "learning_rate": 0.0005, "loss": 2.1211, "step": 129470 }, { "epoch": 0.4928328372524988, "grad_norm": 0.12391971051692963, "learning_rate": 0.0005, "loss": 2.1175, "step": 129480 }, { "epoch": 0.4928708997206215, "grad_norm": 0.12764014303684235, "learning_rate": 0.0005, "loss": 2.1349, "step": 129490 }, { "epoch": 0.4929089621887442, "grad_norm": 0.12799176573753357, "learning_rate": 0.0005, "loss": 2.1034, "step": 129500 }, { "epoch": 0.49294702465686685, "grad_norm": 0.12443231046199799, "learning_rate": 0.0005, "loss": 2.106, "step": 129510 }, { "epoch": 0.4929850871249895, "grad_norm": 0.12480633705854416, "learning_rate": 0.0005, "loss": 2.1231, "step": 129520 }, { "epoch": 0.4930231495931122, "grad_norm": 0.12381468713283539, "learning_rate": 0.0005, "loss": 2.121, "step": 129530 }, { "epoch": 0.4930612120612349, "grad_norm": 0.129247784614563, "learning_rate": 0.0005, "loss": 2.1048, "step": 129540 }, { "epoch": 0.4930992745293576, "grad_norm": 0.12614686787128448, "learning_rate": 0.0005, "loss": 2.1208, "step": 129550 }, { "epoch": 0.49313733699748025, "grad_norm": 0.12660275399684906, "learning_rate": 0.0005, "loss": 2.1293, "step": 129560 }, { "epoch": 0.49317539946560296, "grad_norm": 0.1292925328016281, "learning_rate": 0.0005, "loss": 2.1268, "step": 129570 }, { "epoch": 0.4932134619337256, "grad_norm": 0.12577460706233978, "learning_rate": 0.0005, "loss": 2.112, "step": 129580 }, { "epoch": 0.49325152440184833, "grad_norm": 0.12658214569091797, "learning_rate": 0.0005, "loss": 2.1223, "step": 129590 }, { "epoch": 0.493289586869971, "grad_norm": 0.1259559541940689, "learning_rate": 0.0005, "loss": 2.1199, "step": 129600 }, { "epoch": 0.4933276493380937, "grad_norm": 0.12474989891052246, "learning_rate": 0.0005, "loss": 2.1131, "step": 129610 }, { "epoch": 0.49336571180621636, "grad_norm": 0.11340212821960449, "learning_rate": 0.0005, "loss": 2.1115, "step": 129620 }, { "epoch": 0.49340377427433907, "grad_norm": 0.1202770248055458, "learning_rate": 0.0005, "loss": 2.1176, "step": 129630 }, { "epoch": 0.4934418367424617, "grad_norm": 0.13771983981132507, "learning_rate": 0.0005, "loss": 2.1288, "step": 129640 }, { "epoch": 0.49347989921058444, "grad_norm": 0.1280750185251236, "learning_rate": 0.0005, "loss": 2.1252, "step": 129650 }, { "epoch": 0.4935179616787071, "grad_norm": 0.12560606002807617, "learning_rate": 0.0005, "loss": 2.1223, "step": 129660 }, { "epoch": 0.49355602414682975, "grad_norm": 0.1361568570137024, "learning_rate": 0.0005, "loss": 2.1214, "step": 129670 }, { "epoch": 0.49359408661495247, "grad_norm": 0.13186447322368622, "learning_rate": 0.0005, "loss": 2.1162, "step": 129680 }, { "epoch": 0.4936321490830751, "grad_norm": 0.12305932492017746, "learning_rate": 0.0005, "loss": 2.1174, "step": 129690 }, { "epoch": 0.49367021155119784, "grad_norm": 0.1293884664773941, "learning_rate": 0.0005, "loss": 2.1151, "step": 129700 }, { "epoch": 0.4937082740193205, "grad_norm": 0.12605488300323486, "learning_rate": 0.0005, "loss": 2.1157, "step": 129710 }, { "epoch": 0.4937463364874432, "grad_norm": 0.13056647777557373, "learning_rate": 0.0005, "loss": 2.1146, "step": 129720 }, { "epoch": 0.49378439895556586, "grad_norm": 0.1266745626926422, "learning_rate": 0.0005, "loss": 2.1187, "step": 129730 }, { "epoch": 0.4938224614236886, "grad_norm": 0.1208287924528122, "learning_rate": 0.0005, "loss": 2.0923, "step": 129740 }, { "epoch": 0.49386052389181123, "grad_norm": 0.11373704671859741, "learning_rate": 0.0005, "loss": 2.1211, "step": 129750 }, { "epoch": 0.49389858635993394, "grad_norm": 0.14107324182987213, "learning_rate": 0.0005, "loss": 2.1063, "step": 129760 }, { "epoch": 0.4939366488280566, "grad_norm": 0.13310225307941437, "learning_rate": 0.0005, "loss": 2.1196, "step": 129770 }, { "epoch": 0.4939747112961793, "grad_norm": 0.12170784175395966, "learning_rate": 0.0005, "loss": 2.1087, "step": 129780 }, { "epoch": 0.49401277376430197, "grad_norm": 0.13277970254421234, "learning_rate": 0.0005, "loss": 2.1133, "step": 129790 }, { "epoch": 0.49405083623242463, "grad_norm": 0.12409801036119461, "learning_rate": 0.0005, "loss": 2.1207, "step": 129800 }, { "epoch": 0.49408889870054734, "grad_norm": 0.13743042945861816, "learning_rate": 0.0005, "loss": 2.1168, "step": 129810 }, { "epoch": 0.49412696116867, "grad_norm": 0.16073283553123474, "learning_rate": 0.0005, "loss": 2.1285, "step": 129820 }, { "epoch": 0.4941650236367927, "grad_norm": 0.12472742050886154, "learning_rate": 0.0005, "loss": 2.1157, "step": 129830 }, { "epoch": 0.49420308610491537, "grad_norm": 0.13261789083480835, "learning_rate": 0.0005, "loss": 2.1204, "step": 129840 }, { "epoch": 0.4942411485730381, "grad_norm": 0.11896239966154099, "learning_rate": 0.0005, "loss": 2.1085, "step": 129850 }, { "epoch": 0.49427921104116074, "grad_norm": 0.13525590300559998, "learning_rate": 0.0005, "loss": 2.1048, "step": 129860 }, { "epoch": 0.49431727350928345, "grad_norm": 0.11765822023153305, "learning_rate": 0.0005, "loss": 2.1046, "step": 129870 }, { "epoch": 0.4943553359774061, "grad_norm": 0.12600429356098175, "learning_rate": 0.0005, "loss": 2.1275, "step": 129880 }, { "epoch": 0.4943933984455288, "grad_norm": 0.14068011939525604, "learning_rate": 0.0005, "loss": 2.1143, "step": 129890 }, { "epoch": 0.4944314609136515, "grad_norm": 0.12498706579208374, "learning_rate": 0.0005, "loss": 2.1142, "step": 129900 }, { "epoch": 0.4944695233817742, "grad_norm": 0.1201263815164566, "learning_rate": 0.0005, "loss": 2.0956, "step": 129910 }, { "epoch": 0.49450758584989685, "grad_norm": 0.13016465306282043, "learning_rate": 0.0005, "loss": 2.1147, "step": 129920 }, { "epoch": 0.49454564831801956, "grad_norm": 0.11403704434633255, "learning_rate": 0.0005, "loss": 2.1149, "step": 129930 }, { "epoch": 0.4945837107861422, "grad_norm": 0.12226550281047821, "learning_rate": 0.0005, "loss": 2.1236, "step": 129940 }, { "epoch": 0.4946217732542649, "grad_norm": 0.11579559743404388, "learning_rate": 0.0005, "loss": 2.1144, "step": 129950 }, { "epoch": 0.4946598357223876, "grad_norm": 0.12176090478897095, "learning_rate": 0.0005, "loss": 2.1131, "step": 129960 }, { "epoch": 0.49469789819051024, "grad_norm": 0.11621265858411789, "learning_rate": 0.0005, "loss": 2.1376, "step": 129970 }, { "epoch": 0.49473596065863296, "grad_norm": 0.12214352935552597, "learning_rate": 0.0005, "loss": 2.1224, "step": 129980 }, { "epoch": 0.4947740231267556, "grad_norm": 0.12496017664670944, "learning_rate": 0.0005, "loss": 2.117, "step": 129990 }, { "epoch": 0.4948120855948783, "grad_norm": 0.11287581920623779, "learning_rate": 0.0005, "loss": 2.1137, "step": 130000 }, { "epoch": 0.494850148063001, "grad_norm": 0.1303897351026535, "learning_rate": 0.0005, "loss": 2.1158, "step": 130010 }, { "epoch": 0.4948882105311237, "grad_norm": 0.12756425142288208, "learning_rate": 0.0005, "loss": 2.1354, "step": 130020 }, { "epoch": 0.49492627299924635, "grad_norm": 0.14151465892791748, "learning_rate": 0.0005, "loss": 2.1053, "step": 130030 }, { "epoch": 0.49496433546736907, "grad_norm": 0.1192479282617569, "learning_rate": 0.0005, "loss": 2.1102, "step": 130040 }, { "epoch": 0.4950023979354917, "grad_norm": 0.11875342577695847, "learning_rate": 0.0005, "loss": 2.1147, "step": 130050 }, { "epoch": 0.49504046040361444, "grad_norm": 0.11185558885335922, "learning_rate": 0.0005, "loss": 2.1273, "step": 130060 }, { "epoch": 0.4950785228717371, "grad_norm": 0.12418881058692932, "learning_rate": 0.0005, "loss": 2.1238, "step": 130070 }, { "epoch": 0.4951165853398598, "grad_norm": 0.12762118875980377, "learning_rate": 0.0005, "loss": 2.1118, "step": 130080 }, { "epoch": 0.49515464780798246, "grad_norm": 0.11265776306390762, "learning_rate": 0.0005, "loss": 2.119, "step": 130090 }, { "epoch": 0.4951927102761051, "grad_norm": 0.13423916697502136, "learning_rate": 0.0005, "loss": 2.1061, "step": 130100 }, { "epoch": 0.49523077274422783, "grad_norm": 0.13270613551139832, "learning_rate": 0.0005, "loss": 2.0982, "step": 130110 }, { "epoch": 0.4952688352123505, "grad_norm": 0.12004047632217407, "learning_rate": 0.0005, "loss": 2.0986, "step": 130120 }, { "epoch": 0.4953068976804732, "grad_norm": 0.11681164801120758, "learning_rate": 0.0005, "loss": 2.1133, "step": 130130 }, { "epoch": 0.49534496014859586, "grad_norm": 0.12493449449539185, "learning_rate": 0.0005, "loss": 2.0962, "step": 130140 }, { "epoch": 0.49538302261671857, "grad_norm": 0.11177048832178116, "learning_rate": 0.0005, "loss": 2.1114, "step": 130150 }, { "epoch": 0.49542108508484123, "grad_norm": 0.12243694812059402, "learning_rate": 0.0005, "loss": 2.1226, "step": 130160 }, { "epoch": 0.49545914755296394, "grad_norm": 0.12640246748924255, "learning_rate": 0.0005, "loss": 2.1081, "step": 130170 }, { "epoch": 0.4954972100210866, "grad_norm": 0.12343862652778625, "learning_rate": 0.0005, "loss": 2.1048, "step": 130180 }, { "epoch": 0.4955352724892093, "grad_norm": 0.11960109323263168, "learning_rate": 0.0005, "loss": 2.1127, "step": 130190 }, { "epoch": 0.49557333495733197, "grad_norm": 0.1165766566991806, "learning_rate": 0.0005, "loss": 2.1211, "step": 130200 }, { "epoch": 0.4956113974254547, "grad_norm": 0.1352618932723999, "learning_rate": 0.0005, "loss": 2.1235, "step": 130210 }, { "epoch": 0.49564945989357734, "grad_norm": 0.13529905676841736, "learning_rate": 0.0005, "loss": 2.1133, "step": 130220 }, { "epoch": 0.4956875223617, "grad_norm": 0.11719474196434021, "learning_rate": 0.0005, "loss": 2.1149, "step": 130230 }, { "epoch": 0.4957255848298227, "grad_norm": 0.13491681218147278, "learning_rate": 0.0005, "loss": 2.1064, "step": 130240 }, { "epoch": 0.49576364729794536, "grad_norm": 0.1283651441335678, "learning_rate": 0.0005, "loss": 2.1193, "step": 130250 }, { "epoch": 0.4958017097660681, "grad_norm": 0.1324135810136795, "learning_rate": 0.0005, "loss": 2.1197, "step": 130260 }, { "epoch": 0.49583977223419073, "grad_norm": 0.11992873251438141, "learning_rate": 0.0005, "loss": 2.1039, "step": 130270 }, { "epoch": 0.49587783470231345, "grad_norm": 0.11500484496355057, "learning_rate": 0.0005, "loss": 2.099, "step": 130280 }, { "epoch": 0.4959158971704361, "grad_norm": 0.11487402766942978, "learning_rate": 0.0005, "loss": 2.1073, "step": 130290 }, { "epoch": 0.4959539596385588, "grad_norm": 0.12722621858119965, "learning_rate": 0.0005, "loss": 2.1095, "step": 130300 }, { "epoch": 0.4959920221066815, "grad_norm": 0.14762382209300995, "learning_rate": 0.0005, "loss": 2.1007, "step": 130310 }, { "epoch": 0.4960300845748042, "grad_norm": 0.11726965755224228, "learning_rate": 0.0005, "loss": 2.1065, "step": 130320 }, { "epoch": 0.49606814704292684, "grad_norm": 0.13559289276599884, "learning_rate": 0.0005, "loss": 2.1166, "step": 130330 }, { "epoch": 0.49610620951104956, "grad_norm": 0.12326718121767044, "learning_rate": 0.0005, "loss": 2.1013, "step": 130340 }, { "epoch": 0.4961442719791722, "grad_norm": 0.1360858827829361, "learning_rate": 0.0005, "loss": 2.1059, "step": 130350 }, { "epoch": 0.4961823344472949, "grad_norm": 0.14227698743343353, "learning_rate": 0.0005, "loss": 2.1203, "step": 130360 }, { "epoch": 0.4962203969154176, "grad_norm": 0.12769527733325958, "learning_rate": 0.0005, "loss": 2.1099, "step": 130370 }, { "epoch": 0.49625845938354024, "grad_norm": 0.11520993709564209, "learning_rate": 0.0005, "loss": 2.1203, "step": 130380 }, { "epoch": 0.49629652185166295, "grad_norm": 0.12799958884716034, "learning_rate": 0.0005, "loss": 2.1136, "step": 130390 }, { "epoch": 0.4963345843197856, "grad_norm": 0.12282870709896088, "learning_rate": 0.0005, "loss": 2.1144, "step": 130400 }, { "epoch": 0.4963726467879083, "grad_norm": 0.12155033648014069, "learning_rate": 0.0005, "loss": 2.1002, "step": 130410 }, { "epoch": 0.496410709256031, "grad_norm": 0.12830521166324615, "learning_rate": 0.0005, "loss": 2.1075, "step": 130420 }, { "epoch": 0.4964487717241537, "grad_norm": 0.12929099798202515, "learning_rate": 0.0005, "loss": 2.1061, "step": 130430 }, { "epoch": 0.49648683419227635, "grad_norm": 0.12785860896110535, "learning_rate": 0.0005, "loss": 2.1173, "step": 130440 }, { "epoch": 0.49652489666039906, "grad_norm": 0.1235969290137291, "learning_rate": 0.0005, "loss": 2.1172, "step": 130450 }, { "epoch": 0.4965629591285217, "grad_norm": 0.12364578247070312, "learning_rate": 0.0005, "loss": 2.1174, "step": 130460 }, { "epoch": 0.49660102159664443, "grad_norm": 0.12840792536735535, "learning_rate": 0.0005, "loss": 2.1082, "step": 130470 }, { "epoch": 0.4966390840647671, "grad_norm": 0.1260167807340622, "learning_rate": 0.0005, "loss": 2.1115, "step": 130480 }, { "epoch": 0.4966771465328898, "grad_norm": 0.12089382857084274, "learning_rate": 0.0005, "loss": 2.1252, "step": 130490 }, { "epoch": 0.49671520900101246, "grad_norm": 0.1287691444158554, "learning_rate": 0.0005, "loss": 2.121, "step": 130500 }, { "epoch": 0.49675327146913517, "grad_norm": 0.12943902611732483, "learning_rate": 0.0005, "loss": 2.1298, "step": 130510 }, { "epoch": 0.49679133393725783, "grad_norm": 0.13859035074710846, "learning_rate": 0.0005, "loss": 2.1175, "step": 130520 }, { "epoch": 0.4968293964053805, "grad_norm": 0.12068472802639008, "learning_rate": 0.0005, "loss": 2.1246, "step": 130530 }, { "epoch": 0.4968674588735032, "grad_norm": 0.12954632937908173, "learning_rate": 0.0005, "loss": 2.1113, "step": 130540 }, { "epoch": 0.49690552134162586, "grad_norm": 0.13704584538936615, "learning_rate": 0.0005, "loss": 2.1176, "step": 130550 }, { "epoch": 0.49694358380974857, "grad_norm": 0.13277916610240936, "learning_rate": 0.0005, "loss": 2.1041, "step": 130560 }, { "epoch": 0.4969816462778712, "grad_norm": 0.12848743796348572, "learning_rate": 0.0005, "loss": 2.115, "step": 130570 }, { "epoch": 0.49701970874599394, "grad_norm": 0.12172497808933258, "learning_rate": 0.0005, "loss": 2.1202, "step": 130580 }, { "epoch": 0.4970577712141166, "grad_norm": 0.12590010464191437, "learning_rate": 0.0005, "loss": 2.115, "step": 130590 }, { "epoch": 0.4970958336822393, "grad_norm": 0.1207374557852745, "learning_rate": 0.0005, "loss": 2.1076, "step": 130600 }, { "epoch": 0.49713389615036196, "grad_norm": 0.12318812310695648, "learning_rate": 0.0005, "loss": 2.1095, "step": 130610 }, { "epoch": 0.4971719586184847, "grad_norm": 0.1470927745103836, "learning_rate": 0.0005, "loss": 2.1001, "step": 130620 }, { "epoch": 0.49721002108660733, "grad_norm": 0.13342763483524323, "learning_rate": 0.0005, "loss": 2.1255, "step": 130630 }, { "epoch": 0.49724808355473005, "grad_norm": 0.12590563297271729, "learning_rate": 0.0005, "loss": 2.125, "step": 130640 }, { "epoch": 0.4972861460228527, "grad_norm": 0.11755761504173279, "learning_rate": 0.0005, "loss": 2.1144, "step": 130650 }, { "epoch": 0.49732420849097536, "grad_norm": 0.12532316148281097, "learning_rate": 0.0005, "loss": 2.1219, "step": 130660 }, { "epoch": 0.4973622709590981, "grad_norm": 0.12779484689235687, "learning_rate": 0.0005, "loss": 2.1118, "step": 130670 }, { "epoch": 0.49740033342722073, "grad_norm": 0.11526070535182953, "learning_rate": 0.0005, "loss": 2.1091, "step": 130680 }, { "epoch": 0.49743839589534344, "grad_norm": 0.12624244391918182, "learning_rate": 0.0005, "loss": 2.1333, "step": 130690 }, { "epoch": 0.4974764583634661, "grad_norm": 0.12625885009765625, "learning_rate": 0.0005, "loss": 2.1135, "step": 130700 }, { "epoch": 0.4975145208315888, "grad_norm": 0.11956362426280975, "learning_rate": 0.0005, "loss": 2.123, "step": 130710 }, { "epoch": 0.49755258329971147, "grad_norm": 0.12251365184783936, "learning_rate": 0.0005, "loss": 2.104, "step": 130720 }, { "epoch": 0.4975906457678342, "grad_norm": 0.11382036656141281, "learning_rate": 0.0005, "loss": 2.1041, "step": 130730 }, { "epoch": 0.49762870823595684, "grad_norm": 0.11894240230321884, "learning_rate": 0.0005, "loss": 2.1068, "step": 130740 }, { "epoch": 0.49766677070407955, "grad_norm": 0.1151915043592453, "learning_rate": 0.0005, "loss": 2.1088, "step": 130750 }, { "epoch": 0.4977048331722022, "grad_norm": 0.11738212406635284, "learning_rate": 0.0005, "loss": 2.1087, "step": 130760 }, { "epoch": 0.4977428956403249, "grad_norm": 0.12464101612567902, "learning_rate": 0.0005, "loss": 2.1253, "step": 130770 }, { "epoch": 0.4977809581084476, "grad_norm": 0.12291174381971359, "learning_rate": 0.0005, "loss": 2.1073, "step": 130780 }, { "epoch": 0.4978190205765703, "grad_norm": 0.12099691480398178, "learning_rate": 0.0005, "loss": 2.1106, "step": 130790 }, { "epoch": 0.49785708304469295, "grad_norm": 0.11977177113294601, "learning_rate": 0.0005, "loss": 2.1392, "step": 130800 }, { "epoch": 0.4978951455128156, "grad_norm": 0.12582093477249146, "learning_rate": 0.0005, "loss": 2.1045, "step": 130810 }, { "epoch": 0.4979332079809383, "grad_norm": 0.14268380403518677, "learning_rate": 0.0005, "loss": 2.1189, "step": 130820 }, { "epoch": 0.497971270449061, "grad_norm": 0.11546272039413452, "learning_rate": 0.0005, "loss": 2.1105, "step": 130830 }, { "epoch": 0.4980093329171837, "grad_norm": 0.1425076276063919, "learning_rate": 0.0005, "loss": 2.1095, "step": 130840 }, { "epoch": 0.49804739538530635, "grad_norm": 0.13655667006969452, "learning_rate": 0.0005, "loss": 2.1259, "step": 130850 }, { "epoch": 0.49808545785342906, "grad_norm": 0.1245507001876831, "learning_rate": 0.0005, "loss": 2.1099, "step": 130860 }, { "epoch": 0.4981235203215517, "grad_norm": 0.13207346200942993, "learning_rate": 0.0005, "loss": 2.1283, "step": 130870 }, { "epoch": 0.49816158278967443, "grad_norm": 0.11879435181617737, "learning_rate": 0.0005, "loss": 2.1106, "step": 130880 }, { "epoch": 0.4981996452577971, "grad_norm": 0.1184876337647438, "learning_rate": 0.0005, "loss": 2.1206, "step": 130890 }, { "epoch": 0.4982377077259198, "grad_norm": 0.12992629408836365, "learning_rate": 0.0005, "loss": 2.1162, "step": 130900 }, { "epoch": 0.49827577019404246, "grad_norm": 0.14854702353477478, "learning_rate": 0.0005, "loss": 2.1149, "step": 130910 }, { "epoch": 0.49831383266216517, "grad_norm": 0.1280069351196289, "learning_rate": 0.0005, "loss": 2.1154, "step": 130920 }, { "epoch": 0.4983518951302878, "grad_norm": 0.11797192692756653, "learning_rate": 0.0005, "loss": 2.1023, "step": 130930 }, { "epoch": 0.49838995759841054, "grad_norm": 0.1346352994441986, "learning_rate": 0.0005, "loss": 2.1219, "step": 130940 }, { "epoch": 0.4984280200665332, "grad_norm": 0.1282471865415573, "learning_rate": 0.0005, "loss": 2.1318, "step": 130950 }, { "epoch": 0.49846608253465585, "grad_norm": 0.12099988013505936, "learning_rate": 0.0005, "loss": 2.1261, "step": 130960 }, { "epoch": 0.49850414500277856, "grad_norm": 1.4301072359085083, "learning_rate": 0.0005, "loss": 2.1105, "step": 130970 }, { "epoch": 0.4985422074709012, "grad_norm": 0.12891265749931335, "learning_rate": 0.0005, "loss": 2.1107, "step": 130980 }, { "epoch": 0.49858026993902393, "grad_norm": 0.12442260980606079, "learning_rate": 0.0005, "loss": 2.1213, "step": 130990 }, { "epoch": 0.4986183324071466, "grad_norm": 0.14176233112812042, "learning_rate": 0.0005, "loss": 2.1267, "step": 131000 }, { "epoch": 0.4986563948752693, "grad_norm": 0.12994049489498138, "learning_rate": 0.0005, "loss": 2.1184, "step": 131010 }, { "epoch": 0.49869445734339196, "grad_norm": 0.12481575459241867, "learning_rate": 0.0005, "loss": 2.1182, "step": 131020 }, { "epoch": 0.4987325198115147, "grad_norm": 0.12183275818824768, "learning_rate": 0.0005, "loss": 2.1169, "step": 131030 }, { "epoch": 0.49877058227963733, "grad_norm": 0.1301172971725464, "learning_rate": 0.0005, "loss": 2.1142, "step": 131040 }, { "epoch": 0.49880864474776004, "grad_norm": 0.12625858187675476, "learning_rate": 0.0005, "loss": 2.1102, "step": 131050 }, { "epoch": 0.4988467072158827, "grad_norm": 0.13612774014472961, "learning_rate": 0.0005, "loss": 2.1187, "step": 131060 }, { "epoch": 0.4988847696840054, "grad_norm": 0.12587963044643402, "learning_rate": 0.0005, "loss": 2.1152, "step": 131070 }, { "epoch": 0.49892283215212807, "grad_norm": 0.12649382650852203, "learning_rate": 0.0005, "loss": 2.1176, "step": 131080 }, { "epoch": 0.49896089462025073, "grad_norm": 0.135384663939476, "learning_rate": 0.0005, "loss": 2.1198, "step": 131090 }, { "epoch": 0.49899895708837344, "grad_norm": 0.15188583731651306, "learning_rate": 0.0005, "loss": 2.1198, "step": 131100 }, { "epoch": 0.4990370195564961, "grad_norm": 0.12795251607894897, "learning_rate": 0.0005, "loss": 2.1188, "step": 131110 }, { "epoch": 0.4990750820246188, "grad_norm": 0.12417992204427719, "learning_rate": 0.0005, "loss": 2.1155, "step": 131120 }, { "epoch": 0.49911314449274147, "grad_norm": 0.1358865350484848, "learning_rate": 0.0005, "loss": 2.1225, "step": 131130 }, { "epoch": 0.4991512069608642, "grad_norm": 0.1385025829076767, "learning_rate": 0.0005, "loss": 2.1178, "step": 131140 }, { "epoch": 0.49918926942898684, "grad_norm": 0.11305361241102219, "learning_rate": 0.0005, "loss": 2.1293, "step": 131150 }, { "epoch": 0.49922733189710955, "grad_norm": 0.1257256418466568, "learning_rate": 0.0005, "loss": 2.1263, "step": 131160 }, { "epoch": 0.4992653943652322, "grad_norm": 0.12424245476722717, "learning_rate": 0.0005, "loss": 2.1169, "step": 131170 }, { "epoch": 0.4993034568333549, "grad_norm": 0.12357328087091446, "learning_rate": 0.0005, "loss": 2.1153, "step": 131180 }, { "epoch": 0.4993415193014776, "grad_norm": 0.1326143890619278, "learning_rate": 0.0005, "loss": 2.0951, "step": 131190 }, { "epoch": 0.4993795817696003, "grad_norm": 0.12293647229671478, "learning_rate": 0.0005, "loss": 2.1078, "step": 131200 }, { "epoch": 0.49941764423772295, "grad_norm": 0.11853579431772232, "learning_rate": 0.0005, "loss": 2.1174, "step": 131210 }, { "epoch": 0.49945570670584566, "grad_norm": 0.12029995769262314, "learning_rate": 0.0005, "loss": 2.122, "step": 131220 }, { "epoch": 0.4994937691739683, "grad_norm": 0.13621987402439117, "learning_rate": 0.0005, "loss": 2.13, "step": 131230 }, { "epoch": 0.499531831642091, "grad_norm": 0.1239994466304779, "learning_rate": 0.0005, "loss": 2.1239, "step": 131240 }, { "epoch": 0.4995698941102137, "grad_norm": 0.12169505655765533, "learning_rate": 0.0005, "loss": 2.1127, "step": 131250 }, { "epoch": 0.49960795657833634, "grad_norm": 0.12012088298797607, "learning_rate": 0.0005, "loss": 2.1311, "step": 131260 }, { "epoch": 0.49964601904645906, "grad_norm": 0.11918789893388748, "learning_rate": 0.0005, "loss": 2.1297, "step": 131270 }, { "epoch": 0.4996840815145817, "grad_norm": 0.12536664307117462, "learning_rate": 0.0005, "loss": 2.1214, "step": 131280 }, { "epoch": 0.4997221439827044, "grad_norm": 0.13789057731628418, "learning_rate": 0.0005, "loss": 2.1147, "step": 131290 }, { "epoch": 0.4997602064508271, "grad_norm": 0.11794903874397278, "learning_rate": 0.0005, "loss": 2.1074, "step": 131300 }, { "epoch": 0.4997982689189498, "grad_norm": 0.12749889492988586, "learning_rate": 0.0005, "loss": 2.126, "step": 131310 }, { "epoch": 0.49983633138707245, "grad_norm": 0.1235434040427208, "learning_rate": 0.0005, "loss": 2.119, "step": 131320 }, { "epoch": 0.49987439385519516, "grad_norm": 0.1203431487083435, "learning_rate": 0.0005, "loss": 2.1098, "step": 131330 }, { "epoch": 0.4999124563233178, "grad_norm": 0.12585608661174774, "learning_rate": 0.0005, "loss": 2.1024, "step": 131340 }, { "epoch": 0.49995051879144053, "grad_norm": 0.13304553925991058, "learning_rate": 0.0005, "loss": 2.1116, "step": 131350 }, { "epoch": 0.4999885812595632, "grad_norm": 0.12096429616212845, "learning_rate": 0.0005, "loss": 2.1101, "step": 131360 }, { "epoch": 0.5000266437276859, "grad_norm": 0.12587635219097137, "learning_rate": 0.0005, "loss": 2.1128, "step": 131370 }, { "epoch": 0.5000647061958086, "grad_norm": 0.13106253743171692, "learning_rate": 0.0005, "loss": 2.1145, "step": 131380 }, { "epoch": 0.5001027686639312, "grad_norm": 0.12039899080991745, "learning_rate": 0.0005, "loss": 2.1057, "step": 131390 }, { "epoch": 0.5001408311320539, "grad_norm": 0.14034612476825714, "learning_rate": 0.0005, "loss": 2.1225, "step": 131400 }, { "epoch": 0.5001788936001766, "grad_norm": 0.11676795780658722, "learning_rate": 0.0005, "loss": 2.1172, "step": 131410 }, { "epoch": 0.5002169560682993, "grad_norm": 0.12730121612548828, "learning_rate": 0.0005, "loss": 2.1184, "step": 131420 }, { "epoch": 0.500255018536422, "grad_norm": 0.1298544555902481, "learning_rate": 0.0005, "loss": 2.1159, "step": 131430 }, { "epoch": 0.5002930810045446, "grad_norm": 0.12161430716514587, "learning_rate": 0.0005, "loss": 2.1335, "step": 131440 }, { "epoch": 0.5003311434726674, "grad_norm": 0.12329776585102081, "learning_rate": 0.0005, "loss": 2.0967, "step": 131450 }, { "epoch": 0.50036920594079, "grad_norm": 0.13645300269126892, "learning_rate": 0.0005, "loss": 2.1089, "step": 131460 }, { "epoch": 0.5004072684089127, "grad_norm": 0.11726679652929306, "learning_rate": 0.0005, "loss": 2.1313, "step": 131470 }, { "epoch": 0.5004453308770354, "grad_norm": 0.12503685057163239, "learning_rate": 0.0005, "loss": 2.1046, "step": 131480 }, { "epoch": 0.5004833933451581, "grad_norm": 0.13997359573841095, "learning_rate": 0.0005, "loss": 2.1317, "step": 131490 }, { "epoch": 0.5005214558132808, "grad_norm": 0.1292068064212799, "learning_rate": 0.0005, "loss": 2.118, "step": 131500 }, { "epoch": 0.5005595182814034, "grad_norm": 0.11975622922182083, "learning_rate": 0.0005, "loss": 2.1204, "step": 131510 }, { "epoch": 0.5005975807495261, "grad_norm": 0.12581771612167358, "learning_rate": 0.0005, "loss": 2.112, "step": 131520 }, { "epoch": 0.5006356432176488, "grad_norm": 0.1165451779961586, "learning_rate": 0.0005, "loss": 2.1069, "step": 131530 }, { "epoch": 0.5006737056857715, "grad_norm": 0.1284739375114441, "learning_rate": 0.0005, "loss": 2.1001, "step": 131540 }, { "epoch": 0.5007117681538942, "grad_norm": 0.13096266984939575, "learning_rate": 0.0005, "loss": 2.1004, "step": 131550 }, { "epoch": 0.5007498306220168, "grad_norm": 0.11530668288469315, "learning_rate": 0.0005, "loss": 2.1027, "step": 131560 }, { "epoch": 0.5007878930901395, "grad_norm": 0.12410982698202133, "learning_rate": 0.0005, "loss": 2.1073, "step": 131570 }, { "epoch": 0.5008259555582623, "grad_norm": 0.1452266126871109, "learning_rate": 0.0005, "loss": 2.1192, "step": 131580 }, { "epoch": 0.5008640180263849, "grad_norm": 0.12477368116378784, "learning_rate": 0.0005, "loss": 2.1226, "step": 131590 }, { "epoch": 0.5009020804945076, "grad_norm": 0.138809472322464, "learning_rate": 0.0005, "loss": 2.1191, "step": 131600 }, { "epoch": 0.5009401429626302, "grad_norm": 0.11599473655223846, "learning_rate": 0.0005, "loss": 2.1265, "step": 131610 }, { "epoch": 0.500978205430753, "grad_norm": 0.12185689061880112, "learning_rate": 0.0005, "loss": 2.1083, "step": 131620 }, { "epoch": 0.5010162678988757, "grad_norm": 0.14375880360603333, "learning_rate": 0.0005, "loss": 2.1049, "step": 131630 }, { "epoch": 0.5010543303669983, "grad_norm": 0.12877488136291504, "learning_rate": 0.0005, "loss": 2.1155, "step": 131640 }, { "epoch": 0.501092392835121, "grad_norm": 0.1386006623506546, "learning_rate": 0.0005, "loss": 2.1146, "step": 131650 }, { "epoch": 0.5011304553032437, "grad_norm": 0.12304980307817459, "learning_rate": 0.0005, "loss": 2.1182, "step": 131660 }, { "epoch": 0.5011685177713664, "grad_norm": 0.13089512288570404, "learning_rate": 0.0005, "loss": 2.121, "step": 131670 }, { "epoch": 0.501206580239489, "grad_norm": 0.1259504109621048, "learning_rate": 0.0005, "loss": 2.1247, "step": 131680 }, { "epoch": 0.5012446427076117, "grad_norm": 0.1313033401966095, "learning_rate": 0.0005, "loss": 2.1107, "step": 131690 }, { "epoch": 0.5012827051757344, "grad_norm": 0.12506762146949768, "learning_rate": 0.0005, "loss": 2.1187, "step": 131700 }, { "epoch": 0.5013207676438571, "grad_norm": 0.11512114107608795, "learning_rate": 0.0005, "loss": 2.1135, "step": 131710 }, { "epoch": 0.5013588301119798, "grad_norm": 0.1273897886276245, "learning_rate": 0.0005, "loss": 2.1173, "step": 131720 }, { "epoch": 0.5013968925801024, "grad_norm": 0.12503871321678162, "learning_rate": 0.0005, "loss": 2.1152, "step": 131730 }, { "epoch": 0.5014349550482251, "grad_norm": 0.11183293163776398, "learning_rate": 0.0005, "loss": 2.114, "step": 131740 }, { "epoch": 0.5014730175163479, "grad_norm": 0.13775190711021423, "learning_rate": 0.0005, "loss": 2.1222, "step": 131750 }, { "epoch": 0.5015110799844705, "grad_norm": 0.12885314226150513, "learning_rate": 0.0005, "loss": 2.1173, "step": 131760 }, { "epoch": 0.5015491424525932, "grad_norm": 0.12374498695135117, "learning_rate": 0.0005, "loss": 2.1189, "step": 131770 }, { "epoch": 0.5015872049207158, "grad_norm": 0.12108965963125229, "learning_rate": 0.0005, "loss": 2.1066, "step": 131780 }, { "epoch": 0.5016252673888386, "grad_norm": 0.13662739098072052, "learning_rate": 0.0005, "loss": 2.1205, "step": 131790 }, { "epoch": 0.5016633298569613, "grad_norm": 0.12604866921901703, "learning_rate": 0.0005, "loss": 2.11, "step": 131800 }, { "epoch": 0.5017013923250839, "grad_norm": 0.1323482245206833, "learning_rate": 0.0005, "loss": 2.1127, "step": 131810 }, { "epoch": 0.5017394547932066, "grad_norm": 0.13354459404945374, "learning_rate": 0.0005, "loss": 2.1029, "step": 131820 }, { "epoch": 0.5017775172613292, "grad_norm": 0.13745664060115814, "learning_rate": 0.0005, "loss": 2.1181, "step": 131830 }, { "epoch": 0.501815579729452, "grad_norm": 0.13215488195419312, "learning_rate": 0.0005, "loss": 2.1066, "step": 131840 }, { "epoch": 0.5018536421975747, "grad_norm": 0.13737165927886963, "learning_rate": 0.0005, "loss": 2.1141, "step": 131850 }, { "epoch": 0.5018917046656973, "grad_norm": 0.12160696089267731, "learning_rate": 0.0005, "loss": 2.1154, "step": 131860 }, { "epoch": 0.50192976713382, "grad_norm": 0.127287358045578, "learning_rate": 0.0005, "loss": 2.122, "step": 131870 }, { "epoch": 0.5019678296019427, "grad_norm": 0.11619853228330612, "learning_rate": 0.0005, "loss": 2.1131, "step": 131880 }, { "epoch": 0.5020058920700654, "grad_norm": 0.12385293841362, "learning_rate": 0.0005, "loss": 2.1153, "step": 131890 }, { "epoch": 0.5020439545381881, "grad_norm": 0.12530963122844696, "learning_rate": 0.0005, "loss": 2.108, "step": 131900 }, { "epoch": 0.5020820170063107, "grad_norm": 0.11971908062696457, "learning_rate": 0.0005, "loss": 2.0995, "step": 131910 }, { "epoch": 0.5021200794744335, "grad_norm": 0.12599734961986542, "learning_rate": 0.0005, "loss": 2.1309, "step": 131920 }, { "epoch": 0.5021581419425561, "grad_norm": 0.13007313013076782, "learning_rate": 0.0005, "loss": 2.1075, "step": 131930 }, { "epoch": 0.5021962044106788, "grad_norm": 0.12488700449466705, "learning_rate": 0.0005, "loss": 2.109, "step": 131940 }, { "epoch": 0.5022342668788015, "grad_norm": 0.12737232446670532, "learning_rate": 0.0005, "loss": 2.1016, "step": 131950 }, { "epoch": 0.5022723293469241, "grad_norm": 0.138985738158226, "learning_rate": 0.0005, "loss": 2.1147, "step": 131960 }, { "epoch": 0.5023103918150469, "grad_norm": 0.11265484243631363, "learning_rate": 0.0005, "loss": 2.1037, "step": 131970 }, { "epoch": 0.5023484542831695, "grad_norm": 0.128614142537117, "learning_rate": 0.0005, "loss": 2.1186, "step": 131980 }, { "epoch": 0.5023865167512922, "grad_norm": 0.13249129056930542, "learning_rate": 0.0005, "loss": 2.1245, "step": 131990 }, { "epoch": 0.5024245792194149, "grad_norm": 0.11810528486967087, "learning_rate": 0.0005, "loss": 2.1135, "step": 132000 }, { "epoch": 0.5024626416875376, "grad_norm": 0.11904824525117874, "learning_rate": 0.0005, "loss": 2.1163, "step": 132010 }, { "epoch": 0.5025007041556603, "grad_norm": 0.11354199051856995, "learning_rate": 0.0005, "loss": 2.1082, "step": 132020 }, { "epoch": 0.5025387666237829, "grad_norm": 0.13872075080871582, "learning_rate": 0.0005, "loss": 2.1058, "step": 132030 }, { "epoch": 0.5025768290919056, "grad_norm": 0.1317596286535263, "learning_rate": 0.0005, "loss": 2.1063, "step": 132040 }, { "epoch": 0.5026148915600284, "grad_norm": 0.14089347422122955, "learning_rate": 0.0005, "loss": 2.1199, "step": 132050 }, { "epoch": 0.502652954028151, "grad_norm": 0.36503109335899353, "learning_rate": 0.0005, "loss": 2.1037, "step": 132060 }, { "epoch": 0.5026910164962737, "grad_norm": 0.130210742354393, "learning_rate": 0.0005, "loss": 2.1115, "step": 132070 }, { "epoch": 0.5027290789643963, "grad_norm": 0.11821793019771576, "learning_rate": 0.0005, "loss": 2.1049, "step": 132080 }, { "epoch": 0.5027671414325191, "grad_norm": 0.1177087128162384, "learning_rate": 0.0005, "loss": 2.1229, "step": 132090 }, { "epoch": 0.5028052039006418, "grad_norm": 0.1179640144109726, "learning_rate": 0.0005, "loss": 2.1228, "step": 132100 }, { "epoch": 0.5028432663687644, "grad_norm": 0.12133560329675674, "learning_rate": 0.0005, "loss": 2.1088, "step": 132110 }, { "epoch": 0.5028813288368871, "grad_norm": 0.14076071977615356, "learning_rate": 0.0005, "loss": 2.1138, "step": 132120 }, { "epoch": 0.5029193913050097, "grad_norm": 0.1257144659757614, "learning_rate": 0.0005, "loss": 2.1157, "step": 132130 }, { "epoch": 0.5029574537731325, "grad_norm": 0.12998227775096893, "learning_rate": 0.0005, "loss": 2.1104, "step": 132140 }, { "epoch": 0.5029955162412552, "grad_norm": 0.12388579547405243, "learning_rate": 0.0005, "loss": 2.1076, "step": 132150 }, { "epoch": 0.5030335787093778, "grad_norm": 0.1183706670999527, "learning_rate": 0.0005, "loss": 2.1037, "step": 132160 }, { "epoch": 0.5030716411775005, "grad_norm": 0.12758280336856842, "learning_rate": 0.0005, "loss": 2.1086, "step": 132170 }, { "epoch": 0.5031097036456232, "grad_norm": 0.1403193324804306, "learning_rate": 0.0005, "loss": 2.1196, "step": 132180 }, { "epoch": 0.5031477661137459, "grad_norm": 0.1350007951259613, "learning_rate": 0.0005, "loss": 2.1146, "step": 132190 }, { "epoch": 0.5031858285818686, "grad_norm": 0.12097181379795074, "learning_rate": 0.0005, "loss": 2.1111, "step": 132200 }, { "epoch": 0.5032238910499912, "grad_norm": 0.12329886853694916, "learning_rate": 0.0005, "loss": 2.1149, "step": 132210 }, { "epoch": 0.503261953518114, "grad_norm": 0.11826958507299423, "learning_rate": 0.0005, "loss": 2.1079, "step": 132220 }, { "epoch": 0.5033000159862366, "grad_norm": 0.13061341643333435, "learning_rate": 0.0005, "loss": 2.114, "step": 132230 }, { "epoch": 0.5033380784543593, "grad_norm": 0.13656175136566162, "learning_rate": 0.0005, "loss": 2.0999, "step": 132240 }, { "epoch": 0.503376140922482, "grad_norm": 0.14762873947620392, "learning_rate": 0.0005, "loss": 2.1152, "step": 132250 }, { "epoch": 0.5034142033906046, "grad_norm": 0.1220160499215126, "learning_rate": 0.0005, "loss": 2.1284, "step": 132260 }, { "epoch": 0.5034522658587274, "grad_norm": 0.128128781914711, "learning_rate": 0.0005, "loss": 2.122, "step": 132270 }, { "epoch": 0.50349032832685, "grad_norm": 0.14482936263084412, "learning_rate": 0.0005, "loss": 2.1254, "step": 132280 }, { "epoch": 0.5035283907949727, "grad_norm": 0.12684223055839539, "learning_rate": 0.0005, "loss": 2.12, "step": 132290 }, { "epoch": 0.5035664532630953, "grad_norm": 0.11803826689720154, "learning_rate": 0.0005, "loss": 2.1028, "step": 132300 }, { "epoch": 0.5036045157312181, "grad_norm": 0.12412810325622559, "learning_rate": 0.0005, "loss": 2.1236, "step": 132310 }, { "epoch": 0.5036425781993408, "grad_norm": 0.11887123435735703, "learning_rate": 0.0005, "loss": 2.1058, "step": 132320 }, { "epoch": 0.5036806406674634, "grad_norm": 0.1269877701997757, "learning_rate": 0.0005, "loss": 2.1008, "step": 132330 }, { "epoch": 0.5037187031355861, "grad_norm": 0.13143859803676605, "learning_rate": 0.0005, "loss": 2.1195, "step": 132340 }, { "epoch": 0.5037567656037089, "grad_norm": 0.12457022070884705, "learning_rate": 0.0005, "loss": 2.1222, "step": 132350 }, { "epoch": 0.5037948280718315, "grad_norm": 0.129766583442688, "learning_rate": 0.0005, "loss": 2.1053, "step": 132360 }, { "epoch": 0.5038328905399542, "grad_norm": 0.1242799237370491, "learning_rate": 0.0005, "loss": 2.1203, "step": 132370 }, { "epoch": 0.5038709530080768, "grad_norm": 0.15328699350357056, "learning_rate": 0.0005, "loss": 2.1064, "step": 132380 }, { "epoch": 0.5039090154761995, "grad_norm": 0.12018805742263794, "learning_rate": 0.0005, "loss": 2.1221, "step": 132390 }, { "epoch": 0.5039470779443223, "grad_norm": 0.12956398725509644, "learning_rate": 0.0005, "loss": 2.1319, "step": 132400 }, { "epoch": 0.5039851404124449, "grad_norm": 0.12832574546337128, "learning_rate": 0.0005, "loss": 2.1176, "step": 132410 }, { "epoch": 0.5040232028805676, "grad_norm": 0.12742365896701813, "learning_rate": 0.0005, "loss": 2.114, "step": 132420 }, { "epoch": 0.5040612653486902, "grad_norm": 0.12595300376415253, "learning_rate": 0.0005, "loss": 2.1118, "step": 132430 }, { "epoch": 0.504099327816813, "grad_norm": 0.11913701146841049, "learning_rate": 0.0005, "loss": 2.0927, "step": 132440 }, { "epoch": 0.5041373902849356, "grad_norm": 0.11661577969789505, "learning_rate": 0.0005, "loss": 2.1084, "step": 132450 }, { "epoch": 0.5041754527530583, "grad_norm": 0.14057959616184235, "learning_rate": 0.0005, "loss": 2.1194, "step": 132460 }, { "epoch": 0.504213515221181, "grad_norm": 0.127021923661232, "learning_rate": 0.0005, "loss": 2.1185, "step": 132470 }, { "epoch": 0.5042515776893037, "grad_norm": 0.121463842689991, "learning_rate": 0.0005, "loss": 2.1156, "step": 132480 }, { "epoch": 0.5042896401574264, "grad_norm": 0.13129813969135284, "learning_rate": 0.0005, "loss": 2.1193, "step": 132490 }, { "epoch": 0.504327702625549, "grad_norm": 0.14562547206878662, "learning_rate": 0.0005, "loss": 2.1139, "step": 132500 }, { "epoch": 0.5043657650936717, "grad_norm": 0.12791889905929565, "learning_rate": 0.0005, "loss": 2.1111, "step": 132510 }, { "epoch": 0.5044038275617945, "grad_norm": 0.12117471545934677, "learning_rate": 0.0005, "loss": 2.113, "step": 132520 }, { "epoch": 0.5044418900299171, "grad_norm": 0.11436469852924347, "learning_rate": 0.0005, "loss": 2.1128, "step": 132530 }, { "epoch": 0.5044799524980398, "grad_norm": 0.12663054466247559, "learning_rate": 0.0005, "loss": 2.119, "step": 132540 }, { "epoch": 0.5045180149661624, "grad_norm": 0.17653295397758484, "learning_rate": 0.0005, "loss": 2.1042, "step": 132550 }, { "epoch": 0.5045560774342851, "grad_norm": 0.12252450734376907, "learning_rate": 0.0005, "loss": 2.1387, "step": 132560 }, { "epoch": 0.5045941399024079, "grad_norm": 0.11884088069200516, "learning_rate": 0.0005, "loss": 2.1162, "step": 132570 }, { "epoch": 0.5046322023705305, "grad_norm": 0.1152932196855545, "learning_rate": 0.0005, "loss": 2.1032, "step": 132580 }, { "epoch": 0.5046702648386532, "grad_norm": 0.13256339728832245, "learning_rate": 0.0005, "loss": 2.1159, "step": 132590 }, { "epoch": 0.5047083273067758, "grad_norm": 0.12635567784309387, "learning_rate": 0.0005, "loss": 2.1089, "step": 132600 }, { "epoch": 0.5047463897748986, "grad_norm": 0.1187937930226326, "learning_rate": 0.0005, "loss": 2.1249, "step": 132610 }, { "epoch": 0.5047844522430213, "grad_norm": 0.1216667890548706, "learning_rate": 0.0005, "loss": 2.108, "step": 132620 }, { "epoch": 0.5048225147111439, "grad_norm": 0.11755828559398651, "learning_rate": 0.0005, "loss": 2.1323, "step": 132630 }, { "epoch": 0.5048605771792666, "grad_norm": 0.133527010679245, "learning_rate": 0.0005, "loss": 2.1098, "step": 132640 }, { "epoch": 0.5048986396473893, "grad_norm": 0.11593339592218399, "learning_rate": 0.0005, "loss": 2.1123, "step": 132650 }, { "epoch": 0.504936702115512, "grad_norm": 0.18402504920959473, "learning_rate": 0.0005, "loss": 2.1081, "step": 132660 }, { "epoch": 0.5049747645836347, "grad_norm": 0.13471607863903046, "learning_rate": 0.0005, "loss": 2.0985, "step": 132670 }, { "epoch": 0.5050128270517573, "grad_norm": 0.11921326071023941, "learning_rate": 0.0005, "loss": 2.1032, "step": 132680 }, { "epoch": 0.50505088951988, "grad_norm": 0.12205012887716293, "learning_rate": 0.0005, "loss": 2.1045, "step": 132690 }, { "epoch": 0.5050889519880027, "grad_norm": 0.11981106549501419, "learning_rate": 0.0005, "loss": 2.1147, "step": 132700 }, { "epoch": 0.5051270144561254, "grad_norm": 0.12213804572820663, "learning_rate": 0.0005, "loss": 2.1164, "step": 132710 }, { "epoch": 0.5051650769242481, "grad_norm": 0.13242408633232117, "learning_rate": 0.0005, "loss": 2.1129, "step": 132720 }, { "epoch": 0.5052031393923707, "grad_norm": 0.11383634060621262, "learning_rate": 0.0005, "loss": 2.1056, "step": 132730 }, { "epoch": 0.5052412018604935, "grad_norm": 0.11707602441310883, "learning_rate": 0.0005, "loss": 2.1347, "step": 132740 }, { "epoch": 0.5052792643286161, "grad_norm": 0.11705445498228073, "learning_rate": 0.0005, "loss": 2.1215, "step": 132750 }, { "epoch": 0.5053173267967388, "grad_norm": 0.11471796780824661, "learning_rate": 0.0005, "loss": 2.124, "step": 132760 }, { "epoch": 0.5053553892648615, "grad_norm": 0.12790432572364807, "learning_rate": 0.0005, "loss": 2.1138, "step": 132770 }, { "epoch": 0.5053934517329842, "grad_norm": 0.14037707448005676, "learning_rate": 0.0005, "loss": 2.0979, "step": 132780 }, { "epoch": 0.5054315142011069, "grad_norm": 0.14267417788505554, "learning_rate": 0.0005, "loss": 2.1166, "step": 132790 }, { "epoch": 0.5054695766692295, "grad_norm": 0.1200094223022461, "learning_rate": 0.0005, "loss": 2.1253, "step": 132800 }, { "epoch": 0.5055076391373522, "grad_norm": 0.1262931376695633, "learning_rate": 0.0005, "loss": 2.1007, "step": 132810 }, { "epoch": 0.5055457016054749, "grad_norm": 0.11991778761148453, "learning_rate": 0.0005, "loss": 2.1179, "step": 132820 }, { "epoch": 0.5055837640735976, "grad_norm": 0.1474510282278061, "learning_rate": 0.0005, "loss": 2.1153, "step": 132830 }, { "epoch": 0.5056218265417203, "grad_norm": 0.9946924448013306, "learning_rate": 0.0005, "loss": 2.1069, "step": 132840 }, { "epoch": 0.5056598890098429, "grad_norm": 0.1304653137922287, "learning_rate": 0.0005, "loss": 2.1049, "step": 132850 }, { "epoch": 0.5056979514779656, "grad_norm": 0.1402243971824646, "learning_rate": 0.0005, "loss": 2.1088, "step": 132860 }, { "epoch": 0.5057360139460884, "grad_norm": 0.13506390154361725, "learning_rate": 0.0005, "loss": 2.1021, "step": 132870 }, { "epoch": 0.505774076414211, "grad_norm": 0.1145101860165596, "learning_rate": 0.0005, "loss": 2.1327, "step": 132880 }, { "epoch": 0.5058121388823337, "grad_norm": 0.11101609468460083, "learning_rate": 0.0005, "loss": 2.1194, "step": 132890 }, { "epoch": 0.5058502013504563, "grad_norm": 0.12633520364761353, "learning_rate": 0.0005, "loss": 2.1274, "step": 132900 }, { "epoch": 0.5058882638185791, "grad_norm": 0.13389374315738678, "learning_rate": 0.0005, "loss": 2.1185, "step": 132910 }, { "epoch": 0.5059263262867018, "grad_norm": 0.12300463765859604, "learning_rate": 0.0005, "loss": 2.1023, "step": 132920 }, { "epoch": 0.5059643887548244, "grad_norm": 0.1362847238779068, "learning_rate": 0.0005, "loss": 2.1067, "step": 132930 }, { "epoch": 0.5060024512229471, "grad_norm": 0.12361106276512146, "learning_rate": 0.0005, "loss": 2.1149, "step": 132940 }, { "epoch": 0.5060405136910698, "grad_norm": 0.12673906981945038, "learning_rate": 0.0005, "loss": 2.1175, "step": 132950 }, { "epoch": 0.5060785761591925, "grad_norm": 0.11915009468793869, "learning_rate": 0.0005, "loss": 2.1133, "step": 132960 }, { "epoch": 0.5061166386273152, "grad_norm": 0.1288875937461853, "learning_rate": 0.0005, "loss": 2.1282, "step": 132970 }, { "epoch": 0.5061547010954378, "grad_norm": 0.11823371052742004, "learning_rate": 0.0005, "loss": 2.1098, "step": 132980 }, { "epoch": 0.5061927635635605, "grad_norm": 0.12016794830560684, "learning_rate": 0.0005, "loss": 2.132, "step": 132990 }, { "epoch": 0.5062308260316832, "grad_norm": 0.13051475584506989, "learning_rate": 0.0005, "loss": 2.1246, "step": 133000 }, { "epoch": 0.5062688884998059, "grad_norm": 0.13054126501083374, "learning_rate": 0.0005, "loss": 2.11, "step": 133010 }, { "epoch": 0.5063069509679285, "grad_norm": 0.12361478805541992, "learning_rate": 0.0005, "loss": 2.1101, "step": 133020 }, { "epoch": 0.5063450134360512, "grad_norm": 0.1327802538871765, "learning_rate": 0.0005, "loss": 2.0975, "step": 133030 }, { "epoch": 0.506383075904174, "grad_norm": 0.12492018938064575, "learning_rate": 0.0005, "loss": 2.1128, "step": 133040 }, { "epoch": 0.5064211383722966, "grad_norm": 0.12457766383886337, "learning_rate": 0.0005, "loss": 2.1028, "step": 133050 }, { "epoch": 0.5064592008404193, "grad_norm": 0.12348872423171997, "learning_rate": 0.0005, "loss": 2.119, "step": 133060 }, { "epoch": 0.506497263308542, "grad_norm": 0.14808453619480133, "learning_rate": 0.0005, "loss": 2.1144, "step": 133070 }, { "epoch": 0.5065353257766647, "grad_norm": 0.13085603713989258, "learning_rate": 0.0005, "loss": 2.1059, "step": 133080 }, { "epoch": 0.5065733882447874, "grad_norm": 0.12417221069335938, "learning_rate": 0.0005, "loss": 2.1011, "step": 133090 }, { "epoch": 0.50661145071291, "grad_norm": 0.12539374828338623, "learning_rate": 0.0005, "loss": 2.0998, "step": 133100 }, { "epoch": 0.5066495131810327, "grad_norm": 0.13433487713336945, "learning_rate": 0.0005, "loss": 2.1134, "step": 133110 }, { "epoch": 0.5066875756491553, "grad_norm": 0.12523174285888672, "learning_rate": 0.0005, "loss": 2.1022, "step": 133120 }, { "epoch": 0.5067256381172781, "grad_norm": 0.13964121043682098, "learning_rate": 0.0005, "loss": 2.1243, "step": 133130 }, { "epoch": 0.5067637005854008, "grad_norm": 0.12529048323631287, "learning_rate": 0.0005, "loss": 2.1088, "step": 133140 }, { "epoch": 0.5068017630535234, "grad_norm": 0.12352463603019714, "learning_rate": 0.0005, "loss": 2.113, "step": 133150 }, { "epoch": 0.5068398255216461, "grad_norm": 0.13642734289169312, "learning_rate": 0.0005, "loss": 2.1174, "step": 133160 }, { "epoch": 0.5068778879897688, "grad_norm": 0.13066624104976654, "learning_rate": 0.0005, "loss": 2.0974, "step": 133170 }, { "epoch": 0.5069159504578915, "grad_norm": 0.11709106713533401, "learning_rate": 0.0005, "loss": 2.1134, "step": 133180 }, { "epoch": 0.5069540129260142, "grad_norm": 0.12521210312843323, "learning_rate": 0.0005, "loss": 2.1087, "step": 133190 }, { "epoch": 0.5069920753941368, "grad_norm": 0.1172792986035347, "learning_rate": 0.0005, "loss": 2.126, "step": 133200 }, { "epoch": 0.5070301378622596, "grad_norm": 0.12813249230384827, "learning_rate": 0.0005, "loss": 2.1213, "step": 133210 }, { "epoch": 0.5070682003303822, "grad_norm": 0.1418289691209793, "learning_rate": 0.0005, "loss": 2.1184, "step": 133220 }, { "epoch": 0.5071062627985049, "grad_norm": 0.12482786178588867, "learning_rate": 0.0005, "loss": 2.1259, "step": 133230 }, { "epoch": 0.5071443252666276, "grad_norm": 0.12194843590259552, "learning_rate": 0.0005, "loss": 2.1146, "step": 133240 }, { "epoch": 0.5071823877347502, "grad_norm": 0.12992645800113678, "learning_rate": 0.0005, "loss": 2.1215, "step": 133250 }, { "epoch": 0.507220450202873, "grad_norm": 0.1264665126800537, "learning_rate": 0.0005, "loss": 2.1181, "step": 133260 }, { "epoch": 0.5072585126709956, "grad_norm": 0.12155953794717789, "learning_rate": 0.0005, "loss": 2.1288, "step": 133270 }, { "epoch": 0.5072965751391183, "grad_norm": 0.13857276737689972, "learning_rate": 0.0005, "loss": 2.1271, "step": 133280 }, { "epoch": 0.507334637607241, "grad_norm": 0.11679156124591827, "learning_rate": 0.0005, "loss": 2.1118, "step": 133290 }, { "epoch": 0.5073727000753637, "grad_norm": 0.12049750983715057, "learning_rate": 0.0005, "loss": 2.1006, "step": 133300 }, { "epoch": 0.5074107625434864, "grad_norm": 0.11648929864168167, "learning_rate": 0.0005, "loss": 2.1081, "step": 133310 }, { "epoch": 0.507448825011609, "grad_norm": 0.1900986135005951, "learning_rate": 0.0005, "loss": 2.1154, "step": 133320 }, { "epoch": 0.5074868874797317, "grad_norm": 0.11860639601945877, "learning_rate": 0.0005, "loss": 2.1147, "step": 133330 }, { "epoch": 0.5075249499478545, "grad_norm": 0.1396195888519287, "learning_rate": 0.0005, "loss": 2.1145, "step": 133340 }, { "epoch": 0.5075630124159771, "grad_norm": 0.13535426557064056, "learning_rate": 0.0005, "loss": 2.1257, "step": 133350 }, { "epoch": 0.5076010748840998, "grad_norm": 0.13391265273094177, "learning_rate": 0.0005, "loss": 2.1153, "step": 133360 }, { "epoch": 0.5076391373522224, "grad_norm": 0.11973792314529419, "learning_rate": 0.0005, "loss": 2.1068, "step": 133370 }, { "epoch": 0.5076771998203452, "grad_norm": 0.12155459821224213, "learning_rate": 0.0005, "loss": 2.1048, "step": 133380 }, { "epoch": 0.5077152622884679, "grad_norm": 0.1285034418106079, "learning_rate": 0.0005, "loss": 2.112, "step": 133390 }, { "epoch": 0.5077533247565905, "grad_norm": 0.12448261678218842, "learning_rate": 0.0005, "loss": 2.1155, "step": 133400 }, { "epoch": 0.5077913872247132, "grad_norm": 0.13152651488780975, "learning_rate": 0.0005, "loss": 2.1146, "step": 133410 }, { "epoch": 0.5078294496928358, "grad_norm": 0.12238927185535431, "learning_rate": 0.0005, "loss": 2.1236, "step": 133420 }, { "epoch": 0.5078675121609586, "grad_norm": 0.12980888783931732, "learning_rate": 0.0005, "loss": 2.11, "step": 133430 }, { "epoch": 0.5079055746290813, "grad_norm": 0.1216517984867096, "learning_rate": 0.0005, "loss": 2.1184, "step": 133440 }, { "epoch": 0.5079436370972039, "grad_norm": 0.14697448909282684, "learning_rate": 0.0005, "loss": 2.1173, "step": 133450 }, { "epoch": 0.5079816995653266, "grad_norm": 0.13025575876235962, "learning_rate": 0.0005, "loss": 2.1105, "step": 133460 }, { "epoch": 0.5080197620334493, "grad_norm": 0.12157538533210754, "learning_rate": 0.0005, "loss": 2.1228, "step": 133470 }, { "epoch": 0.508057824501572, "grad_norm": 0.13055738806724548, "learning_rate": 0.0005, "loss": 2.1062, "step": 133480 }, { "epoch": 0.5080958869696947, "grad_norm": 0.12716266512870789, "learning_rate": 0.0005, "loss": 2.1289, "step": 133490 }, { "epoch": 0.5081339494378173, "grad_norm": 0.12884607911109924, "learning_rate": 0.0005, "loss": 2.1228, "step": 133500 }, { "epoch": 0.5081720119059401, "grad_norm": 0.12093187868595123, "learning_rate": 0.0005, "loss": 2.0979, "step": 133510 }, { "epoch": 0.5082100743740627, "grad_norm": 0.11768296360969543, "learning_rate": 0.0005, "loss": 2.1052, "step": 133520 }, { "epoch": 0.5082481368421854, "grad_norm": 0.11855961382389069, "learning_rate": 0.0005, "loss": 2.1081, "step": 133530 }, { "epoch": 0.508286199310308, "grad_norm": 0.12154291570186615, "learning_rate": 0.0005, "loss": 2.1023, "step": 133540 }, { "epoch": 0.5083242617784307, "grad_norm": 0.11715900152921677, "learning_rate": 0.0005, "loss": 2.1077, "step": 133550 }, { "epoch": 0.5083623242465535, "grad_norm": 0.13073576986789703, "learning_rate": 0.0005, "loss": 2.1147, "step": 133560 }, { "epoch": 0.5084003867146761, "grad_norm": 0.12054118514060974, "learning_rate": 0.0005, "loss": 2.1221, "step": 133570 }, { "epoch": 0.5084384491827988, "grad_norm": 0.11386524140834808, "learning_rate": 0.0005, "loss": 2.116, "step": 133580 }, { "epoch": 0.5084765116509214, "grad_norm": 0.13187789916992188, "learning_rate": 0.0005, "loss": 2.1266, "step": 133590 }, { "epoch": 0.5085145741190442, "grad_norm": 0.13285937905311584, "learning_rate": 0.0005, "loss": 2.1123, "step": 133600 }, { "epoch": 0.5085526365871669, "grad_norm": 0.12216323614120483, "learning_rate": 0.0005, "loss": 2.1252, "step": 133610 }, { "epoch": 0.5085906990552895, "grad_norm": 0.13401517271995544, "learning_rate": 0.0005, "loss": 2.1248, "step": 133620 }, { "epoch": 0.5086287615234122, "grad_norm": 0.13121497631072998, "learning_rate": 0.0005, "loss": 2.1004, "step": 133630 }, { "epoch": 0.508666823991535, "grad_norm": 0.13181401789188385, "learning_rate": 0.0005, "loss": 2.1064, "step": 133640 }, { "epoch": 0.5087048864596576, "grad_norm": 0.12712612748146057, "learning_rate": 0.0005, "loss": 2.1329, "step": 133650 }, { "epoch": 0.5087429489277803, "grad_norm": 0.12323405593633652, "learning_rate": 0.0005, "loss": 2.1219, "step": 133660 }, { "epoch": 0.5087810113959029, "grad_norm": 0.13902431726455688, "learning_rate": 0.0005, "loss": 2.1228, "step": 133670 }, { "epoch": 0.5088190738640256, "grad_norm": 0.12429852038621902, "learning_rate": 0.0005, "loss": 2.1322, "step": 133680 }, { "epoch": 0.5088571363321484, "grad_norm": 0.14107947051525116, "learning_rate": 0.0005, "loss": 2.1082, "step": 133690 }, { "epoch": 0.508895198800271, "grad_norm": 0.13658325374126434, "learning_rate": 0.0005, "loss": 2.1193, "step": 133700 }, { "epoch": 0.5089332612683937, "grad_norm": 0.12138927727937698, "learning_rate": 0.0005, "loss": 2.125, "step": 133710 }, { "epoch": 0.5089713237365163, "grad_norm": 0.13314686715602875, "learning_rate": 0.0005, "loss": 2.1183, "step": 133720 }, { "epoch": 0.5090093862046391, "grad_norm": 0.12922067940235138, "learning_rate": 0.0005, "loss": 2.1215, "step": 133730 }, { "epoch": 0.5090474486727617, "grad_norm": 0.1288197636604309, "learning_rate": 0.0005, "loss": 2.1012, "step": 133740 }, { "epoch": 0.5090855111408844, "grad_norm": 0.12378251552581787, "learning_rate": 0.0005, "loss": 2.1217, "step": 133750 }, { "epoch": 0.5091235736090071, "grad_norm": 0.13623934984207153, "learning_rate": 0.0005, "loss": 2.1007, "step": 133760 }, { "epoch": 0.5091616360771298, "grad_norm": 0.11521940678358078, "learning_rate": 0.0005, "loss": 2.1132, "step": 133770 }, { "epoch": 0.5091996985452525, "grad_norm": 0.13184532523155212, "learning_rate": 0.0005, "loss": 2.1059, "step": 133780 }, { "epoch": 0.5092377610133751, "grad_norm": 0.12468823045492172, "learning_rate": 0.0005, "loss": 2.1233, "step": 133790 }, { "epoch": 0.5092758234814978, "grad_norm": 0.13141892850399017, "learning_rate": 0.0005, "loss": 2.1173, "step": 133800 }, { "epoch": 0.5093138859496206, "grad_norm": 0.12401145696640015, "learning_rate": 0.0005, "loss": 2.1227, "step": 133810 }, { "epoch": 0.5093519484177432, "grad_norm": 0.13101080060005188, "learning_rate": 0.0005, "loss": 2.1301, "step": 133820 }, { "epoch": 0.5093900108858659, "grad_norm": 0.12074775993824005, "learning_rate": 0.0005, "loss": 2.1183, "step": 133830 }, { "epoch": 0.5094280733539885, "grad_norm": 0.13417311012744904, "learning_rate": 0.0005, "loss": 2.107, "step": 133840 }, { "epoch": 0.5094661358221112, "grad_norm": 0.11586230248212814, "learning_rate": 0.0005, "loss": 2.1133, "step": 133850 }, { "epoch": 0.509504198290234, "grad_norm": 0.12526260316371918, "learning_rate": 0.0005, "loss": 2.1384, "step": 133860 }, { "epoch": 0.5095422607583566, "grad_norm": 0.13197200000286102, "learning_rate": 0.0005, "loss": 2.1109, "step": 133870 }, { "epoch": 0.5095803232264793, "grad_norm": 0.1136879101395607, "learning_rate": 0.0005, "loss": 2.1185, "step": 133880 }, { "epoch": 0.5096183856946019, "grad_norm": 0.11466697603464127, "learning_rate": 0.0005, "loss": 2.1157, "step": 133890 }, { "epoch": 0.5096564481627247, "grad_norm": 0.12406015396118164, "learning_rate": 0.0005, "loss": 2.1198, "step": 133900 }, { "epoch": 0.5096945106308474, "grad_norm": 0.1297469139099121, "learning_rate": 0.0005, "loss": 2.1133, "step": 133910 }, { "epoch": 0.50973257309897, "grad_norm": 0.12095653265714645, "learning_rate": 0.0005, "loss": 2.108, "step": 133920 }, { "epoch": 0.5097706355670927, "grad_norm": 0.1300520896911621, "learning_rate": 0.0005, "loss": 2.1121, "step": 133930 }, { "epoch": 0.5098086980352154, "grad_norm": 0.14236518740653992, "learning_rate": 0.0005, "loss": 2.117, "step": 133940 }, { "epoch": 0.5098467605033381, "grad_norm": 0.12811344861984253, "learning_rate": 0.0005, "loss": 2.1175, "step": 133950 }, { "epoch": 0.5098848229714608, "grad_norm": 0.13053585588932037, "learning_rate": 0.0005, "loss": 2.1226, "step": 133960 }, { "epoch": 0.5099228854395834, "grad_norm": 0.11776957660913467, "learning_rate": 0.0005, "loss": 2.1145, "step": 133970 }, { "epoch": 0.5099609479077061, "grad_norm": 0.1257060021162033, "learning_rate": 0.0005, "loss": 2.1023, "step": 133980 }, { "epoch": 0.5099990103758288, "grad_norm": 0.10968641936779022, "learning_rate": 0.0005, "loss": 2.1223, "step": 133990 }, { "epoch": 0.5100370728439515, "grad_norm": 0.11820013076066971, "learning_rate": 0.0005, "loss": 2.1163, "step": 134000 }, { "epoch": 0.5100751353120742, "grad_norm": 0.1360628604888916, "learning_rate": 0.0005, "loss": 2.1095, "step": 134010 }, { "epoch": 0.5101131977801968, "grad_norm": 0.1447390764951706, "learning_rate": 0.0005, "loss": 2.1198, "step": 134020 }, { "epoch": 0.5101512602483196, "grad_norm": 0.12482602894306183, "learning_rate": 0.0005, "loss": 2.1092, "step": 134030 }, { "epoch": 0.5101893227164422, "grad_norm": 0.14182770252227783, "learning_rate": 0.0005, "loss": 2.092, "step": 134040 }, { "epoch": 0.5102273851845649, "grad_norm": 0.11726133525371552, "learning_rate": 0.0005, "loss": 2.1113, "step": 134050 }, { "epoch": 0.5102654476526876, "grad_norm": 0.11940544843673706, "learning_rate": 0.0005, "loss": 2.1214, "step": 134060 }, { "epoch": 0.5103035101208103, "grad_norm": 0.13671204447746277, "learning_rate": 0.0005, "loss": 2.1098, "step": 134070 }, { "epoch": 0.510341572588933, "grad_norm": 0.13532045483589172, "learning_rate": 0.0005, "loss": 2.1082, "step": 134080 }, { "epoch": 0.5103796350570556, "grad_norm": 0.1301160454750061, "learning_rate": 0.0005, "loss": 2.1157, "step": 134090 }, { "epoch": 0.5104176975251783, "grad_norm": 0.11771665513515472, "learning_rate": 0.0005, "loss": 2.1147, "step": 134100 }, { "epoch": 0.510455759993301, "grad_norm": 0.12012787908315659, "learning_rate": 0.0005, "loss": 2.1249, "step": 134110 }, { "epoch": 0.5104938224614237, "grad_norm": 0.12731090188026428, "learning_rate": 0.0005, "loss": 2.1116, "step": 134120 }, { "epoch": 0.5105318849295464, "grad_norm": 0.12644848227500916, "learning_rate": 0.0005, "loss": 2.1198, "step": 134130 }, { "epoch": 0.510569947397669, "grad_norm": 0.12338414788246155, "learning_rate": 0.0005, "loss": 2.1207, "step": 134140 }, { "epoch": 0.5106080098657917, "grad_norm": 0.12706167995929718, "learning_rate": 0.0005, "loss": 2.1041, "step": 134150 }, { "epoch": 0.5106460723339145, "grad_norm": 0.1354827731847763, "learning_rate": 0.0005, "loss": 2.1131, "step": 134160 }, { "epoch": 0.5106841348020371, "grad_norm": 0.12414322048425674, "learning_rate": 0.0005, "loss": 2.1011, "step": 134170 }, { "epoch": 0.5107221972701598, "grad_norm": 0.11386504769325256, "learning_rate": 0.0005, "loss": 2.114, "step": 134180 }, { "epoch": 0.5107602597382824, "grad_norm": 0.11852803826332092, "learning_rate": 0.0005, "loss": 2.1278, "step": 134190 }, { "epoch": 0.5107983222064052, "grad_norm": 0.13569186627864838, "learning_rate": 0.0005, "loss": 2.104, "step": 134200 }, { "epoch": 0.5108363846745279, "grad_norm": 0.11802306771278381, "learning_rate": 0.0005, "loss": 2.1107, "step": 134210 }, { "epoch": 0.5108744471426505, "grad_norm": 0.13798528909683228, "learning_rate": 0.0005, "loss": 2.1057, "step": 134220 }, { "epoch": 0.5109125096107732, "grad_norm": 0.12030106037855148, "learning_rate": 0.0005, "loss": 2.0996, "step": 134230 }, { "epoch": 0.5109505720788959, "grad_norm": 0.13408568501472473, "learning_rate": 0.0005, "loss": 2.1221, "step": 134240 }, { "epoch": 0.5109886345470186, "grad_norm": 0.14743518829345703, "learning_rate": 0.0005, "loss": 2.1289, "step": 134250 }, { "epoch": 0.5110266970151413, "grad_norm": 0.1478193700313568, "learning_rate": 0.0005, "loss": 2.1004, "step": 134260 }, { "epoch": 0.5110647594832639, "grad_norm": 0.12086143344640732, "learning_rate": 0.0005, "loss": 2.1139, "step": 134270 }, { "epoch": 0.5111028219513866, "grad_norm": 0.126709446310997, "learning_rate": 0.0005, "loss": 2.1096, "step": 134280 }, { "epoch": 0.5111408844195093, "grad_norm": 0.12228815257549286, "learning_rate": 0.0005, "loss": 2.1068, "step": 134290 }, { "epoch": 0.511178946887632, "grad_norm": 0.11191216856241226, "learning_rate": 0.0005, "loss": 2.118, "step": 134300 }, { "epoch": 0.5112170093557546, "grad_norm": 0.12554562091827393, "learning_rate": 0.0005, "loss": 2.13, "step": 134310 }, { "epoch": 0.5112550718238773, "grad_norm": 0.12003730237483978, "learning_rate": 0.0005, "loss": 2.0977, "step": 134320 }, { "epoch": 0.5112931342920001, "grad_norm": 0.130716934800148, "learning_rate": 0.0005, "loss": 2.1118, "step": 134330 }, { "epoch": 0.5113311967601227, "grad_norm": 0.12484478950500488, "learning_rate": 0.0005, "loss": 2.1238, "step": 134340 }, { "epoch": 0.5113692592282454, "grad_norm": 0.1287389099597931, "learning_rate": 0.0005, "loss": 2.1116, "step": 134350 }, { "epoch": 0.511407321696368, "grad_norm": 0.121913380920887, "learning_rate": 0.0005, "loss": 2.124, "step": 134360 }, { "epoch": 0.5114453841644908, "grad_norm": 0.12113969773054123, "learning_rate": 0.0005, "loss": 2.123, "step": 134370 }, { "epoch": 0.5114834466326135, "grad_norm": 0.11920718103647232, "learning_rate": 0.0005, "loss": 2.104, "step": 134380 }, { "epoch": 0.5115215091007361, "grad_norm": 0.1182803213596344, "learning_rate": 0.0005, "loss": 2.1149, "step": 134390 }, { "epoch": 0.5115595715688588, "grad_norm": 0.11739551275968552, "learning_rate": 0.0005, "loss": 2.1135, "step": 134400 }, { "epoch": 0.5115976340369814, "grad_norm": 0.12292132526636124, "learning_rate": 0.0005, "loss": 2.1089, "step": 134410 }, { "epoch": 0.5116356965051042, "grad_norm": 0.13265924155712128, "learning_rate": 0.0005, "loss": 2.1161, "step": 134420 }, { "epoch": 0.5116737589732269, "grad_norm": 0.12459909170866013, "learning_rate": 0.0005, "loss": 2.1119, "step": 134430 }, { "epoch": 0.5117118214413495, "grad_norm": 0.11901148408651352, "learning_rate": 0.0005, "loss": 2.1184, "step": 134440 }, { "epoch": 0.5117498839094722, "grad_norm": 0.12968461215496063, "learning_rate": 0.0005, "loss": 2.1106, "step": 134450 }, { "epoch": 0.511787946377595, "grad_norm": 0.13257619738578796, "learning_rate": 0.0005, "loss": 2.1127, "step": 134460 }, { "epoch": 0.5118260088457176, "grad_norm": 0.11957278102636337, "learning_rate": 0.0005, "loss": 2.1064, "step": 134470 }, { "epoch": 0.5118640713138403, "grad_norm": 0.1330718696117401, "learning_rate": 0.0005, "loss": 2.1115, "step": 134480 }, { "epoch": 0.5119021337819629, "grad_norm": 0.12618844211101532, "learning_rate": 0.0005, "loss": 2.1266, "step": 134490 }, { "epoch": 0.5119401962500857, "grad_norm": 0.1173240914940834, "learning_rate": 0.0005, "loss": 2.1129, "step": 134500 }, { "epoch": 0.5119782587182083, "grad_norm": 0.11414072662591934, "learning_rate": 0.0005, "loss": 2.1124, "step": 134510 }, { "epoch": 0.512016321186331, "grad_norm": 0.12122435122728348, "learning_rate": 0.0005, "loss": 2.1198, "step": 134520 }, { "epoch": 0.5120543836544537, "grad_norm": 0.12351789325475693, "learning_rate": 0.0005, "loss": 2.1277, "step": 134530 }, { "epoch": 0.5120924461225763, "grad_norm": 0.1277690827846527, "learning_rate": 0.0005, "loss": 2.1121, "step": 134540 }, { "epoch": 0.5121305085906991, "grad_norm": 0.12026087939739227, "learning_rate": 0.0005, "loss": 2.1285, "step": 134550 }, { "epoch": 0.5121685710588217, "grad_norm": 0.12273011356592178, "learning_rate": 0.0005, "loss": 2.1257, "step": 134560 }, { "epoch": 0.5122066335269444, "grad_norm": 0.13297882676124573, "learning_rate": 0.0005, "loss": 2.1374, "step": 134570 }, { "epoch": 0.5122446959950671, "grad_norm": 0.1275002360343933, "learning_rate": 0.0005, "loss": 2.1369, "step": 134580 }, { "epoch": 0.5122827584631898, "grad_norm": 0.13620533049106598, "learning_rate": 0.0005, "loss": 2.1062, "step": 134590 }, { "epoch": 0.5123208209313125, "grad_norm": 0.14599129557609558, "learning_rate": 0.0005, "loss": 2.0967, "step": 134600 }, { "epoch": 0.5123588833994351, "grad_norm": 0.11535637825727463, "learning_rate": 0.0005, "loss": 2.1285, "step": 134610 }, { "epoch": 0.5123969458675578, "grad_norm": 0.12820352613925934, "learning_rate": 0.0005, "loss": 2.0817, "step": 134620 }, { "epoch": 0.5124350083356806, "grad_norm": 0.12669673562049866, "learning_rate": 0.0005, "loss": 2.1086, "step": 134630 }, { "epoch": 0.5124730708038032, "grad_norm": 0.13037653267383575, "learning_rate": 0.0005, "loss": 2.1168, "step": 134640 }, { "epoch": 0.5125111332719259, "grad_norm": 0.12768852710723877, "learning_rate": 0.0005, "loss": 2.1161, "step": 134650 }, { "epoch": 0.5125491957400485, "grad_norm": 0.1387774795293808, "learning_rate": 0.0005, "loss": 2.1104, "step": 134660 }, { "epoch": 0.5125872582081713, "grad_norm": 0.13335128128528595, "learning_rate": 0.0005, "loss": 2.1248, "step": 134670 }, { "epoch": 0.512625320676294, "grad_norm": 0.14019380509853363, "learning_rate": 0.0005, "loss": 2.1145, "step": 134680 }, { "epoch": 0.5126633831444166, "grad_norm": 0.13378937542438507, "learning_rate": 0.0005, "loss": 2.1039, "step": 134690 }, { "epoch": 0.5127014456125393, "grad_norm": 0.11569223552942276, "learning_rate": 0.0005, "loss": 2.0998, "step": 134700 }, { "epoch": 0.5127395080806619, "grad_norm": 0.12776020169258118, "learning_rate": 0.0005, "loss": 2.1196, "step": 134710 }, { "epoch": 0.5127775705487847, "grad_norm": 0.1253037303686142, "learning_rate": 0.0005, "loss": 2.1091, "step": 134720 }, { "epoch": 0.5128156330169074, "grad_norm": 0.1115291565656662, "learning_rate": 0.0005, "loss": 2.1046, "step": 134730 }, { "epoch": 0.51285369548503, "grad_norm": 0.1221696212887764, "learning_rate": 0.0005, "loss": 2.1144, "step": 134740 }, { "epoch": 0.5128917579531527, "grad_norm": 0.12305761128664017, "learning_rate": 0.0005, "loss": 2.1158, "step": 134750 }, { "epoch": 0.5129298204212754, "grad_norm": 0.13608863949775696, "learning_rate": 0.0005, "loss": 2.1121, "step": 134760 }, { "epoch": 0.5129678828893981, "grad_norm": 0.13254103064537048, "learning_rate": 0.0005, "loss": 2.121, "step": 134770 }, { "epoch": 0.5130059453575208, "grad_norm": 0.12625229358673096, "learning_rate": 0.0005, "loss": 2.104, "step": 134780 }, { "epoch": 0.5130440078256434, "grad_norm": 0.12322506308555603, "learning_rate": 0.0005, "loss": 2.1188, "step": 134790 }, { "epoch": 0.5130820702937662, "grad_norm": 0.12616781890392303, "learning_rate": 0.0005, "loss": 2.118, "step": 134800 }, { "epoch": 0.5131201327618888, "grad_norm": 0.11625972390174866, "learning_rate": 0.0005, "loss": 2.1154, "step": 134810 }, { "epoch": 0.5131581952300115, "grad_norm": 0.13064908981323242, "learning_rate": 0.0005, "loss": 2.1257, "step": 134820 }, { "epoch": 0.5131962576981342, "grad_norm": 0.13486990332603455, "learning_rate": 0.0005, "loss": 2.1064, "step": 134830 }, { "epoch": 0.5132343201662568, "grad_norm": 0.1420249342918396, "learning_rate": 0.0005, "loss": 2.1211, "step": 134840 }, { "epoch": 0.5132723826343796, "grad_norm": 0.11938636749982834, "learning_rate": 0.0005, "loss": 2.1236, "step": 134850 }, { "epoch": 0.5133104451025022, "grad_norm": 0.12307589501142502, "learning_rate": 0.0005, "loss": 2.1249, "step": 134860 }, { "epoch": 0.5133485075706249, "grad_norm": 0.21486619114875793, "learning_rate": 0.0005, "loss": 2.1244, "step": 134870 }, { "epoch": 0.5133865700387475, "grad_norm": 0.13930252194404602, "learning_rate": 0.0005, "loss": 2.1061, "step": 134880 }, { "epoch": 0.5134246325068703, "grad_norm": 0.1287679225206375, "learning_rate": 0.0005, "loss": 2.1164, "step": 134890 }, { "epoch": 0.513462694974993, "grad_norm": 0.12399435043334961, "learning_rate": 0.0005, "loss": 2.1314, "step": 134900 }, { "epoch": 0.5135007574431156, "grad_norm": 0.1333046853542328, "learning_rate": 0.0005, "loss": 2.1087, "step": 134910 }, { "epoch": 0.5135388199112383, "grad_norm": 0.12432458996772766, "learning_rate": 0.0005, "loss": 2.1088, "step": 134920 }, { "epoch": 0.513576882379361, "grad_norm": 0.12399007380008698, "learning_rate": 0.0005, "loss": 2.1092, "step": 134930 }, { "epoch": 0.5136149448474837, "grad_norm": 0.13366355001926422, "learning_rate": 0.0005, "loss": 2.1291, "step": 134940 }, { "epoch": 0.5136530073156064, "grad_norm": 0.13235674798488617, "learning_rate": 0.0005, "loss": 2.1282, "step": 134950 }, { "epoch": 0.513691069783729, "grad_norm": 0.12507614493370056, "learning_rate": 0.0005, "loss": 2.1112, "step": 134960 }, { "epoch": 0.5137291322518518, "grad_norm": 0.11385700106620789, "learning_rate": 0.0005, "loss": 2.1046, "step": 134970 }, { "epoch": 0.5137671947199745, "grad_norm": 0.12334541231393814, "learning_rate": 0.0005, "loss": 2.1198, "step": 134980 }, { "epoch": 0.5138052571880971, "grad_norm": 0.11451227217912674, "learning_rate": 0.0005, "loss": 2.1239, "step": 134990 }, { "epoch": 0.5138433196562198, "grad_norm": 0.10950082540512085, "learning_rate": 0.0005, "loss": 2.1026, "step": 135000 }, { "epoch": 0.5138813821243424, "grad_norm": 0.11528286337852478, "learning_rate": 0.0005, "loss": 2.1183, "step": 135010 }, { "epoch": 0.5139194445924652, "grad_norm": 0.12344331294298172, "learning_rate": 0.0005, "loss": 2.1249, "step": 135020 }, { "epoch": 0.5139575070605878, "grad_norm": 0.12382146716117859, "learning_rate": 0.0005, "loss": 2.1087, "step": 135030 }, { "epoch": 0.5139955695287105, "grad_norm": 0.1312481313943863, "learning_rate": 0.0005, "loss": 2.1383, "step": 135040 }, { "epoch": 0.5140336319968332, "grad_norm": 0.11916206777095795, "learning_rate": 0.0005, "loss": 2.1229, "step": 135050 }, { "epoch": 0.5140716944649559, "grad_norm": 0.11896311491727829, "learning_rate": 0.0005, "loss": 2.1103, "step": 135060 }, { "epoch": 0.5141097569330786, "grad_norm": 0.11607718467712402, "learning_rate": 0.0005, "loss": 2.1194, "step": 135070 }, { "epoch": 0.5141478194012012, "grad_norm": 0.12421748042106628, "learning_rate": 0.0005, "loss": 2.118, "step": 135080 }, { "epoch": 0.5141858818693239, "grad_norm": 0.11468444019556046, "learning_rate": 0.0005, "loss": 2.117, "step": 135090 }, { "epoch": 0.5142239443374467, "grad_norm": 0.12319518625736237, "learning_rate": 0.0005, "loss": 2.1305, "step": 135100 }, { "epoch": 0.5142620068055693, "grad_norm": 0.12367475032806396, "learning_rate": 0.0005, "loss": 2.1035, "step": 135110 }, { "epoch": 0.514300069273692, "grad_norm": 0.12323194742202759, "learning_rate": 0.0005, "loss": 2.1113, "step": 135120 }, { "epoch": 0.5143381317418146, "grad_norm": 0.13855737447738647, "learning_rate": 0.0005, "loss": 2.1188, "step": 135130 }, { "epoch": 0.5143761942099373, "grad_norm": 0.12522666156291962, "learning_rate": 0.0005, "loss": 2.1121, "step": 135140 }, { "epoch": 0.5144142566780601, "grad_norm": 0.12855994701385498, "learning_rate": 0.0005, "loss": 2.104, "step": 135150 }, { "epoch": 0.5144523191461827, "grad_norm": 0.13384929299354553, "learning_rate": 0.0005, "loss": 2.1027, "step": 135160 }, { "epoch": 0.5144903816143054, "grad_norm": 0.14026358723640442, "learning_rate": 0.0005, "loss": 2.1224, "step": 135170 }, { "epoch": 0.514528444082428, "grad_norm": 0.1325213462114334, "learning_rate": 0.0005, "loss": 2.1072, "step": 135180 }, { "epoch": 0.5145665065505508, "grad_norm": 0.12667711079120636, "learning_rate": 0.0005, "loss": 2.1182, "step": 135190 }, { "epoch": 0.5146045690186735, "grad_norm": 0.1194978654384613, "learning_rate": 0.0005, "loss": 2.1244, "step": 135200 }, { "epoch": 0.5146426314867961, "grad_norm": 0.12435804307460785, "learning_rate": 0.0005, "loss": 2.1245, "step": 135210 }, { "epoch": 0.5146806939549188, "grad_norm": 0.12173999845981598, "learning_rate": 0.0005, "loss": 2.1177, "step": 135220 }, { "epoch": 0.5147187564230415, "grad_norm": 0.12142159789800644, "learning_rate": 0.0005, "loss": 2.1138, "step": 135230 }, { "epoch": 0.5147568188911642, "grad_norm": 0.12548331916332245, "learning_rate": 0.0005, "loss": 2.1182, "step": 135240 }, { "epoch": 0.5147948813592869, "grad_norm": 0.13339272141456604, "learning_rate": 0.0005, "loss": 2.1288, "step": 135250 }, { "epoch": 0.5148329438274095, "grad_norm": 0.13147401809692383, "learning_rate": 0.0005, "loss": 2.1166, "step": 135260 }, { "epoch": 0.5148710062955322, "grad_norm": 0.132236048579216, "learning_rate": 0.0005, "loss": 2.1165, "step": 135270 }, { "epoch": 0.5149090687636549, "grad_norm": 0.1356712132692337, "learning_rate": 0.0005, "loss": 2.111, "step": 135280 }, { "epoch": 0.5149471312317776, "grad_norm": 0.1267666220664978, "learning_rate": 0.0005, "loss": 2.1137, "step": 135290 }, { "epoch": 0.5149851936999003, "grad_norm": 0.12242377549409866, "learning_rate": 0.0005, "loss": 2.1145, "step": 135300 }, { "epoch": 0.5150232561680229, "grad_norm": 0.11466419696807861, "learning_rate": 0.0005, "loss": 2.1051, "step": 135310 }, { "epoch": 0.5150613186361457, "grad_norm": 0.12359566986560822, "learning_rate": 0.0005, "loss": 2.1256, "step": 135320 }, { "epoch": 0.5150993811042683, "grad_norm": 0.12927506864070892, "learning_rate": 0.0005, "loss": 2.1326, "step": 135330 }, { "epoch": 0.515137443572391, "grad_norm": 0.12114574760198593, "learning_rate": 0.0005, "loss": 2.1112, "step": 135340 }, { "epoch": 0.5151755060405137, "grad_norm": 0.1556452363729477, "learning_rate": 0.0005, "loss": 2.1085, "step": 135350 }, { "epoch": 0.5152135685086364, "grad_norm": 0.13409800827503204, "learning_rate": 0.0005, "loss": 2.1189, "step": 135360 }, { "epoch": 0.5152516309767591, "grad_norm": 0.1229565292596817, "learning_rate": 0.0005, "loss": 2.115, "step": 135370 }, { "epoch": 0.5152896934448817, "grad_norm": 0.12051396816968918, "learning_rate": 0.0005, "loss": 2.1093, "step": 135380 }, { "epoch": 0.5153277559130044, "grad_norm": 0.12776722013950348, "learning_rate": 0.0005, "loss": 2.1178, "step": 135390 }, { "epoch": 0.5153658183811272, "grad_norm": 0.12183408439159393, "learning_rate": 0.0005, "loss": 2.0979, "step": 135400 }, { "epoch": 0.5154038808492498, "grad_norm": 0.12558986246585846, "learning_rate": 0.0005, "loss": 2.1292, "step": 135410 }, { "epoch": 0.5154419433173725, "grad_norm": 0.12178440392017365, "learning_rate": 0.0005, "loss": 2.1196, "step": 135420 }, { "epoch": 0.5154800057854951, "grad_norm": 0.11090958118438721, "learning_rate": 0.0005, "loss": 2.1114, "step": 135430 }, { "epoch": 0.5155180682536178, "grad_norm": 0.12164914608001709, "learning_rate": 0.0005, "loss": 2.1171, "step": 135440 }, { "epoch": 0.5155561307217406, "grad_norm": 0.12471088767051697, "learning_rate": 0.0005, "loss": 2.1161, "step": 135450 }, { "epoch": 0.5155941931898632, "grad_norm": 0.11975961923599243, "learning_rate": 0.0005, "loss": 2.1044, "step": 135460 }, { "epoch": 0.5156322556579859, "grad_norm": 0.1201275885105133, "learning_rate": 0.0005, "loss": 2.1137, "step": 135470 }, { "epoch": 0.5156703181261085, "grad_norm": 0.1290825456380844, "learning_rate": 0.0005, "loss": 2.1102, "step": 135480 }, { "epoch": 0.5157083805942313, "grad_norm": 0.12944602966308594, "learning_rate": 0.0005, "loss": 2.1137, "step": 135490 }, { "epoch": 0.515746443062354, "grad_norm": 0.11592540889978409, "learning_rate": 0.0005, "loss": 2.1166, "step": 135500 }, { "epoch": 0.5157845055304766, "grad_norm": 0.12791410088539124, "learning_rate": 0.0005, "loss": 2.1126, "step": 135510 }, { "epoch": 0.5158225679985993, "grad_norm": 0.12224525213241577, "learning_rate": 0.0005, "loss": 2.1107, "step": 135520 }, { "epoch": 0.515860630466722, "grad_norm": 0.11816330254077911, "learning_rate": 0.0005, "loss": 2.1288, "step": 135530 }, { "epoch": 0.5158986929348447, "grad_norm": 0.11541763693094254, "learning_rate": 0.0005, "loss": 2.1181, "step": 135540 }, { "epoch": 0.5159367554029674, "grad_norm": 0.13468272984027863, "learning_rate": 0.0005, "loss": 2.1197, "step": 135550 }, { "epoch": 0.51597481787109, "grad_norm": 0.11575797945261002, "learning_rate": 0.0005, "loss": 2.1249, "step": 135560 }, { "epoch": 0.5160128803392127, "grad_norm": 0.11992021650075912, "learning_rate": 0.0005, "loss": 2.1205, "step": 135570 }, { "epoch": 0.5160509428073354, "grad_norm": 0.12182078510522842, "learning_rate": 0.0005, "loss": 2.107, "step": 135580 }, { "epoch": 0.5160890052754581, "grad_norm": 0.12293105572462082, "learning_rate": 0.0005, "loss": 2.1228, "step": 135590 }, { "epoch": 0.5161270677435807, "grad_norm": 0.13838279247283936, "learning_rate": 0.0005, "loss": 2.1153, "step": 135600 }, { "epoch": 0.5161651302117034, "grad_norm": 0.11866675317287445, "learning_rate": 0.0005, "loss": 2.1063, "step": 135610 }, { "epoch": 0.5162031926798262, "grad_norm": 0.13188183307647705, "learning_rate": 0.0005, "loss": 2.1316, "step": 135620 }, { "epoch": 0.5162412551479488, "grad_norm": 0.12662546336650848, "learning_rate": 0.0005, "loss": 2.1262, "step": 135630 }, { "epoch": 0.5162793176160715, "grad_norm": 0.12657001614570618, "learning_rate": 0.0005, "loss": 2.125, "step": 135640 }, { "epoch": 0.5163173800841941, "grad_norm": 0.146830752491951, "learning_rate": 0.0005, "loss": 2.1184, "step": 135650 }, { "epoch": 0.5163554425523169, "grad_norm": 0.12488903850317001, "learning_rate": 0.0005, "loss": 2.12, "step": 135660 }, { "epoch": 0.5163935050204396, "grad_norm": 0.12733852863311768, "learning_rate": 0.0005, "loss": 2.1048, "step": 135670 }, { "epoch": 0.5164315674885622, "grad_norm": 0.12367139756679535, "learning_rate": 0.0005, "loss": 2.1279, "step": 135680 }, { "epoch": 0.5164696299566849, "grad_norm": 0.1233339011669159, "learning_rate": 0.0005, "loss": 2.1204, "step": 135690 }, { "epoch": 0.5165076924248075, "grad_norm": 0.12250369787216187, "learning_rate": 0.0005, "loss": 2.1125, "step": 135700 }, { "epoch": 0.5165457548929303, "grad_norm": 0.127598375082016, "learning_rate": 0.0005, "loss": 2.1114, "step": 135710 }, { "epoch": 0.516583817361053, "grad_norm": 0.12334663420915604, "learning_rate": 0.0005, "loss": 2.1259, "step": 135720 }, { "epoch": 0.5166218798291756, "grad_norm": 0.12220164388418198, "learning_rate": 0.0005, "loss": 2.1054, "step": 135730 }, { "epoch": 0.5166599422972983, "grad_norm": 0.11895354092121124, "learning_rate": 0.0005, "loss": 2.1097, "step": 135740 }, { "epoch": 0.516698004765421, "grad_norm": 0.11887135356664658, "learning_rate": 0.0005, "loss": 2.1215, "step": 135750 }, { "epoch": 0.5167360672335437, "grad_norm": 0.13403892517089844, "learning_rate": 0.0005, "loss": 2.1169, "step": 135760 }, { "epoch": 0.5167741297016664, "grad_norm": 0.13431161642074585, "learning_rate": 0.0005, "loss": 2.1243, "step": 135770 }, { "epoch": 0.516812192169789, "grad_norm": 0.1349843144416809, "learning_rate": 0.0005, "loss": 2.1216, "step": 135780 }, { "epoch": 0.5168502546379118, "grad_norm": 0.26945772767066956, "learning_rate": 0.0005, "loss": 2.1284, "step": 135790 }, { "epoch": 0.5168883171060344, "grad_norm": 0.11636281758546829, "learning_rate": 0.0005, "loss": 2.1077, "step": 135800 }, { "epoch": 0.5169263795741571, "grad_norm": 0.11911586672067642, "learning_rate": 0.0005, "loss": 2.1258, "step": 135810 }, { "epoch": 0.5169644420422798, "grad_norm": 0.1230466291308403, "learning_rate": 0.0005, "loss": 2.1302, "step": 135820 }, { "epoch": 0.5170025045104025, "grad_norm": 0.11399305611848831, "learning_rate": 0.0005, "loss": 2.1099, "step": 135830 }, { "epoch": 0.5170405669785252, "grad_norm": 0.12976102530956268, "learning_rate": 0.0005, "loss": 2.1257, "step": 135840 }, { "epoch": 0.5170786294466478, "grad_norm": 0.122156523168087, "learning_rate": 0.0005, "loss": 2.1212, "step": 135850 }, { "epoch": 0.5171166919147705, "grad_norm": 0.11330889910459518, "learning_rate": 0.0005, "loss": 2.1239, "step": 135860 }, { "epoch": 0.5171547543828932, "grad_norm": 0.13504265248775482, "learning_rate": 0.0005, "loss": 2.1015, "step": 135870 }, { "epoch": 0.5171928168510159, "grad_norm": 0.1345272660255432, "learning_rate": 0.0005, "loss": 2.1232, "step": 135880 }, { "epoch": 0.5172308793191386, "grad_norm": 0.15253782272338867, "learning_rate": 0.0005, "loss": 2.1013, "step": 135890 }, { "epoch": 0.5172689417872612, "grad_norm": 0.11856013536453247, "learning_rate": 0.0005, "loss": 2.102, "step": 135900 }, { "epoch": 0.5173070042553839, "grad_norm": 0.11281318217515945, "learning_rate": 0.0005, "loss": 2.1126, "step": 135910 }, { "epoch": 0.5173450667235067, "grad_norm": 0.12678377330303192, "learning_rate": 0.0005, "loss": 2.1229, "step": 135920 }, { "epoch": 0.5173831291916293, "grad_norm": 0.11883621662855148, "learning_rate": 0.0005, "loss": 2.1121, "step": 135930 }, { "epoch": 0.517421191659752, "grad_norm": 0.13795502483844757, "learning_rate": 0.0005, "loss": 2.1193, "step": 135940 }, { "epoch": 0.5174592541278746, "grad_norm": 0.12521414458751678, "learning_rate": 0.0005, "loss": 2.1025, "step": 135950 }, { "epoch": 0.5174973165959974, "grad_norm": 0.1392013430595398, "learning_rate": 0.0005, "loss": 2.1232, "step": 135960 }, { "epoch": 0.5175353790641201, "grad_norm": 0.13591915369033813, "learning_rate": 0.0005, "loss": 2.1162, "step": 135970 }, { "epoch": 0.5175734415322427, "grad_norm": 0.13453233242034912, "learning_rate": 0.0005, "loss": 2.1248, "step": 135980 }, { "epoch": 0.5176115040003654, "grad_norm": 0.12628062069416046, "learning_rate": 0.0005, "loss": 2.0976, "step": 135990 }, { "epoch": 0.517649566468488, "grad_norm": 0.12293115258216858, "learning_rate": 0.0005, "loss": 2.1255, "step": 136000 }, { "epoch": 0.5176876289366108, "grad_norm": 0.1296631395816803, "learning_rate": 0.0005, "loss": 2.1208, "step": 136010 }, { "epoch": 0.5177256914047335, "grad_norm": 0.1257656365633011, "learning_rate": 0.0005, "loss": 2.105, "step": 136020 }, { "epoch": 0.5177637538728561, "grad_norm": 0.12673147022724152, "learning_rate": 0.0005, "loss": 2.1167, "step": 136030 }, { "epoch": 0.5178018163409788, "grad_norm": 0.13222235441207886, "learning_rate": 0.0005, "loss": 2.1096, "step": 136040 }, { "epoch": 0.5178398788091015, "grad_norm": 0.11780260503292084, "learning_rate": 0.0005, "loss": 2.1035, "step": 136050 }, { "epoch": 0.5178779412772242, "grad_norm": 0.1233372688293457, "learning_rate": 0.0005, "loss": 2.1087, "step": 136060 }, { "epoch": 0.5179160037453469, "grad_norm": 0.11666984856128693, "learning_rate": 0.0005, "loss": 2.1129, "step": 136070 }, { "epoch": 0.5179540662134695, "grad_norm": 0.11872230470180511, "learning_rate": 0.0005, "loss": 2.1111, "step": 136080 }, { "epoch": 0.5179921286815923, "grad_norm": 0.14317534863948822, "learning_rate": 0.0005, "loss": 2.1117, "step": 136090 }, { "epoch": 0.5180301911497149, "grad_norm": 0.11910288780927658, "learning_rate": 0.0005, "loss": 2.1301, "step": 136100 }, { "epoch": 0.5180682536178376, "grad_norm": 0.13141492009162903, "learning_rate": 0.0005, "loss": 2.1145, "step": 136110 }, { "epoch": 0.5181063160859603, "grad_norm": 0.11873306334018707, "learning_rate": 0.0005, "loss": 2.1049, "step": 136120 }, { "epoch": 0.5181443785540829, "grad_norm": 0.12319959700107574, "learning_rate": 0.0005, "loss": 2.124, "step": 136130 }, { "epoch": 0.5181824410222057, "grad_norm": 0.12672430276870728, "learning_rate": 0.0005, "loss": 2.12, "step": 136140 }, { "epoch": 0.5182205034903283, "grad_norm": 0.12143968045711517, "learning_rate": 0.0005, "loss": 2.1107, "step": 136150 }, { "epoch": 0.518258565958451, "grad_norm": 0.1358950138092041, "learning_rate": 0.0005, "loss": 2.1127, "step": 136160 }, { "epoch": 0.5182966284265736, "grad_norm": 0.11966950446367264, "learning_rate": 0.0005, "loss": 2.1173, "step": 136170 }, { "epoch": 0.5183346908946964, "grad_norm": 0.13092869520187378, "learning_rate": 0.0005, "loss": 2.1043, "step": 136180 }, { "epoch": 0.5183727533628191, "grad_norm": 0.13360098004341125, "learning_rate": 0.0005, "loss": 2.1085, "step": 136190 }, { "epoch": 0.5184108158309417, "grad_norm": 0.1463603377342224, "learning_rate": 0.0005, "loss": 2.1277, "step": 136200 }, { "epoch": 0.5184488782990644, "grad_norm": 0.12061507254838943, "learning_rate": 0.0005, "loss": 2.1105, "step": 136210 }, { "epoch": 0.5184869407671872, "grad_norm": 0.12189993262290955, "learning_rate": 0.0005, "loss": 2.118, "step": 136220 }, { "epoch": 0.5185250032353098, "grad_norm": 0.11584679037332535, "learning_rate": 0.0005, "loss": 2.1196, "step": 136230 }, { "epoch": 0.5185630657034325, "grad_norm": 0.12555517256259918, "learning_rate": 0.0005, "loss": 2.1181, "step": 136240 }, { "epoch": 0.5186011281715551, "grad_norm": 0.12317892163991928, "learning_rate": 0.0005, "loss": 2.1141, "step": 136250 }, { "epoch": 0.5186391906396779, "grad_norm": 0.12790797650814056, "learning_rate": 0.0005, "loss": 2.1247, "step": 136260 }, { "epoch": 0.5186772531078006, "grad_norm": 0.13499782979488373, "learning_rate": 0.0005, "loss": 2.1196, "step": 136270 }, { "epoch": 0.5187153155759232, "grad_norm": 0.13588197529315948, "learning_rate": 0.0005, "loss": 2.1288, "step": 136280 }, { "epoch": 0.5187533780440459, "grad_norm": 0.12234912067651749, "learning_rate": 0.0005, "loss": 2.1095, "step": 136290 }, { "epoch": 0.5187914405121685, "grad_norm": 0.1208055168390274, "learning_rate": 0.0005, "loss": 2.0943, "step": 136300 }, { "epoch": 0.5188295029802913, "grad_norm": 0.12459038197994232, "learning_rate": 0.0005, "loss": 2.1099, "step": 136310 }, { "epoch": 0.518867565448414, "grad_norm": 0.13378497958183289, "learning_rate": 0.0005, "loss": 2.1313, "step": 136320 }, { "epoch": 0.5189056279165366, "grad_norm": 0.13146470487117767, "learning_rate": 0.0005, "loss": 2.1272, "step": 136330 }, { "epoch": 0.5189436903846593, "grad_norm": 0.12453589588403702, "learning_rate": 0.0005, "loss": 2.1286, "step": 136340 }, { "epoch": 0.518981752852782, "grad_norm": 0.13514123857021332, "learning_rate": 0.0005, "loss": 2.1307, "step": 136350 }, { "epoch": 0.5190198153209047, "grad_norm": 0.12778690457344055, "learning_rate": 0.0005, "loss": 2.1151, "step": 136360 }, { "epoch": 0.5190578777890273, "grad_norm": 0.32647815346717834, "learning_rate": 0.0005, "loss": 2.1223, "step": 136370 }, { "epoch": 0.51909594025715, "grad_norm": 0.11649586260318756, "learning_rate": 0.0005, "loss": 2.1244, "step": 136380 }, { "epoch": 0.5191340027252728, "grad_norm": 0.11900041252374649, "learning_rate": 0.0005, "loss": 2.1166, "step": 136390 }, { "epoch": 0.5191720651933954, "grad_norm": 0.1257382035255432, "learning_rate": 0.0005, "loss": 2.1095, "step": 136400 }, { "epoch": 0.5192101276615181, "grad_norm": 0.12999407947063446, "learning_rate": 0.0005, "loss": 2.1113, "step": 136410 }, { "epoch": 0.5192481901296407, "grad_norm": 0.12291140109300613, "learning_rate": 0.0005, "loss": 2.1203, "step": 136420 }, { "epoch": 0.5192862525977634, "grad_norm": 0.12455683946609497, "learning_rate": 0.0005, "loss": 2.1232, "step": 136430 }, { "epoch": 0.5193243150658862, "grad_norm": 0.1276245266199112, "learning_rate": 0.0005, "loss": 2.0975, "step": 136440 }, { "epoch": 0.5193623775340088, "grad_norm": 0.11303102225065231, "learning_rate": 0.0005, "loss": 2.1002, "step": 136450 }, { "epoch": 0.5194004400021315, "grad_norm": 0.11451926827430725, "learning_rate": 0.0005, "loss": 2.103, "step": 136460 }, { "epoch": 0.5194385024702541, "grad_norm": 0.12597942352294922, "learning_rate": 0.0005, "loss": 2.099, "step": 136470 }, { "epoch": 0.5194765649383769, "grad_norm": 0.1587076187133789, "learning_rate": 0.0005, "loss": 2.1196, "step": 136480 }, { "epoch": 0.5195146274064996, "grad_norm": 0.12485882639884949, "learning_rate": 0.0005, "loss": 2.1067, "step": 136490 }, { "epoch": 0.5195526898746222, "grad_norm": 0.12216902524232864, "learning_rate": 0.0005, "loss": 2.1144, "step": 136500 }, { "epoch": 0.5195907523427449, "grad_norm": 0.14184367656707764, "learning_rate": 0.0005, "loss": 2.1265, "step": 136510 }, { "epoch": 0.5196288148108676, "grad_norm": 0.13335593044757843, "learning_rate": 0.0005, "loss": 2.1174, "step": 136520 }, { "epoch": 0.5196668772789903, "grad_norm": 0.12494330108165741, "learning_rate": 0.0005, "loss": 2.106, "step": 136530 }, { "epoch": 0.519704939747113, "grad_norm": 0.1236383244395256, "learning_rate": 0.0005, "loss": 2.1038, "step": 136540 }, { "epoch": 0.5197430022152356, "grad_norm": 0.12758508324623108, "learning_rate": 0.0005, "loss": 2.1136, "step": 136550 }, { "epoch": 0.5197810646833583, "grad_norm": 0.13176749646663666, "learning_rate": 0.0005, "loss": 2.1215, "step": 136560 }, { "epoch": 0.519819127151481, "grad_norm": 0.1244225725531578, "learning_rate": 0.0005, "loss": 2.1118, "step": 136570 }, { "epoch": 0.5198571896196037, "grad_norm": 0.1265987902879715, "learning_rate": 0.0005, "loss": 2.1072, "step": 136580 }, { "epoch": 0.5198952520877264, "grad_norm": 0.11397150158882141, "learning_rate": 0.0005, "loss": 2.1197, "step": 136590 }, { "epoch": 0.519933314555849, "grad_norm": 0.11553878337144852, "learning_rate": 0.0005, "loss": 2.1277, "step": 136600 }, { "epoch": 0.5199713770239718, "grad_norm": 0.11481893807649612, "learning_rate": 0.0005, "loss": 2.1212, "step": 136610 }, { "epoch": 0.5200094394920944, "grad_norm": 0.12730897963047028, "learning_rate": 0.0005, "loss": 2.0991, "step": 136620 }, { "epoch": 0.5200475019602171, "grad_norm": 0.1259535700082779, "learning_rate": 0.0005, "loss": 2.1263, "step": 136630 }, { "epoch": 0.5200855644283398, "grad_norm": 0.12053762376308441, "learning_rate": 0.0005, "loss": 2.1146, "step": 136640 }, { "epoch": 0.5201236268964625, "grad_norm": 0.13139915466308594, "learning_rate": 0.0005, "loss": 2.11, "step": 136650 }, { "epoch": 0.5201616893645852, "grad_norm": 0.11750254034996033, "learning_rate": 0.0005, "loss": 2.1084, "step": 136660 }, { "epoch": 0.5201997518327078, "grad_norm": 0.12227505445480347, "learning_rate": 0.0005, "loss": 2.1127, "step": 136670 }, { "epoch": 0.5202378143008305, "grad_norm": 0.1355392038822174, "learning_rate": 0.0005, "loss": 2.115, "step": 136680 }, { "epoch": 0.5202758767689533, "grad_norm": 0.13740362226963043, "learning_rate": 0.0005, "loss": 2.1141, "step": 136690 }, { "epoch": 0.5203139392370759, "grad_norm": 0.1517869085073471, "learning_rate": 0.0005, "loss": 2.11, "step": 136700 }, { "epoch": 0.5203520017051986, "grad_norm": 0.12694501876831055, "learning_rate": 0.0005, "loss": 2.1106, "step": 136710 }, { "epoch": 0.5203900641733212, "grad_norm": 0.11573896557092667, "learning_rate": 0.0005, "loss": 2.1017, "step": 136720 }, { "epoch": 0.5204281266414439, "grad_norm": 0.12164445966482162, "learning_rate": 0.0005, "loss": 2.1021, "step": 136730 }, { "epoch": 0.5204661891095667, "grad_norm": 0.12111037224531174, "learning_rate": 0.0005, "loss": 2.1117, "step": 136740 }, { "epoch": 0.5205042515776893, "grad_norm": 0.11075384169816971, "learning_rate": 0.0005, "loss": 2.0966, "step": 136750 }, { "epoch": 0.520542314045812, "grad_norm": 0.13149183988571167, "learning_rate": 0.0005, "loss": 2.1119, "step": 136760 }, { "epoch": 0.5205803765139346, "grad_norm": 0.1264672875404358, "learning_rate": 0.0005, "loss": 2.1253, "step": 136770 }, { "epoch": 0.5206184389820574, "grad_norm": 0.13758137822151184, "learning_rate": 0.0005, "loss": 2.1095, "step": 136780 }, { "epoch": 0.52065650145018, "grad_norm": 0.11698931455612183, "learning_rate": 0.0005, "loss": 2.1182, "step": 136790 }, { "epoch": 0.5206945639183027, "grad_norm": 0.131836399435997, "learning_rate": 0.0005, "loss": 2.1144, "step": 136800 }, { "epoch": 0.5207326263864254, "grad_norm": 0.12469332665205002, "learning_rate": 0.0005, "loss": 2.1098, "step": 136810 }, { "epoch": 0.5207706888545481, "grad_norm": 0.13206984102725983, "learning_rate": 0.0005, "loss": 2.1103, "step": 136820 }, { "epoch": 0.5208087513226708, "grad_norm": 0.12733477354049683, "learning_rate": 0.0005, "loss": 2.1168, "step": 136830 }, { "epoch": 0.5208468137907935, "grad_norm": 0.12322621047496796, "learning_rate": 0.0005, "loss": 2.111, "step": 136840 }, { "epoch": 0.5208848762589161, "grad_norm": 0.13477306067943573, "learning_rate": 0.0005, "loss": 2.1268, "step": 136850 }, { "epoch": 0.5209229387270388, "grad_norm": 0.11726487427949905, "learning_rate": 0.0005, "loss": 2.117, "step": 136860 }, { "epoch": 0.5209610011951615, "grad_norm": 0.13089509308338165, "learning_rate": 0.0005, "loss": 2.1126, "step": 136870 }, { "epoch": 0.5209990636632842, "grad_norm": 0.13881553709506989, "learning_rate": 0.0005, "loss": 2.1164, "step": 136880 }, { "epoch": 0.5210371261314068, "grad_norm": 0.1210898607969284, "learning_rate": 0.0005, "loss": 2.1209, "step": 136890 }, { "epoch": 0.5210751885995295, "grad_norm": 0.12279921770095825, "learning_rate": 0.0005, "loss": 2.1111, "step": 136900 }, { "epoch": 0.5211132510676523, "grad_norm": 0.12662425637245178, "learning_rate": 0.0005, "loss": 2.1084, "step": 136910 }, { "epoch": 0.5211513135357749, "grad_norm": 0.11807191371917725, "learning_rate": 0.0005, "loss": 2.1044, "step": 136920 }, { "epoch": 0.5211893760038976, "grad_norm": 0.12020532786846161, "learning_rate": 0.0005, "loss": 2.1259, "step": 136930 }, { "epoch": 0.5212274384720202, "grad_norm": 0.12616896629333496, "learning_rate": 0.0005, "loss": 2.1267, "step": 136940 }, { "epoch": 0.521265500940143, "grad_norm": 0.13148759305477142, "learning_rate": 0.0005, "loss": 2.1093, "step": 136950 }, { "epoch": 0.5213035634082657, "grad_norm": 0.1483425796031952, "learning_rate": 0.0005, "loss": 2.1237, "step": 136960 }, { "epoch": 0.5213416258763883, "grad_norm": 0.1538805514574051, "learning_rate": 0.0005, "loss": 2.1128, "step": 136970 }, { "epoch": 0.521379688344511, "grad_norm": 0.11839305609464645, "learning_rate": 0.0005, "loss": 2.1133, "step": 136980 }, { "epoch": 0.5214177508126336, "grad_norm": 0.12727127969264984, "learning_rate": 0.0005, "loss": 2.1273, "step": 136990 }, { "epoch": 0.5214558132807564, "grad_norm": 0.12757115066051483, "learning_rate": 0.0005, "loss": 2.1073, "step": 137000 }, { "epoch": 0.5214938757488791, "grad_norm": 0.1204015463590622, "learning_rate": 0.0005, "loss": 2.1208, "step": 137010 }, { "epoch": 0.5215319382170017, "grad_norm": 0.14261414110660553, "learning_rate": 0.0005, "loss": 2.1024, "step": 137020 }, { "epoch": 0.5215700006851244, "grad_norm": 0.14065109193325043, "learning_rate": 0.0005, "loss": 2.1227, "step": 137030 }, { "epoch": 0.5216080631532471, "grad_norm": 0.13176940381526947, "learning_rate": 0.0005, "loss": 2.1079, "step": 137040 }, { "epoch": 0.5216461256213698, "grad_norm": 0.14611373841762543, "learning_rate": 0.0005, "loss": 2.1129, "step": 137050 }, { "epoch": 0.5216841880894925, "grad_norm": 0.1353708803653717, "learning_rate": 0.0005, "loss": 2.1086, "step": 137060 }, { "epoch": 0.5217222505576151, "grad_norm": 0.12236816436052322, "learning_rate": 0.0005, "loss": 2.1099, "step": 137070 }, { "epoch": 0.5217603130257379, "grad_norm": 0.12845854461193085, "learning_rate": 0.0005, "loss": 2.0969, "step": 137080 }, { "epoch": 0.5217983754938605, "grad_norm": 0.13327331840991974, "learning_rate": 0.0005, "loss": 2.1261, "step": 137090 }, { "epoch": 0.5218364379619832, "grad_norm": 0.1174556240439415, "learning_rate": 0.0005, "loss": 2.1184, "step": 137100 }, { "epoch": 0.5218745004301059, "grad_norm": 0.11884109675884247, "learning_rate": 0.0005, "loss": 2.1072, "step": 137110 }, { "epoch": 0.5219125628982286, "grad_norm": 0.13140694797039032, "learning_rate": 0.0005, "loss": 2.1085, "step": 137120 }, { "epoch": 0.5219506253663513, "grad_norm": 0.1556568294763565, "learning_rate": 0.0005, "loss": 2.1259, "step": 137130 }, { "epoch": 0.5219886878344739, "grad_norm": 0.14347811043262482, "learning_rate": 0.0005, "loss": 2.1102, "step": 137140 }, { "epoch": 0.5220267503025966, "grad_norm": 0.1311759650707245, "learning_rate": 0.0005, "loss": 2.1261, "step": 137150 }, { "epoch": 0.5220648127707193, "grad_norm": 0.12397903949022293, "learning_rate": 0.0005, "loss": 2.1478, "step": 137160 }, { "epoch": 0.522102875238842, "grad_norm": 0.12823760509490967, "learning_rate": 0.0005, "loss": 2.1203, "step": 137170 }, { "epoch": 0.5221409377069647, "grad_norm": 0.12383680045604706, "learning_rate": 0.0005, "loss": 2.0997, "step": 137180 }, { "epoch": 0.5221790001750873, "grad_norm": 0.12665867805480957, "learning_rate": 0.0005, "loss": 2.1123, "step": 137190 }, { "epoch": 0.52221706264321, "grad_norm": 0.11916995048522949, "learning_rate": 0.0005, "loss": 2.1323, "step": 137200 }, { "epoch": 0.5222551251113328, "grad_norm": 0.12199389934539795, "learning_rate": 0.0005, "loss": 2.1025, "step": 137210 }, { "epoch": 0.5222931875794554, "grad_norm": 0.13059760630130768, "learning_rate": 0.0005, "loss": 2.1094, "step": 137220 }, { "epoch": 0.5223312500475781, "grad_norm": 0.12276890873908997, "learning_rate": 0.0005, "loss": 2.1166, "step": 137230 }, { "epoch": 0.5223693125157007, "grad_norm": 0.12334048002958298, "learning_rate": 0.0005, "loss": 2.126, "step": 137240 }, { "epoch": 0.5224073749838235, "grad_norm": 0.13500618934631348, "learning_rate": 0.0005, "loss": 2.1048, "step": 137250 }, { "epoch": 0.5224454374519462, "grad_norm": 0.13345398008823395, "learning_rate": 0.0005, "loss": 2.106, "step": 137260 }, { "epoch": 0.5224834999200688, "grad_norm": 0.12334723025560379, "learning_rate": 0.0005, "loss": 2.1147, "step": 137270 }, { "epoch": 0.5225215623881915, "grad_norm": 0.13009530305862427, "learning_rate": 0.0005, "loss": 2.1022, "step": 137280 }, { "epoch": 0.5225596248563141, "grad_norm": 0.13310536742210388, "learning_rate": 0.0005, "loss": 2.1054, "step": 137290 }, { "epoch": 0.5225976873244369, "grad_norm": 0.11736752837896347, "learning_rate": 0.0005, "loss": 2.114, "step": 137300 }, { "epoch": 0.5226357497925596, "grad_norm": 0.12839005887508392, "learning_rate": 0.0005, "loss": 2.1205, "step": 137310 }, { "epoch": 0.5226738122606822, "grad_norm": 0.11895774304866791, "learning_rate": 0.0005, "loss": 2.1199, "step": 137320 }, { "epoch": 0.5227118747288049, "grad_norm": 0.14122259616851807, "learning_rate": 0.0005, "loss": 2.1047, "step": 137330 }, { "epoch": 0.5227499371969276, "grad_norm": 0.12385997921228409, "learning_rate": 0.0005, "loss": 2.1311, "step": 137340 }, { "epoch": 0.5227879996650503, "grad_norm": 0.12733817100524902, "learning_rate": 0.0005, "loss": 2.1134, "step": 137350 }, { "epoch": 0.522826062133173, "grad_norm": 0.12660536170005798, "learning_rate": 0.0005, "loss": 2.0942, "step": 137360 }, { "epoch": 0.5228641246012956, "grad_norm": 0.12909385561943054, "learning_rate": 0.0005, "loss": 2.1072, "step": 137370 }, { "epoch": 0.5229021870694184, "grad_norm": 0.12075277417898178, "learning_rate": 0.0005, "loss": 2.1052, "step": 137380 }, { "epoch": 0.522940249537541, "grad_norm": 0.1270180195569992, "learning_rate": 0.0005, "loss": 2.1077, "step": 137390 }, { "epoch": 0.5229783120056637, "grad_norm": 0.13144104182720184, "learning_rate": 0.0005, "loss": 2.1127, "step": 137400 }, { "epoch": 0.5230163744737863, "grad_norm": 0.12336520850658417, "learning_rate": 0.0005, "loss": 2.1222, "step": 137410 }, { "epoch": 0.523054436941909, "grad_norm": 0.135188028216362, "learning_rate": 0.0005, "loss": 2.1218, "step": 137420 }, { "epoch": 0.5230924994100318, "grad_norm": 0.13050761818885803, "learning_rate": 0.0005, "loss": 2.1161, "step": 137430 }, { "epoch": 0.5231305618781544, "grad_norm": 0.12770605087280273, "learning_rate": 0.0005, "loss": 2.1311, "step": 137440 }, { "epoch": 0.5231686243462771, "grad_norm": 0.15316686034202576, "learning_rate": 0.0005, "loss": 2.1182, "step": 137450 }, { "epoch": 0.5232066868143997, "grad_norm": 0.13354888558387756, "learning_rate": 0.0005, "loss": 2.1159, "step": 137460 }, { "epoch": 0.5232447492825225, "grad_norm": 0.13278037309646606, "learning_rate": 0.0005, "loss": 2.1089, "step": 137470 }, { "epoch": 0.5232828117506452, "grad_norm": 0.1274474710226059, "learning_rate": 0.0005, "loss": 2.12, "step": 137480 }, { "epoch": 0.5233208742187678, "grad_norm": 0.12818734347820282, "learning_rate": 0.0005, "loss": 2.1066, "step": 137490 }, { "epoch": 0.5233589366868905, "grad_norm": 0.12248261272907257, "learning_rate": 0.0005, "loss": 2.1187, "step": 137500 }, { "epoch": 0.5233969991550133, "grad_norm": 0.12802907824516296, "learning_rate": 0.0005, "loss": 2.1022, "step": 137510 }, { "epoch": 0.5234350616231359, "grad_norm": 0.1282433569431305, "learning_rate": 0.0005, "loss": 2.1059, "step": 137520 }, { "epoch": 0.5234731240912586, "grad_norm": 0.12959887087345123, "learning_rate": 0.0005, "loss": 2.108, "step": 137530 }, { "epoch": 0.5235111865593812, "grad_norm": 0.12287507951259613, "learning_rate": 0.0005, "loss": 2.1226, "step": 137540 }, { "epoch": 0.523549249027504, "grad_norm": 0.1263885498046875, "learning_rate": 0.0005, "loss": 2.1137, "step": 137550 }, { "epoch": 0.5235873114956267, "grad_norm": 0.13547179102897644, "learning_rate": 0.0005, "loss": 2.1175, "step": 137560 }, { "epoch": 0.5236253739637493, "grad_norm": 0.11977437883615494, "learning_rate": 0.0005, "loss": 2.123, "step": 137570 }, { "epoch": 0.523663436431872, "grad_norm": 0.12466085702180862, "learning_rate": 0.0005, "loss": 2.1182, "step": 137580 }, { "epoch": 0.5237014988999946, "grad_norm": 0.12097606062889099, "learning_rate": 0.0005, "loss": 2.1183, "step": 137590 }, { "epoch": 0.5237395613681174, "grad_norm": 0.12236282974481583, "learning_rate": 0.0005, "loss": 2.1206, "step": 137600 }, { "epoch": 0.52377762383624, "grad_norm": 0.1352507621049881, "learning_rate": 0.0005, "loss": 2.1281, "step": 137610 }, { "epoch": 0.5238156863043627, "grad_norm": 0.13352009654045105, "learning_rate": 0.0005, "loss": 2.0936, "step": 137620 }, { "epoch": 0.5238537487724854, "grad_norm": 0.1264696568250656, "learning_rate": 0.0005, "loss": 2.1214, "step": 137630 }, { "epoch": 0.5238918112406081, "grad_norm": 0.11878474056720734, "learning_rate": 0.0005, "loss": 2.1168, "step": 137640 }, { "epoch": 0.5239298737087308, "grad_norm": 0.11161357909440994, "learning_rate": 0.0005, "loss": 2.1154, "step": 137650 }, { "epoch": 0.5239679361768534, "grad_norm": 0.12533272802829742, "learning_rate": 0.0005, "loss": 2.1161, "step": 137660 }, { "epoch": 0.5240059986449761, "grad_norm": 0.13127167522907257, "learning_rate": 0.0005, "loss": 2.1029, "step": 137670 }, { "epoch": 0.5240440611130989, "grad_norm": 0.15212424099445343, "learning_rate": 0.0005, "loss": 2.1001, "step": 137680 }, { "epoch": 0.5240821235812215, "grad_norm": 0.14096777141094208, "learning_rate": 0.0005, "loss": 2.116, "step": 137690 }, { "epoch": 0.5241201860493442, "grad_norm": 0.13191179931163788, "learning_rate": 0.0005, "loss": 2.1092, "step": 137700 }, { "epoch": 0.5241582485174668, "grad_norm": 0.1232769638299942, "learning_rate": 0.0005, "loss": 2.1184, "step": 137710 }, { "epoch": 0.5241963109855895, "grad_norm": 0.13931381702423096, "learning_rate": 0.0005, "loss": 2.1235, "step": 137720 }, { "epoch": 0.5242343734537123, "grad_norm": 0.12155848741531372, "learning_rate": 0.0005, "loss": 2.1078, "step": 137730 }, { "epoch": 0.5242724359218349, "grad_norm": 0.12789209187030792, "learning_rate": 0.0005, "loss": 2.125, "step": 137740 }, { "epoch": 0.5243104983899576, "grad_norm": 0.12094086408615112, "learning_rate": 0.0005, "loss": 2.1139, "step": 137750 }, { "epoch": 0.5243485608580802, "grad_norm": 0.13228756189346313, "learning_rate": 0.0005, "loss": 2.1033, "step": 137760 }, { "epoch": 0.524386623326203, "grad_norm": 0.12024088203907013, "learning_rate": 0.0005, "loss": 2.1167, "step": 137770 }, { "epoch": 0.5244246857943257, "grad_norm": 0.1733071357011795, "learning_rate": 0.0005, "loss": 2.1145, "step": 137780 }, { "epoch": 0.5244627482624483, "grad_norm": 0.22339996695518494, "learning_rate": 0.0005, "loss": 2.1158, "step": 137790 }, { "epoch": 0.524500810730571, "grad_norm": 0.12036958336830139, "learning_rate": 0.0005, "loss": 2.1118, "step": 137800 }, { "epoch": 0.5245388731986937, "grad_norm": 0.12528793513774872, "learning_rate": 0.0005, "loss": 2.12, "step": 137810 }, { "epoch": 0.5245769356668164, "grad_norm": 0.11740902066230774, "learning_rate": 0.0005, "loss": 2.1105, "step": 137820 }, { "epoch": 0.5246149981349391, "grad_norm": 0.13410566747188568, "learning_rate": 0.0005, "loss": 2.11, "step": 137830 }, { "epoch": 0.5246530606030617, "grad_norm": 0.11765021085739136, "learning_rate": 0.0005, "loss": 2.0992, "step": 137840 }, { "epoch": 0.5246911230711844, "grad_norm": 0.11986323446035385, "learning_rate": 0.0005, "loss": 2.13, "step": 137850 }, { "epoch": 0.5247291855393071, "grad_norm": 0.1241866871714592, "learning_rate": 0.0005, "loss": 2.1222, "step": 137860 }, { "epoch": 0.5247672480074298, "grad_norm": 0.13713204860687256, "learning_rate": 0.0005, "loss": 2.1243, "step": 137870 }, { "epoch": 0.5248053104755525, "grad_norm": 0.127635657787323, "learning_rate": 0.0005, "loss": 2.1107, "step": 137880 }, { "epoch": 0.5248433729436751, "grad_norm": 0.11746551096439362, "learning_rate": 0.0005, "loss": 2.1172, "step": 137890 }, { "epoch": 0.5248814354117979, "grad_norm": 0.1233421340584755, "learning_rate": 0.0005, "loss": 2.1019, "step": 137900 }, { "epoch": 0.5249194978799205, "grad_norm": 0.12225458770990372, "learning_rate": 0.0005, "loss": 2.1075, "step": 137910 }, { "epoch": 0.5249575603480432, "grad_norm": 0.14724929630756378, "learning_rate": 0.0005, "loss": 2.1251, "step": 137920 }, { "epoch": 0.5249956228161659, "grad_norm": 0.1190890446305275, "learning_rate": 0.0005, "loss": 2.1055, "step": 137930 }, { "epoch": 0.5250336852842886, "grad_norm": 0.12628717720508575, "learning_rate": 0.0005, "loss": 2.1154, "step": 137940 }, { "epoch": 0.5250717477524113, "grad_norm": 0.11910507082939148, "learning_rate": 0.0005, "loss": 2.1214, "step": 137950 }, { "epoch": 0.5251098102205339, "grad_norm": 0.11877349019050598, "learning_rate": 0.0005, "loss": 2.114, "step": 137960 }, { "epoch": 0.5251478726886566, "grad_norm": 0.1267973929643631, "learning_rate": 0.0005, "loss": 2.1132, "step": 137970 }, { "epoch": 0.5251859351567794, "grad_norm": 0.1362115740776062, "learning_rate": 0.0005, "loss": 2.1181, "step": 137980 }, { "epoch": 0.525223997624902, "grad_norm": 0.12277247756719589, "learning_rate": 0.0005, "loss": 2.1275, "step": 137990 }, { "epoch": 0.5252620600930247, "grad_norm": 0.12753120064735413, "learning_rate": 0.0005, "loss": 2.1009, "step": 138000 }, { "epoch": 0.5253001225611473, "grad_norm": 0.12616737186908722, "learning_rate": 0.0005, "loss": 2.1141, "step": 138010 }, { "epoch": 0.52533818502927, "grad_norm": 0.11854333430528641, "learning_rate": 0.0005, "loss": 2.112, "step": 138020 }, { "epoch": 0.5253762474973928, "grad_norm": 0.12479977309703827, "learning_rate": 0.0005, "loss": 2.1175, "step": 138030 }, { "epoch": 0.5254143099655154, "grad_norm": 0.13225720822811127, "learning_rate": 0.0005, "loss": 2.1361, "step": 138040 }, { "epoch": 0.5254523724336381, "grad_norm": 0.13469231128692627, "learning_rate": 0.0005, "loss": 2.109, "step": 138050 }, { "epoch": 0.5254904349017607, "grad_norm": 0.1489909440279007, "learning_rate": 0.0005, "loss": 2.1134, "step": 138060 }, { "epoch": 0.5255284973698835, "grad_norm": 0.12749110162258148, "learning_rate": 0.0005, "loss": 2.1122, "step": 138070 }, { "epoch": 0.5255665598380062, "grad_norm": 0.13680586218833923, "learning_rate": 0.0005, "loss": 2.1075, "step": 138080 }, { "epoch": 0.5256046223061288, "grad_norm": 0.12244150042533875, "learning_rate": 0.0005, "loss": 2.116, "step": 138090 }, { "epoch": 0.5256426847742515, "grad_norm": 0.12733541429042816, "learning_rate": 0.0005, "loss": 2.1273, "step": 138100 }, { "epoch": 0.5256807472423742, "grad_norm": 0.11963535845279694, "learning_rate": 0.0005, "loss": 2.1231, "step": 138110 }, { "epoch": 0.5257188097104969, "grad_norm": 0.12353172153234482, "learning_rate": 0.0005, "loss": 2.1064, "step": 138120 }, { "epoch": 0.5257568721786195, "grad_norm": 0.11347753554582596, "learning_rate": 0.0005, "loss": 2.1299, "step": 138130 }, { "epoch": 0.5257949346467422, "grad_norm": 0.12878623604774475, "learning_rate": 0.0005, "loss": 2.1166, "step": 138140 }, { "epoch": 0.5258329971148649, "grad_norm": 0.14339739084243774, "learning_rate": 0.0005, "loss": 2.1172, "step": 138150 }, { "epoch": 0.5258710595829876, "grad_norm": 0.12231696397066116, "learning_rate": 0.0005, "loss": 2.1143, "step": 138160 }, { "epoch": 0.5259091220511103, "grad_norm": 0.12762510776519775, "learning_rate": 0.0005, "loss": 2.113, "step": 138170 }, { "epoch": 0.525947184519233, "grad_norm": 0.11546134203672409, "learning_rate": 0.0005, "loss": 2.1309, "step": 138180 }, { "epoch": 0.5259852469873556, "grad_norm": 0.11776210367679596, "learning_rate": 0.0005, "loss": 2.122, "step": 138190 }, { "epoch": 0.5260233094554784, "grad_norm": 0.13029833137989044, "learning_rate": 0.0005, "loss": 2.1094, "step": 138200 }, { "epoch": 0.526061371923601, "grad_norm": 0.12698499858379364, "learning_rate": 0.0005, "loss": 2.1189, "step": 138210 }, { "epoch": 0.5260994343917237, "grad_norm": 0.12059946358203888, "learning_rate": 0.0005, "loss": 2.1254, "step": 138220 }, { "epoch": 0.5261374968598463, "grad_norm": 0.13680903613567352, "learning_rate": 0.0005, "loss": 2.116, "step": 138230 }, { "epoch": 0.5261755593279691, "grad_norm": 0.12311997264623642, "learning_rate": 0.0005, "loss": 2.1278, "step": 138240 }, { "epoch": 0.5262136217960918, "grad_norm": 0.13110458850860596, "learning_rate": 0.0005, "loss": 2.1118, "step": 138250 }, { "epoch": 0.5262516842642144, "grad_norm": 0.12085889279842377, "learning_rate": 0.0005, "loss": 2.1138, "step": 138260 }, { "epoch": 0.5262897467323371, "grad_norm": 0.1268099844455719, "learning_rate": 0.0005, "loss": 2.1183, "step": 138270 }, { "epoch": 0.5263278092004597, "grad_norm": 0.1265743523836136, "learning_rate": 0.0005, "loss": 2.0959, "step": 138280 }, { "epoch": 0.5263658716685825, "grad_norm": 0.12535488605499268, "learning_rate": 0.0005, "loss": 2.125, "step": 138290 }, { "epoch": 0.5264039341367052, "grad_norm": 0.12558478116989136, "learning_rate": 0.0005, "loss": 2.1181, "step": 138300 }, { "epoch": 0.5264419966048278, "grad_norm": 0.13227242231369019, "learning_rate": 0.0005, "loss": 2.1124, "step": 138310 }, { "epoch": 0.5264800590729505, "grad_norm": 0.12063269317150116, "learning_rate": 0.0005, "loss": 2.1199, "step": 138320 }, { "epoch": 0.5265181215410732, "grad_norm": 0.13169559836387634, "learning_rate": 0.0005, "loss": 2.1037, "step": 138330 }, { "epoch": 0.5265561840091959, "grad_norm": 0.1179480329155922, "learning_rate": 0.0005, "loss": 2.0988, "step": 138340 }, { "epoch": 0.5265942464773186, "grad_norm": 0.12810704112052917, "learning_rate": 0.0005, "loss": 2.094, "step": 138350 }, { "epoch": 0.5266323089454412, "grad_norm": 0.13122254610061646, "learning_rate": 0.0005, "loss": 2.1095, "step": 138360 }, { "epoch": 0.526670371413564, "grad_norm": 0.13543862104415894, "learning_rate": 0.0005, "loss": 2.1272, "step": 138370 }, { "epoch": 0.5267084338816866, "grad_norm": 0.12366582453250885, "learning_rate": 0.0005, "loss": 2.1161, "step": 138380 }, { "epoch": 0.5267464963498093, "grad_norm": 0.13607007265090942, "learning_rate": 0.0005, "loss": 2.1155, "step": 138390 }, { "epoch": 0.526784558817932, "grad_norm": 0.1335546374320984, "learning_rate": 0.0005, "loss": 2.1113, "step": 138400 }, { "epoch": 0.5268226212860547, "grad_norm": 0.12716715037822723, "learning_rate": 0.0005, "loss": 2.1203, "step": 138410 }, { "epoch": 0.5268606837541774, "grad_norm": 0.1355588138103485, "learning_rate": 0.0005, "loss": 2.1045, "step": 138420 }, { "epoch": 0.5268987462223, "grad_norm": 0.11754409968852997, "learning_rate": 0.0005, "loss": 2.1172, "step": 138430 }, { "epoch": 0.5269368086904227, "grad_norm": 0.1135902926325798, "learning_rate": 0.0005, "loss": 2.1108, "step": 138440 }, { "epoch": 0.5269748711585454, "grad_norm": 0.12047741562128067, "learning_rate": 0.0005, "loss": 2.1175, "step": 138450 }, { "epoch": 0.5270129336266681, "grad_norm": 0.12416040897369385, "learning_rate": 0.0005, "loss": 2.1306, "step": 138460 }, { "epoch": 0.5270509960947908, "grad_norm": 0.15139645338058472, "learning_rate": 0.0005, "loss": 2.1226, "step": 138470 }, { "epoch": 0.5270890585629134, "grad_norm": 0.12424907088279724, "learning_rate": 0.0005, "loss": 2.1429, "step": 138480 }, { "epoch": 0.5271271210310361, "grad_norm": 0.1323985457420349, "learning_rate": 0.0005, "loss": 2.0967, "step": 138490 }, { "epoch": 0.5271651834991589, "grad_norm": 0.12402381002902985, "learning_rate": 0.0005, "loss": 2.1114, "step": 138500 }, { "epoch": 0.5272032459672815, "grad_norm": 0.11723242700099945, "learning_rate": 0.0005, "loss": 2.1143, "step": 138510 }, { "epoch": 0.5272413084354042, "grad_norm": 0.12278267741203308, "learning_rate": 0.0005, "loss": 2.1209, "step": 138520 }, { "epoch": 0.5272793709035268, "grad_norm": 0.13405431807041168, "learning_rate": 0.0005, "loss": 2.0984, "step": 138530 }, { "epoch": 0.5273174333716496, "grad_norm": 0.12649041414260864, "learning_rate": 0.0005, "loss": 2.1217, "step": 138540 }, { "epoch": 0.5273554958397723, "grad_norm": 0.13687849044799805, "learning_rate": 0.0005, "loss": 2.1081, "step": 138550 }, { "epoch": 0.5273935583078949, "grad_norm": 0.12157398462295532, "learning_rate": 0.0005, "loss": 2.1139, "step": 138560 }, { "epoch": 0.5274316207760176, "grad_norm": 0.1232946366071701, "learning_rate": 0.0005, "loss": 2.1291, "step": 138570 }, { "epoch": 0.5274696832441402, "grad_norm": 0.12800607085227966, "learning_rate": 0.0005, "loss": 2.1227, "step": 138580 }, { "epoch": 0.527507745712263, "grad_norm": 0.14093177020549774, "learning_rate": 0.0005, "loss": 2.1203, "step": 138590 }, { "epoch": 0.5275458081803857, "grad_norm": 0.13657447695732117, "learning_rate": 0.0005, "loss": 2.1152, "step": 138600 }, { "epoch": 0.5275838706485083, "grad_norm": 0.12304209172725677, "learning_rate": 0.0005, "loss": 2.1129, "step": 138610 }, { "epoch": 0.527621933116631, "grad_norm": 0.11970432847738266, "learning_rate": 0.0005, "loss": 2.1126, "step": 138620 }, { "epoch": 0.5276599955847537, "grad_norm": 0.12269636243581772, "learning_rate": 0.0005, "loss": 2.102, "step": 138630 }, { "epoch": 0.5276980580528764, "grad_norm": 0.1293053925037384, "learning_rate": 0.0005, "loss": 2.1085, "step": 138640 }, { "epoch": 0.527736120520999, "grad_norm": 0.11836563050746918, "learning_rate": 0.0005, "loss": 2.1003, "step": 138650 }, { "epoch": 0.5277741829891217, "grad_norm": 0.12414873391389847, "learning_rate": 0.0005, "loss": 2.1261, "step": 138660 }, { "epoch": 0.5278122454572445, "grad_norm": 0.14319206774234772, "learning_rate": 0.0005, "loss": 2.0954, "step": 138670 }, { "epoch": 0.5278503079253671, "grad_norm": 0.13531246781349182, "learning_rate": 0.0005, "loss": 2.1239, "step": 138680 }, { "epoch": 0.5278883703934898, "grad_norm": 0.13123445212841034, "learning_rate": 0.0005, "loss": 2.1075, "step": 138690 }, { "epoch": 0.5279264328616124, "grad_norm": 0.11836890876293182, "learning_rate": 0.0005, "loss": 2.1068, "step": 138700 }, { "epoch": 0.5279644953297351, "grad_norm": 0.11703275889158249, "learning_rate": 0.0005, "loss": 2.11, "step": 138710 }, { "epoch": 0.5280025577978579, "grad_norm": 0.12264472991228104, "learning_rate": 0.0005, "loss": 2.1181, "step": 138720 }, { "epoch": 0.5280406202659805, "grad_norm": 0.12100914865732193, "learning_rate": 0.0005, "loss": 2.1205, "step": 138730 }, { "epoch": 0.5280786827341032, "grad_norm": 0.12464544177055359, "learning_rate": 0.0005, "loss": 2.1273, "step": 138740 }, { "epoch": 0.5281167452022258, "grad_norm": 0.13402172923088074, "learning_rate": 0.0005, "loss": 2.0946, "step": 138750 }, { "epoch": 0.5281548076703486, "grad_norm": 0.13545958697795868, "learning_rate": 0.0005, "loss": 2.1081, "step": 138760 }, { "epoch": 0.5281928701384713, "grad_norm": 0.11684264242649078, "learning_rate": 0.0005, "loss": 2.1163, "step": 138770 }, { "epoch": 0.5282309326065939, "grad_norm": 0.15134216845035553, "learning_rate": 0.0005, "loss": 2.1239, "step": 138780 }, { "epoch": 0.5282689950747166, "grad_norm": 0.12242384999990463, "learning_rate": 0.0005, "loss": 2.0945, "step": 138790 }, { "epoch": 0.5283070575428394, "grad_norm": 0.12735465168952942, "learning_rate": 0.0005, "loss": 2.1126, "step": 138800 }, { "epoch": 0.528345120010962, "grad_norm": 0.12246635556221008, "learning_rate": 0.0005, "loss": 2.1159, "step": 138810 }, { "epoch": 0.5283831824790847, "grad_norm": 0.1216377466917038, "learning_rate": 0.0005, "loss": 2.1046, "step": 138820 }, { "epoch": 0.5284212449472073, "grad_norm": 0.1245758906006813, "learning_rate": 0.0005, "loss": 2.1069, "step": 138830 }, { "epoch": 0.5284593074153301, "grad_norm": 0.13611829280853271, "learning_rate": 0.0005, "loss": 2.0995, "step": 138840 }, { "epoch": 0.5284973698834528, "grad_norm": 0.12166754901409149, "learning_rate": 0.0005, "loss": 2.1083, "step": 138850 }, { "epoch": 0.5285354323515754, "grad_norm": 0.12426643818616867, "learning_rate": 0.0005, "loss": 2.1277, "step": 138860 }, { "epoch": 0.5285734948196981, "grad_norm": 0.15440742671489716, "learning_rate": 0.0005, "loss": 2.1187, "step": 138870 }, { "epoch": 0.5286115572878207, "grad_norm": 0.11902732402086258, "learning_rate": 0.0005, "loss": 2.1166, "step": 138880 }, { "epoch": 0.5286496197559435, "grad_norm": 0.1373627632856369, "learning_rate": 0.0005, "loss": 2.1098, "step": 138890 }, { "epoch": 0.5286876822240661, "grad_norm": 0.1300365924835205, "learning_rate": 0.0005, "loss": 2.1154, "step": 138900 }, { "epoch": 0.5287257446921888, "grad_norm": 0.1294863224029541, "learning_rate": 0.0005, "loss": 2.1266, "step": 138910 }, { "epoch": 0.5287638071603115, "grad_norm": 0.12608158588409424, "learning_rate": 0.0005, "loss": 2.1179, "step": 138920 }, { "epoch": 0.5288018696284342, "grad_norm": 0.12034744024276733, "learning_rate": 0.0005, "loss": 2.1269, "step": 138930 }, { "epoch": 0.5288399320965569, "grad_norm": 0.11805101484060287, "learning_rate": 0.0005, "loss": 2.1157, "step": 138940 }, { "epoch": 0.5288779945646795, "grad_norm": 0.12345664203166962, "learning_rate": 0.0005, "loss": 2.1091, "step": 138950 }, { "epoch": 0.5289160570328022, "grad_norm": 0.1263914555311203, "learning_rate": 0.0005, "loss": 2.1201, "step": 138960 }, { "epoch": 0.528954119500925, "grad_norm": 0.11450544744729996, "learning_rate": 0.0005, "loss": 2.1254, "step": 138970 }, { "epoch": 0.5289921819690476, "grad_norm": 0.125608429312706, "learning_rate": 0.0005, "loss": 2.1188, "step": 138980 }, { "epoch": 0.5290302444371703, "grad_norm": 0.12446246296167374, "learning_rate": 0.0005, "loss": 2.1177, "step": 138990 }, { "epoch": 0.5290683069052929, "grad_norm": 0.13462740182876587, "learning_rate": 0.0005, "loss": 2.1078, "step": 139000 }, { "epoch": 0.5291063693734156, "grad_norm": 0.14255592226982117, "learning_rate": 0.0005, "loss": 2.1088, "step": 139010 }, { "epoch": 0.5291444318415384, "grad_norm": 0.11985404044389725, "learning_rate": 0.0005, "loss": 2.1204, "step": 139020 }, { "epoch": 0.529182494309661, "grad_norm": 0.12564119696617126, "learning_rate": 0.0005, "loss": 2.1203, "step": 139030 }, { "epoch": 0.5292205567777837, "grad_norm": 0.13089881837368011, "learning_rate": 0.0005, "loss": 2.1234, "step": 139040 }, { "epoch": 0.5292586192459063, "grad_norm": 0.11932288110256195, "learning_rate": 0.0005, "loss": 2.1073, "step": 139050 }, { "epoch": 0.5292966817140291, "grad_norm": 0.1306738406419754, "learning_rate": 0.0005, "loss": 2.111, "step": 139060 }, { "epoch": 0.5293347441821518, "grad_norm": 0.11912896484136581, "learning_rate": 0.0005, "loss": 2.1088, "step": 139070 }, { "epoch": 0.5293728066502744, "grad_norm": 0.12531140446662903, "learning_rate": 0.0005, "loss": 2.1068, "step": 139080 }, { "epoch": 0.5294108691183971, "grad_norm": 0.12904711067676544, "learning_rate": 0.0005, "loss": 2.116, "step": 139090 }, { "epoch": 0.5294489315865198, "grad_norm": 0.12961961328983307, "learning_rate": 0.0005, "loss": 2.1145, "step": 139100 }, { "epoch": 0.5294869940546425, "grad_norm": 0.1254771202802658, "learning_rate": 0.0005, "loss": 2.1115, "step": 139110 }, { "epoch": 0.5295250565227652, "grad_norm": 0.1275731474161148, "learning_rate": 0.0005, "loss": 2.1244, "step": 139120 }, { "epoch": 0.5295631189908878, "grad_norm": 0.1152644231915474, "learning_rate": 0.0005, "loss": 2.1209, "step": 139130 }, { "epoch": 0.5296011814590105, "grad_norm": 0.12683624029159546, "learning_rate": 0.0005, "loss": 2.1107, "step": 139140 }, { "epoch": 0.5296392439271332, "grad_norm": 0.13341467082500458, "learning_rate": 0.0005, "loss": 2.0962, "step": 139150 }, { "epoch": 0.5296773063952559, "grad_norm": 0.1260858178138733, "learning_rate": 0.0005, "loss": 2.1146, "step": 139160 }, { "epoch": 0.5297153688633786, "grad_norm": 0.13160032033920288, "learning_rate": 0.0005, "loss": 2.1012, "step": 139170 }, { "epoch": 0.5297534313315012, "grad_norm": 0.12412998825311661, "learning_rate": 0.0005, "loss": 2.1138, "step": 139180 }, { "epoch": 0.529791493799624, "grad_norm": 0.12666431069374084, "learning_rate": 0.0005, "loss": 2.1144, "step": 139190 }, { "epoch": 0.5298295562677466, "grad_norm": 0.13844729959964752, "learning_rate": 0.0005, "loss": 2.1126, "step": 139200 }, { "epoch": 0.5298676187358693, "grad_norm": 0.1178123876452446, "learning_rate": 0.0005, "loss": 2.1241, "step": 139210 }, { "epoch": 0.529905681203992, "grad_norm": 0.1174953356385231, "learning_rate": 0.0005, "loss": 2.0996, "step": 139220 }, { "epoch": 0.5299437436721147, "grad_norm": 0.12705713510513306, "learning_rate": 0.0005, "loss": 2.0992, "step": 139230 }, { "epoch": 0.5299818061402374, "grad_norm": 0.13195791840553284, "learning_rate": 0.0005, "loss": 2.1242, "step": 139240 }, { "epoch": 0.53001986860836, "grad_norm": 0.12599407136440277, "learning_rate": 0.0005, "loss": 2.1196, "step": 139250 }, { "epoch": 0.5300579310764827, "grad_norm": 0.1358048915863037, "learning_rate": 0.0005, "loss": 2.1262, "step": 139260 }, { "epoch": 0.5300959935446055, "grad_norm": 0.12501151859760284, "learning_rate": 0.0005, "loss": 2.1429, "step": 139270 }, { "epoch": 0.5301340560127281, "grad_norm": 0.1353365033864975, "learning_rate": 0.0005, "loss": 2.1035, "step": 139280 }, { "epoch": 0.5301721184808508, "grad_norm": 0.1324470341205597, "learning_rate": 0.0005, "loss": 2.1181, "step": 139290 }, { "epoch": 0.5302101809489734, "grad_norm": 0.12147196382284164, "learning_rate": 0.0005, "loss": 2.1083, "step": 139300 }, { "epoch": 0.5302482434170961, "grad_norm": 0.12834204733371735, "learning_rate": 0.0005, "loss": 2.1026, "step": 139310 }, { "epoch": 0.5302863058852189, "grad_norm": 0.13552463054656982, "learning_rate": 0.0005, "loss": 2.1243, "step": 139320 }, { "epoch": 0.5303243683533415, "grad_norm": 0.13295625150203705, "learning_rate": 0.0005, "loss": 2.1154, "step": 139330 }, { "epoch": 0.5303624308214642, "grad_norm": 0.13045884668827057, "learning_rate": 0.0005, "loss": 2.1073, "step": 139340 }, { "epoch": 0.5304004932895868, "grad_norm": 0.12231920659542084, "learning_rate": 0.0005, "loss": 2.1241, "step": 139350 }, { "epoch": 0.5304385557577096, "grad_norm": 0.12363464385271072, "learning_rate": 0.0005, "loss": 2.1153, "step": 139360 }, { "epoch": 0.5304766182258323, "grad_norm": 0.15936076641082764, "learning_rate": 0.0005, "loss": 2.1178, "step": 139370 }, { "epoch": 0.5305146806939549, "grad_norm": 0.13969101011753082, "learning_rate": 0.0005, "loss": 2.1225, "step": 139380 }, { "epoch": 0.5305527431620776, "grad_norm": 0.11162112653255463, "learning_rate": 0.0005, "loss": 2.1091, "step": 139390 }, { "epoch": 0.5305908056302003, "grad_norm": 0.12614837288856506, "learning_rate": 0.0005, "loss": 2.1184, "step": 139400 }, { "epoch": 0.530628868098323, "grad_norm": 0.13985757529735565, "learning_rate": 0.0005, "loss": 2.1092, "step": 139410 }, { "epoch": 0.5306669305664456, "grad_norm": 0.13297826051712036, "learning_rate": 0.0005, "loss": 2.1008, "step": 139420 }, { "epoch": 0.5307049930345683, "grad_norm": 0.11873283982276917, "learning_rate": 0.0005, "loss": 2.1197, "step": 139430 }, { "epoch": 0.530743055502691, "grad_norm": 0.13034650683403015, "learning_rate": 0.0005, "loss": 2.0997, "step": 139440 }, { "epoch": 0.5307811179708137, "grad_norm": 0.1307368278503418, "learning_rate": 0.0005, "loss": 2.108, "step": 139450 }, { "epoch": 0.5308191804389364, "grad_norm": 0.12589477002620697, "learning_rate": 0.0005, "loss": 2.1213, "step": 139460 }, { "epoch": 0.530857242907059, "grad_norm": 0.1286430060863495, "learning_rate": 0.0005, "loss": 2.1324, "step": 139470 }, { "epoch": 0.5308953053751817, "grad_norm": 0.1236337199807167, "learning_rate": 0.0005, "loss": 2.1167, "step": 139480 }, { "epoch": 0.5309333678433045, "grad_norm": 0.11303827166557312, "learning_rate": 0.0005, "loss": 2.1033, "step": 139490 }, { "epoch": 0.5309714303114271, "grad_norm": 0.13597261905670166, "learning_rate": 0.0005, "loss": 2.1118, "step": 139500 }, { "epoch": 0.5310094927795498, "grad_norm": 0.12540364265441895, "learning_rate": 0.0005, "loss": 2.0956, "step": 139510 }, { "epoch": 0.5310475552476724, "grad_norm": 0.13377848267555237, "learning_rate": 0.0005, "loss": 2.1213, "step": 139520 }, { "epoch": 0.5310856177157952, "grad_norm": 0.13392139971256256, "learning_rate": 0.0005, "loss": 2.1017, "step": 139530 }, { "epoch": 0.5311236801839179, "grad_norm": 0.12368487566709518, "learning_rate": 0.0005, "loss": 2.1106, "step": 139540 }, { "epoch": 0.5311617426520405, "grad_norm": 0.1207844540476799, "learning_rate": 0.0005, "loss": 2.1079, "step": 139550 }, { "epoch": 0.5311998051201632, "grad_norm": 0.12541265785694122, "learning_rate": 0.0005, "loss": 2.0961, "step": 139560 }, { "epoch": 0.531237867588286, "grad_norm": 0.12517641484737396, "learning_rate": 0.0005, "loss": 2.1154, "step": 139570 }, { "epoch": 0.5312759300564086, "grad_norm": 0.12465142458677292, "learning_rate": 0.0005, "loss": 2.1305, "step": 139580 }, { "epoch": 0.5313139925245313, "grad_norm": 0.13334906101226807, "learning_rate": 0.0005, "loss": 2.1287, "step": 139590 }, { "epoch": 0.5313520549926539, "grad_norm": 0.13244402408599854, "learning_rate": 0.0005, "loss": 2.1232, "step": 139600 }, { "epoch": 0.5313901174607766, "grad_norm": 0.1352616250514984, "learning_rate": 0.0005, "loss": 2.112, "step": 139610 }, { "epoch": 0.5314281799288993, "grad_norm": 0.13310536742210388, "learning_rate": 0.0005, "loss": 2.1091, "step": 139620 }, { "epoch": 0.531466242397022, "grad_norm": 0.13594430685043335, "learning_rate": 0.0005, "loss": 2.1077, "step": 139630 }, { "epoch": 0.5315043048651447, "grad_norm": 0.12137679010629654, "learning_rate": 0.0005, "loss": 2.1047, "step": 139640 }, { "epoch": 0.5315423673332673, "grad_norm": 0.12929204106330872, "learning_rate": 0.0005, "loss": 2.1125, "step": 139650 }, { "epoch": 0.5315804298013901, "grad_norm": 0.13630947470664978, "learning_rate": 0.0005, "loss": 2.0938, "step": 139660 }, { "epoch": 0.5316184922695127, "grad_norm": 0.12881417572498322, "learning_rate": 0.0005, "loss": 2.0922, "step": 139670 }, { "epoch": 0.5316565547376354, "grad_norm": 0.1301601678133011, "learning_rate": 0.0005, "loss": 2.1101, "step": 139680 }, { "epoch": 0.5316946172057581, "grad_norm": 0.1197756677865982, "learning_rate": 0.0005, "loss": 2.1045, "step": 139690 }, { "epoch": 0.5317326796738808, "grad_norm": 0.13916751742362976, "learning_rate": 0.0005, "loss": 2.111, "step": 139700 }, { "epoch": 0.5317707421420035, "grad_norm": 0.1245514303445816, "learning_rate": 0.0005, "loss": 2.108, "step": 139710 }, { "epoch": 0.5318088046101261, "grad_norm": 0.12214001268148422, "learning_rate": 0.0005, "loss": 2.1125, "step": 139720 }, { "epoch": 0.5318468670782488, "grad_norm": 0.126341313123703, "learning_rate": 0.0005, "loss": 2.1101, "step": 139730 }, { "epoch": 0.5318849295463715, "grad_norm": 0.13778264820575714, "learning_rate": 0.0005, "loss": 2.1031, "step": 139740 }, { "epoch": 0.5319229920144942, "grad_norm": 0.12353502213954926, "learning_rate": 0.0005, "loss": 2.113, "step": 139750 }, { "epoch": 0.5319610544826169, "grad_norm": 0.12548722326755524, "learning_rate": 0.0005, "loss": 2.1122, "step": 139760 }, { "epoch": 0.5319991169507395, "grad_norm": 0.12514159083366394, "learning_rate": 0.0005, "loss": 2.1091, "step": 139770 }, { "epoch": 0.5320371794188622, "grad_norm": 0.13197045028209686, "learning_rate": 0.0005, "loss": 2.1126, "step": 139780 }, { "epoch": 0.532075241886985, "grad_norm": 0.14428088068962097, "learning_rate": 0.0005, "loss": 2.1025, "step": 139790 }, { "epoch": 0.5321133043551076, "grad_norm": 0.12338276207447052, "learning_rate": 0.0005, "loss": 2.0921, "step": 139800 }, { "epoch": 0.5321513668232303, "grad_norm": 0.121820829808712, "learning_rate": 0.0005, "loss": 2.1248, "step": 139810 }, { "epoch": 0.5321894292913529, "grad_norm": 0.12613916397094727, "learning_rate": 0.0005, "loss": 2.1133, "step": 139820 }, { "epoch": 0.5322274917594757, "grad_norm": 0.1097206324338913, "learning_rate": 0.0005, "loss": 2.1183, "step": 139830 }, { "epoch": 0.5322655542275984, "grad_norm": 0.12880319356918335, "learning_rate": 0.0005, "loss": 2.1131, "step": 139840 }, { "epoch": 0.532303616695721, "grad_norm": 0.1289016604423523, "learning_rate": 0.0005, "loss": 2.123, "step": 139850 }, { "epoch": 0.5323416791638437, "grad_norm": 0.11930489540100098, "learning_rate": 0.0005, "loss": 2.1128, "step": 139860 }, { "epoch": 0.5323797416319663, "grad_norm": 0.12602676451206207, "learning_rate": 0.0005, "loss": 2.1199, "step": 139870 }, { "epoch": 0.5324178041000891, "grad_norm": 0.14378313720226288, "learning_rate": 0.0005, "loss": 2.1232, "step": 139880 }, { "epoch": 0.5324558665682118, "grad_norm": 0.12042778730392456, "learning_rate": 0.0005, "loss": 2.1151, "step": 139890 }, { "epoch": 0.5324939290363344, "grad_norm": 0.12165861576795578, "learning_rate": 0.0005, "loss": 2.1188, "step": 139900 }, { "epoch": 0.5325319915044571, "grad_norm": 0.1306481957435608, "learning_rate": 0.0005, "loss": 2.1041, "step": 139910 }, { "epoch": 0.5325700539725798, "grad_norm": 0.12598676979541779, "learning_rate": 0.0005, "loss": 2.1287, "step": 139920 }, { "epoch": 0.5326081164407025, "grad_norm": 0.13994857668876648, "learning_rate": 0.0005, "loss": 2.1071, "step": 139930 }, { "epoch": 0.5326461789088252, "grad_norm": 0.12887181341648102, "learning_rate": 0.0005, "loss": 2.1188, "step": 139940 }, { "epoch": 0.5326842413769478, "grad_norm": 0.12587593495845795, "learning_rate": 0.0005, "loss": 2.1158, "step": 139950 }, { "epoch": 0.5327223038450706, "grad_norm": 0.13747109472751617, "learning_rate": 0.0005, "loss": 2.1126, "step": 139960 }, { "epoch": 0.5327603663131932, "grad_norm": 0.12219313532114029, "learning_rate": 0.0005, "loss": 2.0977, "step": 139970 }, { "epoch": 0.5327984287813159, "grad_norm": 0.12008965760469437, "learning_rate": 0.0005, "loss": 2.1182, "step": 139980 }, { "epoch": 0.5328364912494385, "grad_norm": 0.14939166605472565, "learning_rate": 0.0005, "loss": 2.1093, "step": 139990 }, { "epoch": 0.5328745537175613, "grad_norm": 0.11491386592388153, "learning_rate": 0.0005, "loss": 2.1129, "step": 140000 }, { "epoch": 0.532912616185684, "grad_norm": 0.1381085067987442, "learning_rate": 0.0005, "loss": 2.1227, "step": 140010 }, { "epoch": 0.5329506786538066, "grad_norm": 0.13951145112514496, "learning_rate": 0.0005, "loss": 2.1091, "step": 140020 }, { "epoch": 0.5329887411219293, "grad_norm": 0.1354573667049408, "learning_rate": 0.0005, "loss": 2.122, "step": 140030 }, { "epoch": 0.533026803590052, "grad_norm": 0.13436217606067657, "learning_rate": 0.0005, "loss": 2.1134, "step": 140040 }, { "epoch": 0.5330648660581747, "grad_norm": 0.13453523814678192, "learning_rate": 0.0005, "loss": 2.114, "step": 140050 }, { "epoch": 0.5331029285262974, "grad_norm": 0.1241929680109024, "learning_rate": 0.0005, "loss": 2.101, "step": 140060 }, { "epoch": 0.53314099099442, "grad_norm": 0.12845444679260254, "learning_rate": 0.0005, "loss": 2.1202, "step": 140070 }, { "epoch": 0.5331790534625427, "grad_norm": 0.12883029878139496, "learning_rate": 0.0005, "loss": 2.0998, "step": 140080 }, { "epoch": 0.5332171159306655, "grad_norm": 0.1188458651304245, "learning_rate": 0.0005, "loss": 2.1023, "step": 140090 }, { "epoch": 0.5332551783987881, "grad_norm": 0.11591944843530655, "learning_rate": 0.0005, "loss": 2.119, "step": 140100 }, { "epoch": 0.5332932408669108, "grad_norm": 0.12100506573915482, "learning_rate": 0.0005, "loss": 2.1147, "step": 140110 }, { "epoch": 0.5333313033350334, "grad_norm": 0.1290658563375473, "learning_rate": 0.0005, "loss": 2.1124, "step": 140120 }, { "epoch": 0.5333693658031562, "grad_norm": 0.11808887869119644, "learning_rate": 0.0005, "loss": 2.1256, "step": 140130 }, { "epoch": 0.5334074282712788, "grad_norm": 0.11424297094345093, "learning_rate": 0.0005, "loss": 2.0961, "step": 140140 }, { "epoch": 0.5334454907394015, "grad_norm": 0.11808013170957565, "learning_rate": 0.0005, "loss": 2.114, "step": 140150 }, { "epoch": 0.5334835532075242, "grad_norm": 0.118148572742939, "learning_rate": 0.0005, "loss": 2.1255, "step": 140160 }, { "epoch": 0.5335216156756468, "grad_norm": 0.1298682987689972, "learning_rate": 0.0005, "loss": 2.1139, "step": 140170 }, { "epoch": 0.5335596781437696, "grad_norm": 0.12634819746017456, "learning_rate": 0.0005, "loss": 2.1057, "step": 140180 }, { "epoch": 0.5335977406118922, "grad_norm": 0.12795348465442657, "learning_rate": 0.0005, "loss": 2.1147, "step": 140190 }, { "epoch": 0.5336358030800149, "grad_norm": 0.12447839230298996, "learning_rate": 0.0005, "loss": 2.1278, "step": 140200 }, { "epoch": 0.5336738655481376, "grad_norm": 0.11496274173259735, "learning_rate": 0.0005, "loss": 2.1088, "step": 140210 }, { "epoch": 0.5337119280162603, "grad_norm": 0.13818976283073425, "learning_rate": 0.0005, "loss": 2.1311, "step": 140220 }, { "epoch": 0.533749990484383, "grad_norm": 0.12268754094839096, "learning_rate": 0.0005, "loss": 2.1175, "step": 140230 }, { "epoch": 0.5337880529525056, "grad_norm": 0.11690956354141235, "learning_rate": 0.0005, "loss": 2.1155, "step": 140240 }, { "epoch": 0.5338261154206283, "grad_norm": 0.12202656269073486, "learning_rate": 0.0005, "loss": 2.1186, "step": 140250 }, { "epoch": 0.5338641778887511, "grad_norm": 0.12960444390773773, "learning_rate": 0.0005, "loss": 2.1273, "step": 140260 }, { "epoch": 0.5339022403568737, "grad_norm": 0.1281118392944336, "learning_rate": 0.0005, "loss": 2.1122, "step": 140270 }, { "epoch": 0.5339403028249964, "grad_norm": 0.12352045625448227, "learning_rate": 0.0005, "loss": 2.1126, "step": 140280 }, { "epoch": 0.533978365293119, "grad_norm": 0.13139109313488007, "learning_rate": 0.0005, "loss": 2.117, "step": 140290 }, { "epoch": 0.5340164277612417, "grad_norm": 0.13089576363563538, "learning_rate": 0.0005, "loss": 2.1072, "step": 140300 }, { "epoch": 0.5340544902293645, "grad_norm": 0.12949113547801971, "learning_rate": 0.0005, "loss": 2.1073, "step": 140310 }, { "epoch": 0.5340925526974871, "grad_norm": 0.1362101286649704, "learning_rate": 0.0005, "loss": 2.0981, "step": 140320 }, { "epoch": 0.5341306151656098, "grad_norm": 0.12745702266693115, "learning_rate": 0.0005, "loss": 2.1118, "step": 140330 }, { "epoch": 0.5341686776337324, "grad_norm": 0.12172038108110428, "learning_rate": 0.0005, "loss": 2.1103, "step": 140340 }, { "epoch": 0.5342067401018552, "grad_norm": 0.11687658727169037, "learning_rate": 0.0005, "loss": 2.1115, "step": 140350 }, { "epoch": 0.5342448025699779, "grad_norm": 0.13451483845710754, "learning_rate": 0.0005, "loss": 2.1151, "step": 140360 }, { "epoch": 0.5342828650381005, "grad_norm": 0.12194819748401642, "learning_rate": 0.0005, "loss": 2.0998, "step": 140370 }, { "epoch": 0.5343209275062232, "grad_norm": 0.12656249105930328, "learning_rate": 0.0005, "loss": 2.121, "step": 140380 }, { "epoch": 0.5343589899743459, "grad_norm": 0.12181355804204941, "learning_rate": 0.0005, "loss": 2.0933, "step": 140390 }, { "epoch": 0.5343970524424686, "grad_norm": 0.11588042974472046, "learning_rate": 0.0005, "loss": 2.1217, "step": 140400 }, { "epoch": 0.5344351149105913, "grad_norm": 0.12667755782604218, "learning_rate": 0.0005, "loss": 2.1098, "step": 140410 }, { "epoch": 0.5344731773787139, "grad_norm": 0.12891434133052826, "learning_rate": 0.0005, "loss": 2.1297, "step": 140420 }, { "epoch": 0.5345112398468367, "grad_norm": 0.12638193368911743, "learning_rate": 0.0005, "loss": 2.133, "step": 140430 }, { "epoch": 0.5345493023149593, "grad_norm": 0.12351429462432861, "learning_rate": 0.0005, "loss": 2.115, "step": 140440 }, { "epoch": 0.534587364783082, "grad_norm": 0.12079423666000366, "learning_rate": 0.0005, "loss": 2.1193, "step": 140450 }, { "epoch": 0.5346254272512047, "grad_norm": 0.1397024393081665, "learning_rate": 0.0005, "loss": 2.1125, "step": 140460 }, { "epoch": 0.5346634897193273, "grad_norm": 0.13598190248012543, "learning_rate": 0.0005, "loss": 2.1033, "step": 140470 }, { "epoch": 0.5347015521874501, "grad_norm": 0.13075561821460724, "learning_rate": 0.0005, "loss": 2.1094, "step": 140480 }, { "epoch": 0.5347396146555727, "grad_norm": 0.13019627332687378, "learning_rate": 0.0005, "loss": 2.1378, "step": 140490 }, { "epoch": 0.5347776771236954, "grad_norm": 0.1375575214624405, "learning_rate": 0.0005, "loss": 2.1312, "step": 140500 }, { "epoch": 0.534815739591818, "grad_norm": 0.11885309964418411, "learning_rate": 0.0005, "loss": 2.0908, "step": 140510 }, { "epoch": 0.5348538020599408, "grad_norm": 0.12946408987045288, "learning_rate": 0.0005, "loss": 2.1126, "step": 140520 }, { "epoch": 0.5348918645280635, "grad_norm": 0.12864582240581512, "learning_rate": 0.0005, "loss": 2.1087, "step": 140530 }, { "epoch": 0.5349299269961861, "grad_norm": 0.129059836268425, "learning_rate": 0.0005, "loss": 2.1065, "step": 140540 }, { "epoch": 0.5349679894643088, "grad_norm": 0.1326047033071518, "learning_rate": 0.0005, "loss": 2.1016, "step": 140550 }, { "epoch": 0.5350060519324316, "grad_norm": 0.12458567321300507, "learning_rate": 0.0005, "loss": 2.1168, "step": 140560 }, { "epoch": 0.5350441144005542, "grad_norm": 0.1340087354183197, "learning_rate": 0.0005, "loss": 2.1154, "step": 140570 }, { "epoch": 0.5350821768686769, "grad_norm": 0.12168935686349869, "learning_rate": 0.0005, "loss": 2.1149, "step": 140580 }, { "epoch": 0.5351202393367995, "grad_norm": 0.1205526664853096, "learning_rate": 0.0005, "loss": 2.1035, "step": 140590 }, { "epoch": 0.5351583018049222, "grad_norm": 0.1175910159945488, "learning_rate": 0.0005, "loss": 2.112, "step": 140600 }, { "epoch": 0.535196364273045, "grad_norm": 0.11866150796413422, "learning_rate": 0.0005, "loss": 2.1075, "step": 140610 }, { "epoch": 0.5352344267411676, "grad_norm": 0.11844488233327866, "learning_rate": 0.0005, "loss": 2.1293, "step": 140620 }, { "epoch": 0.5352724892092903, "grad_norm": 0.1390773504972458, "learning_rate": 0.0005, "loss": 2.1058, "step": 140630 }, { "epoch": 0.5353105516774129, "grad_norm": 0.12005293369293213, "learning_rate": 0.0005, "loss": 2.1137, "step": 140640 }, { "epoch": 0.5353486141455357, "grad_norm": 0.12795551121234894, "learning_rate": 0.0005, "loss": 2.1092, "step": 140650 }, { "epoch": 0.5353866766136584, "grad_norm": 0.11821883171796799, "learning_rate": 0.0005, "loss": 2.099, "step": 140660 }, { "epoch": 0.535424739081781, "grad_norm": 0.12025831639766693, "learning_rate": 0.0005, "loss": 2.1133, "step": 140670 }, { "epoch": 0.5354628015499037, "grad_norm": 0.13171258568763733, "learning_rate": 0.0005, "loss": 2.1218, "step": 140680 }, { "epoch": 0.5355008640180264, "grad_norm": 0.12721110880374908, "learning_rate": 0.0005, "loss": 2.1186, "step": 140690 }, { "epoch": 0.5355389264861491, "grad_norm": 0.127712681889534, "learning_rate": 0.0005, "loss": 2.1143, "step": 140700 }, { "epoch": 0.5355769889542717, "grad_norm": 0.15471547842025757, "learning_rate": 0.0005, "loss": 2.1042, "step": 140710 }, { "epoch": 0.5356150514223944, "grad_norm": 0.12533149123191833, "learning_rate": 0.0005, "loss": 2.1153, "step": 140720 }, { "epoch": 0.5356531138905171, "grad_norm": 0.13401567935943604, "learning_rate": 0.0005, "loss": 2.1207, "step": 140730 }, { "epoch": 0.5356911763586398, "grad_norm": 0.12059596925973892, "learning_rate": 0.0005, "loss": 2.1072, "step": 140740 }, { "epoch": 0.5357292388267625, "grad_norm": 0.12798458337783813, "learning_rate": 0.0005, "loss": 2.1319, "step": 140750 }, { "epoch": 0.5357673012948851, "grad_norm": 0.11938324570655823, "learning_rate": 0.0005, "loss": 2.1067, "step": 140760 }, { "epoch": 0.5358053637630078, "grad_norm": 0.1102285087108612, "learning_rate": 0.0005, "loss": 2.1282, "step": 140770 }, { "epoch": 0.5358434262311306, "grad_norm": 0.1263628453016281, "learning_rate": 0.0005, "loss": 2.1141, "step": 140780 }, { "epoch": 0.5358814886992532, "grad_norm": 0.12735337018966675, "learning_rate": 0.0005, "loss": 2.1046, "step": 140790 }, { "epoch": 0.5359195511673759, "grad_norm": 0.14012378454208374, "learning_rate": 0.0005, "loss": 2.1078, "step": 140800 }, { "epoch": 0.5359576136354985, "grad_norm": 0.1268850862979889, "learning_rate": 0.0005, "loss": 2.1272, "step": 140810 }, { "epoch": 0.5359956761036213, "grad_norm": 0.13749361038208008, "learning_rate": 0.0005, "loss": 2.1123, "step": 140820 }, { "epoch": 0.536033738571744, "grad_norm": 0.11975759267807007, "learning_rate": 0.0005, "loss": 2.1213, "step": 140830 }, { "epoch": 0.5360718010398666, "grad_norm": 0.12557609379291534, "learning_rate": 0.0005, "loss": 2.1031, "step": 140840 }, { "epoch": 0.5361098635079893, "grad_norm": 0.11973942816257477, "learning_rate": 0.0005, "loss": 2.1249, "step": 140850 }, { "epoch": 0.536147925976112, "grad_norm": 0.1194792166352272, "learning_rate": 0.0005, "loss": 2.115, "step": 140860 }, { "epoch": 0.5361859884442347, "grad_norm": 0.12609535455703735, "learning_rate": 0.0005, "loss": 2.1361, "step": 140870 }, { "epoch": 0.5362240509123574, "grad_norm": 0.11945301294326782, "learning_rate": 0.0005, "loss": 2.1016, "step": 140880 }, { "epoch": 0.53626211338048, "grad_norm": 0.126478374004364, "learning_rate": 0.0005, "loss": 2.1131, "step": 140890 }, { "epoch": 0.5363001758486027, "grad_norm": 0.1255444586277008, "learning_rate": 0.0005, "loss": 2.1076, "step": 140900 }, { "epoch": 0.5363382383167254, "grad_norm": 0.12098710983991623, "learning_rate": 0.0005, "loss": 2.1065, "step": 140910 }, { "epoch": 0.5363763007848481, "grad_norm": 0.12323322147130966, "learning_rate": 0.0005, "loss": 2.1125, "step": 140920 }, { "epoch": 0.5364143632529708, "grad_norm": 0.13023199141025543, "learning_rate": 0.0005, "loss": 2.111, "step": 140930 }, { "epoch": 0.5364524257210934, "grad_norm": 0.12331162393093109, "learning_rate": 0.0005, "loss": 2.125, "step": 140940 }, { "epoch": 0.5364904881892162, "grad_norm": 0.1288183182477951, "learning_rate": 0.0005, "loss": 2.1189, "step": 140950 }, { "epoch": 0.5365285506573388, "grad_norm": 0.12472337484359741, "learning_rate": 0.0005, "loss": 2.102, "step": 140960 }, { "epoch": 0.5365666131254615, "grad_norm": 0.138634592294693, "learning_rate": 0.0005, "loss": 2.1047, "step": 140970 }, { "epoch": 0.5366046755935842, "grad_norm": 0.12713125348091125, "learning_rate": 0.0005, "loss": 2.122, "step": 140980 }, { "epoch": 0.5366427380617069, "grad_norm": 0.12022874504327774, "learning_rate": 0.0005, "loss": 2.1182, "step": 140990 }, { "epoch": 0.5366808005298296, "grad_norm": 0.13861408829689026, "learning_rate": 0.0005, "loss": 2.1223, "step": 141000 }, { "epoch": 0.5367188629979522, "grad_norm": 0.13212622702121735, "learning_rate": 0.0005, "loss": 2.1018, "step": 141010 }, { "epoch": 0.5367569254660749, "grad_norm": 0.12079843878746033, "learning_rate": 0.0005, "loss": 2.1133, "step": 141020 }, { "epoch": 0.5367949879341976, "grad_norm": 0.11715279519557953, "learning_rate": 0.0005, "loss": 2.1126, "step": 141030 }, { "epoch": 0.5368330504023203, "grad_norm": 0.1280633807182312, "learning_rate": 0.0005, "loss": 2.1075, "step": 141040 }, { "epoch": 0.536871112870443, "grad_norm": 0.1294240951538086, "learning_rate": 0.0005, "loss": 2.1088, "step": 141050 }, { "epoch": 0.5369091753385656, "grad_norm": 0.12403983622789383, "learning_rate": 0.0005, "loss": 2.105, "step": 141060 }, { "epoch": 0.5369472378066883, "grad_norm": 0.12071875482797623, "learning_rate": 0.0005, "loss": 2.1223, "step": 141070 }, { "epoch": 0.5369853002748111, "grad_norm": 0.12532472610473633, "learning_rate": 0.0005, "loss": 2.1082, "step": 141080 }, { "epoch": 0.5370233627429337, "grad_norm": 0.1143755242228508, "learning_rate": 0.0005, "loss": 2.1066, "step": 141090 }, { "epoch": 0.5370614252110564, "grad_norm": 0.11905299127101898, "learning_rate": 0.0005, "loss": 2.1142, "step": 141100 }, { "epoch": 0.537099487679179, "grad_norm": 0.12017108500003815, "learning_rate": 0.0005, "loss": 2.1134, "step": 141110 }, { "epoch": 0.5371375501473018, "grad_norm": 0.12630660831928253, "learning_rate": 0.0005, "loss": 2.112, "step": 141120 }, { "epoch": 0.5371756126154245, "grad_norm": 0.11764616519212723, "learning_rate": 0.0005, "loss": 2.1016, "step": 141130 }, { "epoch": 0.5372136750835471, "grad_norm": 0.11926303058862686, "learning_rate": 0.0005, "loss": 2.1194, "step": 141140 }, { "epoch": 0.5372517375516698, "grad_norm": 0.12672436237335205, "learning_rate": 0.0005, "loss": 2.1159, "step": 141150 }, { "epoch": 0.5372898000197924, "grad_norm": 0.11773819476366043, "learning_rate": 0.0005, "loss": 2.1349, "step": 141160 }, { "epoch": 0.5373278624879152, "grad_norm": 0.12111266702413559, "learning_rate": 0.0005, "loss": 2.1026, "step": 141170 }, { "epoch": 0.5373659249560379, "grad_norm": 0.1228339672088623, "learning_rate": 0.0005, "loss": 2.1195, "step": 141180 }, { "epoch": 0.5374039874241605, "grad_norm": 0.13635188341140747, "learning_rate": 0.0005, "loss": 2.131, "step": 141190 }, { "epoch": 0.5374420498922832, "grad_norm": 0.1257738471031189, "learning_rate": 0.0005, "loss": 2.1159, "step": 141200 }, { "epoch": 0.5374801123604059, "grad_norm": 0.12580062448978424, "learning_rate": 0.0005, "loss": 2.113, "step": 141210 }, { "epoch": 0.5375181748285286, "grad_norm": 0.1258445829153061, "learning_rate": 0.0005, "loss": 2.1163, "step": 141220 }, { "epoch": 0.5375562372966513, "grad_norm": 0.12222804129123688, "learning_rate": 0.0005, "loss": 2.1274, "step": 141230 }, { "epoch": 0.5375942997647739, "grad_norm": 0.11764946579933167, "learning_rate": 0.0005, "loss": 2.1145, "step": 141240 }, { "epoch": 0.5376323622328967, "grad_norm": 0.12180469930171967, "learning_rate": 0.0005, "loss": 2.0993, "step": 141250 }, { "epoch": 0.5376704247010193, "grad_norm": 0.1325564682483673, "learning_rate": 0.0005, "loss": 2.1011, "step": 141260 }, { "epoch": 0.537708487169142, "grad_norm": 0.13016091287136078, "learning_rate": 0.0005, "loss": 2.11, "step": 141270 }, { "epoch": 0.5377465496372646, "grad_norm": 0.11561047285795212, "learning_rate": 0.0005, "loss": 2.121, "step": 141280 }, { "epoch": 0.5377846121053874, "grad_norm": 0.12065380811691284, "learning_rate": 0.0005, "loss": 2.1059, "step": 141290 }, { "epoch": 0.5378226745735101, "grad_norm": 0.12640777230262756, "learning_rate": 0.0005, "loss": 2.1213, "step": 141300 }, { "epoch": 0.5378607370416327, "grad_norm": 0.11981063336133957, "learning_rate": 0.0005, "loss": 2.0958, "step": 141310 }, { "epoch": 0.5378987995097554, "grad_norm": 0.11782816797494888, "learning_rate": 0.0005, "loss": 2.1129, "step": 141320 }, { "epoch": 0.537936861977878, "grad_norm": 0.13507914543151855, "learning_rate": 0.0005, "loss": 2.1184, "step": 141330 }, { "epoch": 0.5379749244460008, "grad_norm": 0.1195915937423706, "learning_rate": 0.0005, "loss": 2.1218, "step": 141340 }, { "epoch": 0.5380129869141235, "grad_norm": 0.12683017551898956, "learning_rate": 0.0005, "loss": 2.1284, "step": 141350 }, { "epoch": 0.5380510493822461, "grad_norm": 0.11754672974348068, "learning_rate": 0.0005, "loss": 2.1126, "step": 141360 }, { "epoch": 0.5380891118503688, "grad_norm": 0.1195908859372139, "learning_rate": 0.0005, "loss": 2.1374, "step": 141370 }, { "epoch": 0.5381271743184916, "grad_norm": 0.1198858767747879, "learning_rate": 0.0005, "loss": 2.1083, "step": 141380 }, { "epoch": 0.5381652367866142, "grad_norm": 0.12950566411018372, "learning_rate": 0.0005, "loss": 2.1088, "step": 141390 }, { "epoch": 0.5382032992547369, "grad_norm": 0.12549936771392822, "learning_rate": 0.0005, "loss": 2.1351, "step": 141400 }, { "epoch": 0.5382413617228595, "grad_norm": 0.17845281958580017, "learning_rate": 0.0005, "loss": 2.1084, "step": 141410 }, { "epoch": 0.5382794241909823, "grad_norm": 0.12847577035427094, "learning_rate": 0.0005, "loss": 2.1102, "step": 141420 }, { "epoch": 0.538317486659105, "grad_norm": 0.12952706217765808, "learning_rate": 0.0005, "loss": 2.1071, "step": 141430 }, { "epoch": 0.5383555491272276, "grad_norm": 0.1426181048154831, "learning_rate": 0.0005, "loss": 2.1277, "step": 141440 }, { "epoch": 0.5383936115953503, "grad_norm": 0.11868982762098312, "learning_rate": 0.0005, "loss": 2.1277, "step": 141450 }, { "epoch": 0.5384316740634729, "grad_norm": 0.12122055143117905, "learning_rate": 0.0005, "loss": 2.1239, "step": 141460 }, { "epoch": 0.5384697365315957, "grad_norm": 0.11714940518140793, "learning_rate": 0.0005, "loss": 2.1163, "step": 141470 }, { "epoch": 0.5385077989997183, "grad_norm": 0.12399179488420486, "learning_rate": 0.0005, "loss": 2.1167, "step": 141480 }, { "epoch": 0.538545861467841, "grad_norm": 0.1398458331823349, "learning_rate": 0.0005, "loss": 2.1217, "step": 141490 }, { "epoch": 0.5385839239359637, "grad_norm": 0.1362568438053131, "learning_rate": 0.0005, "loss": 2.1304, "step": 141500 }, { "epoch": 0.5386219864040864, "grad_norm": 0.11364667862653732, "learning_rate": 0.0005, "loss": 2.0949, "step": 141510 }, { "epoch": 0.5386600488722091, "grad_norm": 0.12641511857509613, "learning_rate": 0.0005, "loss": 2.1159, "step": 141520 }, { "epoch": 0.5386981113403317, "grad_norm": 0.12470997869968414, "learning_rate": 0.0005, "loss": 2.1137, "step": 141530 }, { "epoch": 0.5387361738084544, "grad_norm": 0.13600875437259674, "learning_rate": 0.0005, "loss": 2.1112, "step": 141540 }, { "epoch": 0.5387742362765772, "grad_norm": 0.1253259927034378, "learning_rate": 0.0005, "loss": 2.0862, "step": 141550 }, { "epoch": 0.5388122987446998, "grad_norm": 0.12575694918632507, "learning_rate": 0.0005, "loss": 2.1313, "step": 141560 }, { "epoch": 0.5388503612128225, "grad_norm": 0.13145343959331512, "learning_rate": 0.0005, "loss": 2.1188, "step": 141570 }, { "epoch": 0.5388884236809451, "grad_norm": 0.12116347253322601, "learning_rate": 0.0005, "loss": 2.1054, "step": 141580 }, { "epoch": 0.5389264861490678, "grad_norm": 0.1289222240447998, "learning_rate": 0.0005, "loss": 2.1104, "step": 141590 }, { "epoch": 0.5389645486171906, "grad_norm": 0.13402898609638214, "learning_rate": 0.0005, "loss": 2.102, "step": 141600 }, { "epoch": 0.5390026110853132, "grad_norm": 0.13756172358989716, "learning_rate": 0.0005, "loss": 2.1118, "step": 141610 }, { "epoch": 0.5390406735534359, "grad_norm": 0.11667651683092117, "learning_rate": 0.0005, "loss": 2.0997, "step": 141620 }, { "epoch": 0.5390787360215585, "grad_norm": 0.11143524944782257, "learning_rate": 0.0005, "loss": 2.1076, "step": 141630 }, { "epoch": 0.5391167984896813, "grad_norm": 0.1342800408601761, "learning_rate": 0.0005, "loss": 2.1094, "step": 141640 }, { "epoch": 0.539154860957804, "grad_norm": 0.1318407654762268, "learning_rate": 0.0005, "loss": 2.1084, "step": 141650 }, { "epoch": 0.5391929234259266, "grad_norm": 0.12428022176027298, "learning_rate": 0.0005, "loss": 2.1144, "step": 141660 }, { "epoch": 0.5392309858940493, "grad_norm": 0.1295686662197113, "learning_rate": 0.0005, "loss": 2.1147, "step": 141670 }, { "epoch": 0.539269048362172, "grad_norm": 0.12853935360908508, "learning_rate": 0.0005, "loss": 2.1058, "step": 141680 }, { "epoch": 0.5393071108302947, "grad_norm": 0.1273634135723114, "learning_rate": 0.0005, "loss": 2.0992, "step": 141690 }, { "epoch": 0.5393451732984174, "grad_norm": 0.12686489522457123, "learning_rate": 0.0005, "loss": 2.1134, "step": 141700 }, { "epoch": 0.53938323576654, "grad_norm": 0.14804643392562866, "learning_rate": 0.0005, "loss": 2.1192, "step": 141710 }, { "epoch": 0.5394212982346628, "grad_norm": 0.12041910737752914, "learning_rate": 0.0005, "loss": 2.1066, "step": 141720 }, { "epoch": 0.5394593607027854, "grad_norm": 0.12201380729675293, "learning_rate": 0.0005, "loss": 2.1093, "step": 141730 }, { "epoch": 0.5394974231709081, "grad_norm": 0.13108478486537933, "learning_rate": 0.0005, "loss": 2.1062, "step": 141740 }, { "epoch": 0.5395354856390308, "grad_norm": 0.12690266966819763, "learning_rate": 0.0005, "loss": 2.1058, "step": 141750 }, { "epoch": 0.5395735481071534, "grad_norm": 0.11823736876249313, "learning_rate": 0.0005, "loss": 2.108, "step": 141760 }, { "epoch": 0.5396116105752762, "grad_norm": 0.1334260106086731, "learning_rate": 0.0005, "loss": 2.1306, "step": 141770 }, { "epoch": 0.5396496730433988, "grad_norm": 0.1342170089483261, "learning_rate": 0.0005, "loss": 2.1084, "step": 141780 }, { "epoch": 0.5396877355115215, "grad_norm": 0.1342397779226303, "learning_rate": 0.0005, "loss": 2.0944, "step": 141790 }, { "epoch": 0.5397257979796442, "grad_norm": 0.14033067226409912, "learning_rate": 0.0005, "loss": 2.1142, "step": 141800 }, { "epoch": 0.5397638604477669, "grad_norm": 0.12297873944044113, "learning_rate": 0.0005, "loss": 2.121, "step": 141810 }, { "epoch": 0.5398019229158896, "grad_norm": 0.11781862378120422, "learning_rate": 0.0005, "loss": 2.1081, "step": 141820 }, { "epoch": 0.5398399853840122, "grad_norm": 0.11869970709085464, "learning_rate": 0.0005, "loss": 2.1108, "step": 141830 }, { "epoch": 0.5398780478521349, "grad_norm": 0.1195460706949234, "learning_rate": 0.0005, "loss": 2.1091, "step": 141840 }, { "epoch": 0.5399161103202577, "grad_norm": 0.13290493190288544, "learning_rate": 0.0005, "loss": 2.1135, "step": 141850 }, { "epoch": 0.5399541727883803, "grad_norm": 0.13338586688041687, "learning_rate": 0.0005, "loss": 2.1216, "step": 141860 }, { "epoch": 0.539992235256503, "grad_norm": 0.1205737292766571, "learning_rate": 0.0005, "loss": 2.1307, "step": 141870 }, { "epoch": 0.5400302977246256, "grad_norm": 0.12896011769771576, "learning_rate": 0.0005, "loss": 2.1256, "step": 141880 }, { "epoch": 0.5400683601927483, "grad_norm": 0.12277299165725708, "learning_rate": 0.0005, "loss": 2.1109, "step": 141890 }, { "epoch": 0.540106422660871, "grad_norm": 0.1205860897898674, "learning_rate": 0.0005, "loss": 2.1113, "step": 141900 }, { "epoch": 0.5401444851289937, "grad_norm": 0.12812530994415283, "learning_rate": 0.0005, "loss": 2.1029, "step": 141910 }, { "epoch": 0.5401825475971164, "grad_norm": 0.12242037057876587, "learning_rate": 0.0005, "loss": 2.1064, "step": 141920 }, { "epoch": 0.540220610065239, "grad_norm": 0.12503241002559662, "learning_rate": 0.0005, "loss": 2.1094, "step": 141930 }, { "epoch": 0.5402586725333618, "grad_norm": 0.13053427636623383, "learning_rate": 0.0005, "loss": 2.1131, "step": 141940 }, { "epoch": 0.5402967350014845, "grad_norm": 0.12927544116973877, "learning_rate": 0.0005, "loss": 2.1165, "step": 141950 }, { "epoch": 0.5403347974696071, "grad_norm": 0.1277080774307251, "learning_rate": 0.0005, "loss": 2.0988, "step": 141960 }, { "epoch": 0.5403728599377298, "grad_norm": 0.11748964339494705, "learning_rate": 0.0005, "loss": 2.1286, "step": 141970 }, { "epoch": 0.5404109224058525, "grad_norm": 0.11776341497898102, "learning_rate": 0.0005, "loss": 2.1081, "step": 141980 }, { "epoch": 0.5404489848739752, "grad_norm": 0.12624762952327728, "learning_rate": 0.0005, "loss": 2.1007, "step": 141990 }, { "epoch": 0.5404870473420978, "grad_norm": 0.12573248147964478, "learning_rate": 0.0005, "loss": 2.1132, "step": 142000 }, { "epoch": 0.5405251098102205, "grad_norm": 0.1226499155163765, "learning_rate": 0.0005, "loss": 2.0992, "step": 142010 }, { "epoch": 0.5405631722783432, "grad_norm": 0.11763577908277512, "learning_rate": 0.0005, "loss": 2.1059, "step": 142020 }, { "epoch": 0.5406012347464659, "grad_norm": 0.12168823182582855, "learning_rate": 0.0005, "loss": 2.102, "step": 142030 }, { "epoch": 0.5406392972145886, "grad_norm": 0.12195418775081635, "learning_rate": 0.0005, "loss": 2.1309, "step": 142040 }, { "epoch": 0.5406773596827112, "grad_norm": 0.11766864359378815, "learning_rate": 0.0005, "loss": 2.1132, "step": 142050 }, { "epoch": 0.5407154221508339, "grad_norm": 0.14633165299892426, "learning_rate": 0.0005, "loss": 2.1074, "step": 142060 }, { "epoch": 0.5407534846189567, "grad_norm": 0.12138644605875015, "learning_rate": 0.0005, "loss": 2.1089, "step": 142070 }, { "epoch": 0.5407915470870793, "grad_norm": 0.11825988441705704, "learning_rate": 0.0005, "loss": 2.0913, "step": 142080 }, { "epoch": 0.540829609555202, "grad_norm": 0.11417256295681, "learning_rate": 0.0005, "loss": 2.116, "step": 142090 }, { "epoch": 0.5408676720233246, "grad_norm": 0.12141603231430054, "learning_rate": 0.0005, "loss": 2.1058, "step": 142100 }, { "epoch": 0.5409057344914474, "grad_norm": 0.1170593649148941, "learning_rate": 0.0005, "loss": 2.1075, "step": 142110 }, { "epoch": 0.5409437969595701, "grad_norm": 0.12261858582496643, "learning_rate": 0.0005, "loss": 2.1219, "step": 142120 }, { "epoch": 0.5409818594276927, "grad_norm": 0.12808889150619507, "learning_rate": 0.0005, "loss": 2.0992, "step": 142130 }, { "epoch": 0.5410199218958154, "grad_norm": 0.12382281571626663, "learning_rate": 0.0005, "loss": 2.1162, "step": 142140 }, { "epoch": 0.5410579843639381, "grad_norm": 0.11859180778265, "learning_rate": 0.0005, "loss": 2.1167, "step": 142150 }, { "epoch": 0.5410960468320608, "grad_norm": 0.13315057754516602, "learning_rate": 0.0005, "loss": 2.1313, "step": 142160 }, { "epoch": 0.5411341093001835, "grad_norm": 0.1338150054216385, "learning_rate": 0.0005, "loss": 2.1156, "step": 142170 }, { "epoch": 0.5411721717683061, "grad_norm": 0.13176853954792023, "learning_rate": 0.0005, "loss": 2.1253, "step": 142180 }, { "epoch": 0.5412102342364288, "grad_norm": 0.1379355937242508, "learning_rate": 0.0005, "loss": 2.1129, "step": 142190 }, { "epoch": 0.5412482967045515, "grad_norm": 0.12809717655181885, "learning_rate": 0.0005, "loss": 2.0982, "step": 142200 }, { "epoch": 0.5412863591726742, "grad_norm": 0.1322251707315445, "learning_rate": 0.0005, "loss": 2.1167, "step": 142210 }, { "epoch": 0.5413244216407969, "grad_norm": 0.13161404430866241, "learning_rate": 0.0005, "loss": 2.1081, "step": 142220 }, { "epoch": 0.5413624841089195, "grad_norm": 0.13390658795833588, "learning_rate": 0.0005, "loss": 2.1085, "step": 142230 }, { "epoch": 0.5414005465770423, "grad_norm": 0.12221341580152512, "learning_rate": 0.0005, "loss": 2.0948, "step": 142240 }, { "epoch": 0.5414386090451649, "grad_norm": 0.12755192816257477, "learning_rate": 0.0005, "loss": 2.1097, "step": 142250 }, { "epoch": 0.5414766715132876, "grad_norm": 0.11980967968702316, "learning_rate": 0.0005, "loss": 2.1192, "step": 142260 }, { "epoch": 0.5415147339814103, "grad_norm": 0.1292906403541565, "learning_rate": 0.0005, "loss": 2.1072, "step": 142270 }, { "epoch": 0.541552796449533, "grad_norm": 0.11971554160118103, "learning_rate": 0.0005, "loss": 2.1013, "step": 142280 }, { "epoch": 0.5415908589176557, "grad_norm": 0.11435715854167938, "learning_rate": 0.0005, "loss": 2.0947, "step": 142290 }, { "epoch": 0.5416289213857783, "grad_norm": 0.12895023822784424, "learning_rate": 0.0005, "loss": 2.11, "step": 142300 }, { "epoch": 0.541666983853901, "grad_norm": 0.11909782886505127, "learning_rate": 0.0005, "loss": 2.1274, "step": 142310 }, { "epoch": 0.5417050463220237, "grad_norm": 0.12879256904125214, "learning_rate": 0.0005, "loss": 2.1063, "step": 142320 }, { "epoch": 0.5417431087901464, "grad_norm": 0.12347320467233658, "learning_rate": 0.0005, "loss": 2.1294, "step": 142330 }, { "epoch": 0.5417811712582691, "grad_norm": 0.12600278854370117, "learning_rate": 0.0005, "loss": 2.0998, "step": 142340 }, { "epoch": 0.5418192337263917, "grad_norm": 0.14141617715358734, "learning_rate": 0.0005, "loss": 2.1142, "step": 142350 }, { "epoch": 0.5418572961945144, "grad_norm": 0.13818462193012238, "learning_rate": 0.0005, "loss": 2.1022, "step": 142360 }, { "epoch": 0.5418953586626372, "grad_norm": 0.1478867083787918, "learning_rate": 0.0005, "loss": 2.1235, "step": 142370 }, { "epoch": 0.5419334211307598, "grad_norm": 0.12242863327264786, "learning_rate": 0.0005, "loss": 2.1126, "step": 142380 }, { "epoch": 0.5419714835988825, "grad_norm": 0.128449484705925, "learning_rate": 0.0005, "loss": 2.1122, "step": 142390 }, { "epoch": 0.5420095460670051, "grad_norm": 0.13392701745033264, "learning_rate": 0.0005, "loss": 2.1172, "step": 142400 }, { "epoch": 0.5420476085351279, "grad_norm": 0.12477075308561325, "learning_rate": 0.0005, "loss": 2.0901, "step": 142410 }, { "epoch": 0.5420856710032506, "grad_norm": 0.12193284183740616, "learning_rate": 0.0005, "loss": 2.1088, "step": 142420 }, { "epoch": 0.5421237334713732, "grad_norm": 0.1314396858215332, "learning_rate": 0.0005, "loss": 2.1073, "step": 142430 }, { "epoch": 0.5421617959394959, "grad_norm": 0.12364444881677628, "learning_rate": 0.0005, "loss": 2.1175, "step": 142440 }, { "epoch": 0.5421998584076185, "grad_norm": 0.12623411417007446, "learning_rate": 0.0005, "loss": 2.1039, "step": 142450 }, { "epoch": 0.5422379208757413, "grad_norm": 0.1251022219657898, "learning_rate": 0.0005, "loss": 2.0981, "step": 142460 }, { "epoch": 0.542275983343864, "grad_norm": 0.12845852971076965, "learning_rate": 0.0005, "loss": 2.116, "step": 142470 }, { "epoch": 0.5423140458119866, "grad_norm": 0.11740121245384216, "learning_rate": 0.0005, "loss": 2.1135, "step": 142480 }, { "epoch": 0.5423521082801093, "grad_norm": 0.1352996677160263, "learning_rate": 0.0005, "loss": 2.1214, "step": 142490 }, { "epoch": 0.542390170748232, "grad_norm": 0.11552150547504425, "learning_rate": 0.0005, "loss": 2.1012, "step": 142500 }, { "epoch": 0.5424282332163547, "grad_norm": 0.12196964770555496, "learning_rate": 0.0005, "loss": 2.1148, "step": 142510 }, { "epoch": 0.5424662956844774, "grad_norm": 0.1349833607673645, "learning_rate": 0.0005, "loss": 2.1357, "step": 142520 }, { "epoch": 0.5425043581526, "grad_norm": 0.11843696981668472, "learning_rate": 0.0005, "loss": 2.1041, "step": 142530 }, { "epoch": 0.5425424206207228, "grad_norm": 0.12222271412611008, "learning_rate": 0.0005, "loss": 2.1078, "step": 142540 }, { "epoch": 0.5425804830888454, "grad_norm": 0.12116561830043793, "learning_rate": 0.0005, "loss": 2.1186, "step": 142550 }, { "epoch": 0.5426185455569681, "grad_norm": 0.13041633367538452, "learning_rate": 0.0005, "loss": 2.1115, "step": 142560 }, { "epoch": 0.5426566080250907, "grad_norm": 0.12237266451120377, "learning_rate": 0.0005, "loss": 2.1073, "step": 142570 }, { "epoch": 0.5426946704932135, "grad_norm": 0.12065397948026657, "learning_rate": 0.0005, "loss": 2.106, "step": 142580 }, { "epoch": 0.5427327329613362, "grad_norm": 0.12925831973552704, "learning_rate": 0.0005, "loss": 2.1004, "step": 142590 }, { "epoch": 0.5427707954294588, "grad_norm": 0.12130697816610336, "learning_rate": 0.0005, "loss": 2.1148, "step": 142600 }, { "epoch": 0.5428088578975815, "grad_norm": 0.1194259449839592, "learning_rate": 0.0005, "loss": 2.1025, "step": 142610 }, { "epoch": 0.5428469203657041, "grad_norm": 0.13163216412067413, "learning_rate": 0.0005, "loss": 2.1121, "step": 142620 }, { "epoch": 0.5428849828338269, "grad_norm": 0.12867335975170135, "learning_rate": 0.0005, "loss": 2.104, "step": 142630 }, { "epoch": 0.5429230453019496, "grad_norm": 0.11896099895238876, "learning_rate": 0.0005, "loss": 2.0988, "step": 142640 }, { "epoch": 0.5429611077700722, "grad_norm": 0.12593241035938263, "learning_rate": 0.0005, "loss": 2.1179, "step": 142650 }, { "epoch": 0.5429991702381949, "grad_norm": 0.14316798746585846, "learning_rate": 0.0005, "loss": 2.1184, "step": 142660 }, { "epoch": 0.5430372327063177, "grad_norm": 0.1276978999376297, "learning_rate": 0.0005, "loss": 2.1265, "step": 142670 }, { "epoch": 0.5430752951744403, "grad_norm": 0.1307539939880371, "learning_rate": 0.0005, "loss": 2.1207, "step": 142680 }, { "epoch": 0.543113357642563, "grad_norm": 0.12179253995418549, "learning_rate": 0.0005, "loss": 2.1065, "step": 142690 }, { "epoch": 0.5431514201106856, "grad_norm": 0.12673813104629517, "learning_rate": 0.0005, "loss": 2.1269, "step": 142700 }, { "epoch": 0.5431894825788084, "grad_norm": 0.12629275023937225, "learning_rate": 0.0005, "loss": 2.102, "step": 142710 }, { "epoch": 0.543227545046931, "grad_norm": 0.11426787823438644, "learning_rate": 0.0005, "loss": 2.1201, "step": 142720 }, { "epoch": 0.5432656075150537, "grad_norm": 0.11684422940015793, "learning_rate": 0.0005, "loss": 2.1177, "step": 142730 }, { "epoch": 0.5433036699831764, "grad_norm": 0.1389496624469757, "learning_rate": 0.0005, "loss": 2.117, "step": 142740 }, { "epoch": 0.543341732451299, "grad_norm": 0.1356937736272812, "learning_rate": 0.0005, "loss": 2.1262, "step": 142750 }, { "epoch": 0.5433797949194218, "grad_norm": 0.12385162711143494, "learning_rate": 0.0005, "loss": 2.1168, "step": 142760 }, { "epoch": 0.5434178573875444, "grad_norm": 0.12453263252973557, "learning_rate": 0.0005, "loss": 2.1067, "step": 142770 }, { "epoch": 0.5434559198556671, "grad_norm": 0.12733960151672363, "learning_rate": 0.0005, "loss": 2.1172, "step": 142780 }, { "epoch": 0.5434939823237898, "grad_norm": 0.1254967898130417, "learning_rate": 0.0005, "loss": 2.1094, "step": 142790 }, { "epoch": 0.5435320447919125, "grad_norm": 0.11459669470787048, "learning_rate": 0.0005, "loss": 2.1191, "step": 142800 }, { "epoch": 0.5435701072600352, "grad_norm": 0.11403346061706543, "learning_rate": 0.0005, "loss": 2.111, "step": 142810 }, { "epoch": 0.5436081697281578, "grad_norm": 0.1326649934053421, "learning_rate": 0.0005, "loss": 2.1002, "step": 142820 }, { "epoch": 0.5436462321962805, "grad_norm": 0.12152086943387985, "learning_rate": 0.0005, "loss": 2.1106, "step": 142830 }, { "epoch": 0.5436842946644033, "grad_norm": 0.11927484720945358, "learning_rate": 0.0005, "loss": 2.1243, "step": 142840 }, { "epoch": 0.5437223571325259, "grad_norm": 0.11907021701335907, "learning_rate": 0.0005, "loss": 2.1049, "step": 142850 }, { "epoch": 0.5437604196006486, "grad_norm": 0.11528225243091583, "learning_rate": 0.0005, "loss": 2.1089, "step": 142860 }, { "epoch": 0.5437984820687712, "grad_norm": 0.12464199960231781, "learning_rate": 0.0005, "loss": 2.1033, "step": 142870 }, { "epoch": 0.5438365445368939, "grad_norm": 0.125784769654274, "learning_rate": 0.0005, "loss": 2.1308, "step": 142880 }, { "epoch": 0.5438746070050167, "grad_norm": 0.11493180692195892, "learning_rate": 0.0005, "loss": 2.1162, "step": 142890 }, { "epoch": 0.5439126694731393, "grad_norm": 0.11990799009799957, "learning_rate": 0.0005, "loss": 2.1002, "step": 142900 }, { "epoch": 0.543950731941262, "grad_norm": 0.1233905702829361, "learning_rate": 0.0005, "loss": 2.1245, "step": 142910 }, { "epoch": 0.5439887944093846, "grad_norm": 0.11480487883090973, "learning_rate": 0.0005, "loss": 2.1007, "step": 142920 }, { "epoch": 0.5440268568775074, "grad_norm": 0.13271062076091766, "learning_rate": 0.0005, "loss": 2.119, "step": 142930 }, { "epoch": 0.5440649193456301, "grad_norm": 0.12418513745069504, "learning_rate": 0.0005, "loss": 2.0973, "step": 142940 }, { "epoch": 0.5441029818137527, "grad_norm": 0.11403152346611023, "learning_rate": 0.0005, "loss": 2.1102, "step": 142950 }, { "epoch": 0.5441410442818754, "grad_norm": 0.11532585322856903, "learning_rate": 0.0005, "loss": 2.1346, "step": 142960 }, { "epoch": 0.5441791067499981, "grad_norm": 0.11966732144355774, "learning_rate": 0.0005, "loss": 2.1165, "step": 142970 }, { "epoch": 0.5442171692181208, "grad_norm": 0.1239163726568222, "learning_rate": 0.0005, "loss": 2.1048, "step": 142980 }, { "epoch": 0.5442552316862435, "grad_norm": 0.12124045938253403, "learning_rate": 0.0005, "loss": 2.1244, "step": 142990 }, { "epoch": 0.5442932941543661, "grad_norm": 0.12653355300426483, "learning_rate": 0.0005, "loss": 2.1115, "step": 143000 }, { "epoch": 0.5443313566224889, "grad_norm": 0.12473955005407333, "learning_rate": 0.0005, "loss": 2.1097, "step": 143010 }, { "epoch": 0.5443694190906115, "grad_norm": 0.12466265261173248, "learning_rate": 0.0005, "loss": 2.1091, "step": 143020 }, { "epoch": 0.5444074815587342, "grad_norm": 0.12356074899435043, "learning_rate": 0.0005, "loss": 2.1128, "step": 143030 }, { "epoch": 0.5444455440268569, "grad_norm": 0.1347591131925583, "learning_rate": 0.0005, "loss": 2.1172, "step": 143040 }, { "epoch": 0.5444836064949795, "grad_norm": 0.1383460909128189, "learning_rate": 0.0005, "loss": 2.0978, "step": 143050 }, { "epoch": 0.5445216689631023, "grad_norm": 0.12867282330989838, "learning_rate": 0.0005, "loss": 2.1251, "step": 143060 }, { "epoch": 0.5445597314312249, "grad_norm": 0.12898746132850647, "learning_rate": 0.0005, "loss": 2.1063, "step": 143070 }, { "epoch": 0.5445977938993476, "grad_norm": 0.13376469910144806, "learning_rate": 0.0005, "loss": 2.1098, "step": 143080 }, { "epoch": 0.5446358563674703, "grad_norm": 0.12445175647735596, "learning_rate": 0.0005, "loss": 2.115, "step": 143090 }, { "epoch": 0.544673918835593, "grad_norm": 0.1341526210308075, "learning_rate": 0.0005, "loss": 2.1156, "step": 143100 }, { "epoch": 0.5447119813037157, "grad_norm": 0.13039885461330414, "learning_rate": 0.0005, "loss": 2.1169, "step": 143110 }, { "epoch": 0.5447500437718383, "grad_norm": 0.11942508816719055, "learning_rate": 0.0005, "loss": 2.1165, "step": 143120 }, { "epoch": 0.544788106239961, "grad_norm": 0.1156260222196579, "learning_rate": 0.0005, "loss": 2.1148, "step": 143130 }, { "epoch": 0.5448261687080838, "grad_norm": 0.12988288700580597, "learning_rate": 0.0005, "loss": 2.1145, "step": 143140 }, { "epoch": 0.5448642311762064, "grad_norm": 0.13214020431041718, "learning_rate": 0.0005, "loss": 2.0997, "step": 143150 }, { "epoch": 0.5449022936443291, "grad_norm": 0.12015897035598755, "learning_rate": 0.0005, "loss": 2.1147, "step": 143160 }, { "epoch": 0.5449403561124517, "grad_norm": 0.12021349370479584, "learning_rate": 0.0005, "loss": 2.1168, "step": 143170 }, { "epoch": 0.5449784185805744, "grad_norm": 0.1231183409690857, "learning_rate": 0.0005, "loss": 2.1055, "step": 143180 }, { "epoch": 0.5450164810486972, "grad_norm": 0.13053947687149048, "learning_rate": 0.0005, "loss": 2.1112, "step": 143190 }, { "epoch": 0.5450545435168198, "grad_norm": 0.1145208552479744, "learning_rate": 0.0005, "loss": 2.1226, "step": 143200 }, { "epoch": 0.5450926059849425, "grad_norm": 0.12140737473964691, "learning_rate": 0.0005, "loss": 2.1247, "step": 143210 }, { "epoch": 0.5451306684530651, "grad_norm": 0.14588138461112976, "learning_rate": 0.0005, "loss": 2.1052, "step": 143220 }, { "epoch": 0.5451687309211879, "grad_norm": 0.1178545281291008, "learning_rate": 0.0005, "loss": 2.1026, "step": 143230 }, { "epoch": 0.5452067933893106, "grad_norm": 0.11969448626041412, "learning_rate": 0.0005, "loss": 2.1176, "step": 143240 }, { "epoch": 0.5452448558574332, "grad_norm": 0.12450990080833435, "learning_rate": 0.0005, "loss": 2.0958, "step": 143250 }, { "epoch": 0.5452829183255559, "grad_norm": 0.12544794380664825, "learning_rate": 0.0005, "loss": 2.1068, "step": 143260 }, { "epoch": 0.5453209807936786, "grad_norm": 0.11299612373113632, "learning_rate": 0.0005, "loss": 2.0959, "step": 143270 }, { "epoch": 0.5453590432618013, "grad_norm": 0.11318644136190414, "learning_rate": 0.0005, "loss": 2.1207, "step": 143280 }, { "epoch": 0.545397105729924, "grad_norm": 0.12822823226451874, "learning_rate": 0.0005, "loss": 2.1054, "step": 143290 }, { "epoch": 0.5454351681980466, "grad_norm": 0.11486469954252243, "learning_rate": 0.0005, "loss": 2.1179, "step": 143300 }, { "epoch": 0.5454732306661693, "grad_norm": 0.14914393424987793, "learning_rate": 0.0005, "loss": 2.1145, "step": 143310 }, { "epoch": 0.545511293134292, "grad_norm": 0.15870623290538788, "learning_rate": 0.0005, "loss": 2.1115, "step": 143320 }, { "epoch": 0.5455493556024147, "grad_norm": 0.15740184485912323, "learning_rate": 0.0005, "loss": 2.12, "step": 143330 }, { "epoch": 0.5455874180705373, "grad_norm": 0.1271182894706726, "learning_rate": 0.0005, "loss": 2.1122, "step": 143340 }, { "epoch": 0.54562548053866, "grad_norm": 0.12603406608104706, "learning_rate": 0.0005, "loss": 2.1167, "step": 143350 }, { "epoch": 0.5456635430067828, "grad_norm": 0.11953301727771759, "learning_rate": 0.0005, "loss": 2.1122, "step": 143360 }, { "epoch": 0.5457016054749054, "grad_norm": 0.1294298619031906, "learning_rate": 0.0005, "loss": 2.123, "step": 143370 }, { "epoch": 0.5457396679430281, "grad_norm": 0.14407844841480255, "learning_rate": 0.0005, "loss": 2.1127, "step": 143380 }, { "epoch": 0.5457777304111507, "grad_norm": 0.6056898236274719, "learning_rate": 0.0005, "loss": 2.1222, "step": 143390 }, { "epoch": 0.5458157928792735, "grad_norm": 0.11539902538061142, "learning_rate": 0.0005, "loss": 2.1055, "step": 143400 }, { "epoch": 0.5458538553473962, "grad_norm": 0.11941809207201004, "learning_rate": 0.0005, "loss": 2.1155, "step": 143410 }, { "epoch": 0.5458919178155188, "grad_norm": 0.12149988114833832, "learning_rate": 0.0005, "loss": 2.1061, "step": 143420 }, { "epoch": 0.5459299802836415, "grad_norm": 0.11598517745733261, "learning_rate": 0.0005, "loss": 2.1172, "step": 143430 }, { "epoch": 0.5459680427517642, "grad_norm": 0.12989123165607452, "learning_rate": 0.0005, "loss": 2.0952, "step": 143440 }, { "epoch": 0.5460061052198869, "grad_norm": 0.13632255792617798, "learning_rate": 0.0005, "loss": 2.1123, "step": 143450 }, { "epoch": 0.5460441676880096, "grad_norm": 0.12819725275039673, "learning_rate": 0.0005, "loss": 2.1074, "step": 143460 }, { "epoch": 0.5460822301561322, "grad_norm": 0.1302596926689148, "learning_rate": 0.0005, "loss": 2.1171, "step": 143470 }, { "epoch": 0.5461202926242549, "grad_norm": 0.11973568797111511, "learning_rate": 0.0005, "loss": 2.1172, "step": 143480 }, { "epoch": 0.5461583550923776, "grad_norm": 0.1287134885787964, "learning_rate": 0.0005, "loss": 2.1053, "step": 143490 }, { "epoch": 0.5461964175605003, "grad_norm": 0.12302163988351822, "learning_rate": 0.0005, "loss": 2.1131, "step": 143500 }, { "epoch": 0.546234480028623, "grad_norm": 0.14660359919071198, "learning_rate": 0.0005, "loss": 2.1133, "step": 143510 }, { "epoch": 0.5462725424967456, "grad_norm": 0.13259245455265045, "learning_rate": 0.0005, "loss": 2.1164, "step": 143520 }, { "epoch": 0.5463106049648684, "grad_norm": 0.1288929283618927, "learning_rate": 0.0005, "loss": 2.1122, "step": 143530 }, { "epoch": 0.546348667432991, "grad_norm": 0.1310984343290329, "learning_rate": 0.0005, "loss": 2.0969, "step": 143540 }, { "epoch": 0.5463867299011137, "grad_norm": 0.121677465736866, "learning_rate": 0.0005, "loss": 2.1159, "step": 143550 }, { "epoch": 0.5464247923692364, "grad_norm": 0.12330687791109085, "learning_rate": 0.0005, "loss": 2.1192, "step": 143560 }, { "epoch": 0.5464628548373591, "grad_norm": 0.1153598204255104, "learning_rate": 0.0005, "loss": 2.1184, "step": 143570 }, { "epoch": 0.5465009173054818, "grad_norm": 0.12227758020162582, "learning_rate": 0.0005, "loss": 2.1123, "step": 143580 }, { "epoch": 0.5465389797736044, "grad_norm": 0.13273516297340393, "learning_rate": 0.0005, "loss": 2.1196, "step": 143590 }, { "epoch": 0.5465770422417271, "grad_norm": 0.1361711323261261, "learning_rate": 0.0005, "loss": 2.1198, "step": 143600 }, { "epoch": 0.5466151047098498, "grad_norm": 0.13460688292980194, "learning_rate": 0.0005, "loss": 2.1085, "step": 143610 }, { "epoch": 0.5466531671779725, "grad_norm": 0.11942264437675476, "learning_rate": 0.0005, "loss": 2.115, "step": 143620 }, { "epoch": 0.5466912296460952, "grad_norm": 0.12050394713878632, "learning_rate": 0.0005, "loss": 2.1111, "step": 143630 }, { "epoch": 0.5467292921142178, "grad_norm": 0.1352856457233429, "learning_rate": 0.0005, "loss": 2.1253, "step": 143640 }, { "epoch": 0.5467673545823405, "grad_norm": 0.12913435697555542, "learning_rate": 0.0005, "loss": 2.1239, "step": 143650 }, { "epoch": 0.5468054170504633, "grad_norm": 0.1403176337480545, "learning_rate": 0.0005, "loss": 2.1259, "step": 143660 }, { "epoch": 0.5468434795185859, "grad_norm": 0.12202751636505127, "learning_rate": 0.0005, "loss": 2.1098, "step": 143670 }, { "epoch": 0.5468815419867086, "grad_norm": 0.13158494234085083, "learning_rate": 0.0005, "loss": 2.1239, "step": 143680 }, { "epoch": 0.5469196044548312, "grad_norm": 0.11883752048015594, "learning_rate": 0.0005, "loss": 2.1047, "step": 143690 }, { "epoch": 0.546957666922954, "grad_norm": 0.12454967200756073, "learning_rate": 0.0005, "loss": 2.1075, "step": 143700 }, { "epoch": 0.5469957293910767, "grad_norm": 0.13018742203712463, "learning_rate": 0.0005, "loss": 2.1177, "step": 143710 }, { "epoch": 0.5470337918591993, "grad_norm": 0.13621671497821808, "learning_rate": 0.0005, "loss": 2.1217, "step": 143720 }, { "epoch": 0.547071854327322, "grad_norm": 0.12384206801652908, "learning_rate": 0.0005, "loss": 2.1211, "step": 143730 }, { "epoch": 0.5471099167954446, "grad_norm": 0.12141196429729462, "learning_rate": 0.0005, "loss": 2.1118, "step": 143740 }, { "epoch": 0.5471479792635674, "grad_norm": 0.12233904749155045, "learning_rate": 0.0005, "loss": 2.1068, "step": 143750 }, { "epoch": 0.54718604173169, "grad_norm": 0.13541485369205475, "learning_rate": 0.0005, "loss": 2.1284, "step": 143760 }, { "epoch": 0.5472241041998127, "grad_norm": 0.12836025655269623, "learning_rate": 0.0005, "loss": 2.1257, "step": 143770 }, { "epoch": 0.5472621666679354, "grad_norm": 0.12747050821781158, "learning_rate": 0.0005, "loss": 2.1284, "step": 143780 }, { "epoch": 0.5473002291360581, "grad_norm": 0.12959854304790497, "learning_rate": 0.0005, "loss": 2.1017, "step": 143790 }, { "epoch": 0.5473382916041808, "grad_norm": 0.12214165925979614, "learning_rate": 0.0005, "loss": 2.1071, "step": 143800 }, { "epoch": 0.5473763540723035, "grad_norm": 0.13413529098033905, "learning_rate": 0.0005, "loss": 2.1237, "step": 143810 }, { "epoch": 0.5474144165404261, "grad_norm": 0.1225849986076355, "learning_rate": 0.0005, "loss": 2.1142, "step": 143820 }, { "epoch": 0.5474524790085489, "grad_norm": 0.13006827235221863, "learning_rate": 0.0005, "loss": 2.116, "step": 143830 }, { "epoch": 0.5474905414766715, "grad_norm": 0.1268097460269928, "learning_rate": 0.0005, "loss": 2.1153, "step": 143840 }, { "epoch": 0.5475286039447942, "grad_norm": 0.1367637664079666, "learning_rate": 0.0005, "loss": 2.1163, "step": 143850 }, { "epoch": 0.5475666664129168, "grad_norm": 0.12964405119419098, "learning_rate": 0.0005, "loss": 2.1072, "step": 143860 }, { "epoch": 0.5476047288810396, "grad_norm": 0.14246685802936554, "learning_rate": 0.0005, "loss": 2.1175, "step": 143870 }, { "epoch": 0.5476427913491623, "grad_norm": 0.14590322971343994, "learning_rate": 0.0005, "loss": 2.1061, "step": 143880 }, { "epoch": 0.5476808538172849, "grad_norm": 0.13974499702453613, "learning_rate": 0.0005, "loss": 2.108, "step": 143890 }, { "epoch": 0.5477189162854076, "grad_norm": 0.12832729518413544, "learning_rate": 0.0005, "loss": 2.1091, "step": 143900 }, { "epoch": 0.5477569787535302, "grad_norm": 0.1358684003353119, "learning_rate": 0.0005, "loss": 2.1041, "step": 143910 }, { "epoch": 0.547795041221653, "grad_norm": 0.11089088767766953, "learning_rate": 0.0005, "loss": 2.1099, "step": 143920 }, { "epoch": 0.5478331036897757, "grad_norm": 0.12461254000663757, "learning_rate": 0.0005, "loss": 2.1122, "step": 143930 }, { "epoch": 0.5478711661578983, "grad_norm": 0.13170070946216583, "learning_rate": 0.0005, "loss": 2.1132, "step": 143940 }, { "epoch": 0.547909228626021, "grad_norm": 0.14326369762420654, "learning_rate": 0.0005, "loss": 2.1222, "step": 143950 }, { "epoch": 0.5479472910941438, "grad_norm": 0.13025934994220734, "learning_rate": 0.0005, "loss": 2.0971, "step": 143960 }, { "epoch": 0.5479853535622664, "grad_norm": 0.13151970505714417, "learning_rate": 0.0005, "loss": 2.1085, "step": 143970 }, { "epoch": 0.5480234160303891, "grad_norm": 0.13055887818336487, "learning_rate": 0.0005, "loss": 2.1138, "step": 143980 }, { "epoch": 0.5480614784985117, "grad_norm": 0.11997563391923904, "learning_rate": 0.0005, "loss": 2.1114, "step": 143990 }, { "epoch": 0.5480995409666345, "grad_norm": 0.13561861217021942, "learning_rate": 0.0005, "loss": 2.1333, "step": 144000 }, { "epoch": 0.5481376034347571, "grad_norm": 0.11981268227100372, "learning_rate": 0.0005, "loss": 2.1219, "step": 144010 }, { "epoch": 0.5481756659028798, "grad_norm": 0.13623487949371338, "learning_rate": 0.0005, "loss": 2.1122, "step": 144020 }, { "epoch": 0.5482137283710025, "grad_norm": 0.13205698132514954, "learning_rate": 0.0005, "loss": 2.1036, "step": 144030 }, { "epoch": 0.5482517908391251, "grad_norm": 0.12684205174446106, "learning_rate": 0.0005, "loss": 2.1255, "step": 144040 }, { "epoch": 0.5482898533072479, "grad_norm": 0.1281534880399704, "learning_rate": 0.0005, "loss": 2.1159, "step": 144050 }, { "epoch": 0.5483279157753705, "grad_norm": 0.14929701387882233, "learning_rate": 0.0005, "loss": 2.1229, "step": 144060 }, { "epoch": 0.5483659782434932, "grad_norm": 0.13052833080291748, "learning_rate": 0.0005, "loss": 2.1411, "step": 144070 }, { "epoch": 0.5484040407116159, "grad_norm": 0.11817128211259842, "learning_rate": 0.0005, "loss": 2.1071, "step": 144080 }, { "epoch": 0.5484421031797386, "grad_norm": 0.12966929376125336, "learning_rate": 0.0005, "loss": 2.1127, "step": 144090 }, { "epoch": 0.5484801656478613, "grad_norm": 0.12929899990558624, "learning_rate": 0.0005, "loss": 2.1032, "step": 144100 }, { "epoch": 0.5485182281159839, "grad_norm": 0.11421916633844376, "learning_rate": 0.0005, "loss": 2.113, "step": 144110 }, { "epoch": 0.5485562905841066, "grad_norm": 0.12206301838159561, "learning_rate": 0.0005, "loss": 2.1146, "step": 144120 }, { "epoch": 0.5485943530522294, "grad_norm": 0.11934498697519302, "learning_rate": 0.0005, "loss": 2.1162, "step": 144130 }, { "epoch": 0.548632415520352, "grad_norm": 0.12408069521188736, "learning_rate": 0.0005, "loss": 2.1188, "step": 144140 }, { "epoch": 0.5486704779884747, "grad_norm": 0.1259586066007614, "learning_rate": 0.0005, "loss": 2.1191, "step": 144150 }, { "epoch": 0.5487085404565973, "grad_norm": 0.12521126866340637, "learning_rate": 0.0005, "loss": 2.1162, "step": 144160 }, { "epoch": 0.5487466029247201, "grad_norm": 0.133035346865654, "learning_rate": 0.0005, "loss": 2.1055, "step": 144170 }, { "epoch": 0.5487846653928428, "grad_norm": 0.1211579367518425, "learning_rate": 0.0005, "loss": 2.1064, "step": 144180 }, { "epoch": 0.5488227278609654, "grad_norm": 0.12172611802816391, "learning_rate": 0.0005, "loss": 2.1134, "step": 144190 }, { "epoch": 0.5488607903290881, "grad_norm": 0.10662341862916946, "learning_rate": 0.0005, "loss": 2.1049, "step": 144200 }, { "epoch": 0.5488988527972107, "grad_norm": 0.12909914553165436, "learning_rate": 0.0005, "loss": 2.117, "step": 144210 }, { "epoch": 0.5489369152653335, "grad_norm": 0.1285155564546585, "learning_rate": 0.0005, "loss": 2.1014, "step": 144220 }, { "epoch": 0.5489749777334562, "grad_norm": 0.12418292462825775, "learning_rate": 0.0005, "loss": 2.0955, "step": 144230 }, { "epoch": 0.5490130402015788, "grad_norm": 0.12626810371875763, "learning_rate": 0.0005, "loss": 2.1282, "step": 144240 }, { "epoch": 0.5490511026697015, "grad_norm": 0.1202656552195549, "learning_rate": 0.0005, "loss": 2.1105, "step": 144250 }, { "epoch": 0.5490891651378242, "grad_norm": 0.12186427414417267, "learning_rate": 0.0005, "loss": 2.1102, "step": 144260 }, { "epoch": 0.5491272276059469, "grad_norm": 0.11100109666585922, "learning_rate": 0.0005, "loss": 2.1092, "step": 144270 }, { "epoch": 0.5491652900740696, "grad_norm": 0.13168954849243164, "learning_rate": 0.0005, "loss": 2.1053, "step": 144280 }, { "epoch": 0.5492033525421922, "grad_norm": 0.129677876830101, "learning_rate": 0.0005, "loss": 2.1039, "step": 144290 }, { "epoch": 0.549241415010315, "grad_norm": 0.12331412732601166, "learning_rate": 0.0005, "loss": 2.1085, "step": 144300 }, { "epoch": 0.5492794774784376, "grad_norm": 0.12670284509658813, "learning_rate": 0.0005, "loss": 2.1049, "step": 144310 }, { "epoch": 0.5493175399465603, "grad_norm": 0.1265244036912918, "learning_rate": 0.0005, "loss": 2.1092, "step": 144320 }, { "epoch": 0.549355602414683, "grad_norm": 0.12023013830184937, "learning_rate": 0.0005, "loss": 2.1152, "step": 144330 }, { "epoch": 0.5493936648828056, "grad_norm": 0.13818997144699097, "learning_rate": 0.0005, "loss": 2.1152, "step": 144340 }, { "epoch": 0.5494317273509284, "grad_norm": 0.11964023113250732, "learning_rate": 0.0005, "loss": 2.0969, "step": 144350 }, { "epoch": 0.549469789819051, "grad_norm": 0.1293233186006546, "learning_rate": 0.0005, "loss": 2.1142, "step": 144360 }, { "epoch": 0.5495078522871737, "grad_norm": 0.12815843522548676, "learning_rate": 0.0005, "loss": 2.1078, "step": 144370 }, { "epoch": 0.5495459147552963, "grad_norm": 0.12119992822408676, "learning_rate": 0.0005, "loss": 2.1097, "step": 144380 }, { "epoch": 0.5495839772234191, "grad_norm": 0.12910519540309906, "learning_rate": 0.0005, "loss": 2.1118, "step": 144390 }, { "epoch": 0.5496220396915418, "grad_norm": 0.12036329507827759, "learning_rate": 0.0005, "loss": 2.1148, "step": 144400 }, { "epoch": 0.5496601021596644, "grad_norm": 0.11510950326919556, "learning_rate": 0.0005, "loss": 2.1175, "step": 144410 }, { "epoch": 0.5496981646277871, "grad_norm": 0.12537814676761627, "learning_rate": 0.0005, "loss": 2.1231, "step": 144420 }, { "epoch": 0.5497362270959099, "grad_norm": 0.12165727466344833, "learning_rate": 0.0005, "loss": 2.1279, "step": 144430 }, { "epoch": 0.5497742895640325, "grad_norm": 0.12165447324514389, "learning_rate": 0.0005, "loss": 2.1201, "step": 144440 }, { "epoch": 0.5498123520321552, "grad_norm": 0.12614521384239197, "learning_rate": 0.0005, "loss": 2.1153, "step": 144450 }, { "epoch": 0.5498504145002778, "grad_norm": 0.11235406249761581, "learning_rate": 0.0005, "loss": 2.1069, "step": 144460 }, { "epoch": 0.5498884769684005, "grad_norm": 0.12776117026805878, "learning_rate": 0.0005, "loss": 2.1164, "step": 144470 }, { "epoch": 0.5499265394365233, "grad_norm": 0.3991162180900574, "learning_rate": 0.0005, "loss": 2.117, "step": 144480 }, { "epoch": 0.5499646019046459, "grad_norm": 0.12186644971370697, "learning_rate": 0.0005, "loss": 2.1263, "step": 144490 }, { "epoch": 0.5500026643727686, "grad_norm": 0.12400142848491669, "learning_rate": 0.0005, "loss": 2.1068, "step": 144500 }, { "epoch": 0.5500407268408912, "grad_norm": 0.12243487685918808, "learning_rate": 0.0005, "loss": 2.116, "step": 144510 }, { "epoch": 0.550078789309014, "grad_norm": 0.12870800495147705, "learning_rate": 0.0005, "loss": 2.1296, "step": 144520 }, { "epoch": 0.5501168517771367, "grad_norm": 0.11797146499156952, "learning_rate": 0.0005, "loss": 2.1118, "step": 144530 }, { "epoch": 0.5501549142452593, "grad_norm": 0.11497573554515839, "learning_rate": 0.0005, "loss": 2.1175, "step": 144540 }, { "epoch": 0.550192976713382, "grad_norm": 0.12143855541944504, "learning_rate": 0.0005, "loss": 2.1058, "step": 144550 }, { "epoch": 0.5502310391815047, "grad_norm": 0.11397191137075424, "learning_rate": 0.0005, "loss": 2.1187, "step": 144560 }, { "epoch": 0.5502691016496274, "grad_norm": 0.13390995562076569, "learning_rate": 0.0005, "loss": 2.1277, "step": 144570 }, { "epoch": 0.55030716411775, "grad_norm": 0.13767433166503906, "learning_rate": 0.0005, "loss": 2.127, "step": 144580 }, { "epoch": 0.5503452265858727, "grad_norm": 0.18040162324905396, "learning_rate": 0.0005, "loss": 2.1286, "step": 144590 }, { "epoch": 0.5503832890539955, "grad_norm": 0.14773793518543243, "learning_rate": 0.0005, "loss": 2.1177, "step": 144600 }, { "epoch": 0.5504213515221181, "grad_norm": 0.12226738035678864, "learning_rate": 0.0005, "loss": 2.1065, "step": 144610 }, { "epoch": 0.5504594139902408, "grad_norm": 0.1189928650856018, "learning_rate": 0.0005, "loss": 2.1162, "step": 144620 }, { "epoch": 0.5504974764583634, "grad_norm": 0.11632554978132248, "learning_rate": 0.0005, "loss": 2.1043, "step": 144630 }, { "epoch": 0.5505355389264861, "grad_norm": 0.11748391389846802, "learning_rate": 0.0005, "loss": 2.1105, "step": 144640 }, { "epoch": 0.5505736013946089, "grad_norm": 0.144392192363739, "learning_rate": 0.0005, "loss": 2.1287, "step": 144650 }, { "epoch": 0.5506116638627315, "grad_norm": 0.15021365880966187, "learning_rate": 0.0005, "loss": 2.1213, "step": 144660 }, { "epoch": 0.5506497263308542, "grad_norm": 0.14086973667144775, "learning_rate": 0.0005, "loss": 2.1242, "step": 144670 }, { "epoch": 0.5506877887989768, "grad_norm": 0.1304117888212204, "learning_rate": 0.0005, "loss": 2.1065, "step": 144680 }, { "epoch": 0.5507258512670996, "grad_norm": 0.12264396250247955, "learning_rate": 0.0005, "loss": 2.1258, "step": 144690 }, { "epoch": 0.5507639137352223, "grad_norm": 0.1307545155286789, "learning_rate": 0.0005, "loss": 2.1161, "step": 144700 }, { "epoch": 0.5508019762033449, "grad_norm": 0.12935005128383636, "learning_rate": 0.0005, "loss": 2.1217, "step": 144710 }, { "epoch": 0.5508400386714676, "grad_norm": 0.13241122663021088, "learning_rate": 0.0005, "loss": 2.1143, "step": 144720 }, { "epoch": 0.5508781011395903, "grad_norm": 0.11758055537939072, "learning_rate": 0.0005, "loss": 2.1027, "step": 144730 }, { "epoch": 0.550916163607713, "grad_norm": 0.12281011044979095, "learning_rate": 0.0005, "loss": 2.1145, "step": 144740 }, { "epoch": 0.5509542260758357, "grad_norm": 0.1272137612104416, "learning_rate": 0.0005, "loss": 2.1168, "step": 144750 }, { "epoch": 0.5509922885439583, "grad_norm": 0.12607891857624054, "learning_rate": 0.0005, "loss": 2.1231, "step": 144760 }, { "epoch": 0.551030351012081, "grad_norm": 0.12848597764968872, "learning_rate": 0.0005, "loss": 2.113, "step": 144770 }, { "epoch": 0.5510684134802037, "grad_norm": 0.13169367611408234, "learning_rate": 0.0005, "loss": 2.1113, "step": 144780 }, { "epoch": 0.5511064759483264, "grad_norm": 0.11452927440404892, "learning_rate": 0.0005, "loss": 2.1075, "step": 144790 }, { "epoch": 0.5511445384164491, "grad_norm": 0.12549881637096405, "learning_rate": 0.0005, "loss": 2.1132, "step": 144800 }, { "epoch": 0.5511826008845717, "grad_norm": 0.12136809527873993, "learning_rate": 0.0005, "loss": 2.1207, "step": 144810 }, { "epoch": 0.5512206633526945, "grad_norm": 0.12465189397335052, "learning_rate": 0.0005, "loss": 2.1129, "step": 144820 }, { "epoch": 0.5512587258208171, "grad_norm": 0.12369824945926666, "learning_rate": 0.0005, "loss": 2.1157, "step": 144830 }, { "epoch": 0.5512967882889398, "grad_norm": 0.11682531982660294, "learning_rate": 0.0005, "loss": 2.115, "step": 144840 }, { "epoch": 0.5513348507570625, "grad_norm": 0.13062603771686554, "learning_rate": 0.0005, "loss": 2.113, "step": 144850 }, { "epoch": 0.5513729132251852, "grad_norm": 0.11250148713588715, "learning_rate": 0.0005, "loss": 2.1164, "step": 144860 }, { "epoch": 0.5514109756933079, "grad_norm": 0.1234857439994812, "learning_rate": 0.0005, "loss": 2.1178, "step": 144870 }, { "epoch": 0.5514490381614305, "grad_norm": 0.12878267467021942, "learning_rate": 0.0005, "loss": 2.1159, "step": 144880 }, { "epoch": 0.5514871006295532, "grad_norm": 0.11966361850500107, "learning_rate": 0.0005, "loss": 2.1138, "step": 144890 }, { "epoch": 0.5515251630976759, "grad_norm": 0.11235833168029785, "learning_rate": 0.0005, "loss": 2.1256, "step": 144900 }, { "epoch": 0.5515632255657986, "grad_norm": 0.1246689185500145, "learning_rate": 0.0005, "loss": 2.1194, "step": 144910 }, { "epoch": 0.5516012880339213, "grad_norm": 0.12522974610328674, "learning_rate": 0.0005, "loss": 2.1241, "step": 144920 }, { "epoch": 0.5516393505020439, "grad_norm": 0.139484241604805, "learning_rate": 0.0005, "loss": 2.1214, "step": 144930 }, { "epoch": 0.5516774129701666, "grad_norm": 0.13401883840560913, "learning_rate": 0.0005, "loss": 2.1097, "step": 144940 }, { "epoch": 0.5517154754382894, "grad_norm": 0.12002041935920715, "learning_rate": 0.0005, "loss": 2.1102, "step": 144950 }, { "epoch": 0.551753537906412, "grad_norm": 0.11653312295675278, "learning_rate": 0.0005, "loss": 2.1151, "step": 144960 }, { "epoch": 0.5517916003745347, "grad_norm": 0.12327057123184204, "learning_rate": 0.0005, "loss": 2.1189, "step": 144970 }, { "epoch": 0.5518296628426573, "grad_norm": 0.1284448206424713, "learning_rate": 0.0005, "loss": 2.1233, "step": 144980 }, { "epoch": 0.5518677253107801, "grad_norm": 0.15450535714626312, "learning_rate": 0.0005, "loss": 2.1039, "step": 144990 }, { "epoch": 0.5519057877789028, "grad_norm": 0.12576664984226227, "learning_rate": 0.0005, "loss": 2.1105, "step": 145000 }, { "epoch": 0.5519438502470254, "grad_norm": 0.11548066139221191, "learning_rate": 0.0005, "loss": 2.0971, "step": 145010 }, { "epoch": 0.5519819127151481, "grad_norm": 0.12530051171779633, "learning_rate": 0.0005, "loss": 2.1169, "step": 145020 }, { "epoch": 0.5520199751832708, "grad_norm": 0.12279170006513596, "learning_rate": 0.0005, "loss": 2.1033, "step": 145030 }, { "epoch": 0.5520580376513935, "grad_norm": 0.12480257451534271, "learning_rate": 0.0005, "loss": 2.0981, "step": 145040 }, { "epoch": 0.5520961001195162, "grad_norm": 0.2465040534734726, "learning_rate": 0.0005, "loss": 2.0929, "step": 145050 }, { "epoch": 0.5521341625876388, "grad_norm": 0.12566375732421875, "learning_rate": 0.0005, "loss": 2.1277, "step": 145060 }, { "epoch": 0.5521722250557615, "grad_norm": 0.12638990581035614, "learning_rate": 0.0005, "loss": 2.1121, "step": 145070 }, { "epoch": 0.5522102875238842, "grad_norm": 0.12483334541320801, "learning_rate": 0.0005, "loss": 2.1111, "step": 145080 }, { "epoch": 0.5522483499920069, "grad_norm": 0.13051484525203705, "learning_rate": 0.0005, "loss": 2.1215, "step": 145090 }, { "epoch": 0.5522864124601295, "grad_norm": 0.13386259973049164, "learning_rate": 0.0005, "loss": 2.1053, "step": 145100 }, { "epoch": 0.5523244749282522, "grad_norm": 0.12317392975091934, "learning_rate": 0.0005, "loss": 2.1075, "step": 145110 }, { "epoch": 0.552362537396375, "grad_norm": 0.11211195588111877, "learning_rate": 0.0005, "loss": 2.109, "step": 145120 }, { "epoch": 0.5524005998644976, "grad_norm": 0.128127783536911, "learning_rate": 0.0005, "loss": 2.1114, "step": 145130 }, { "epoch": 0.5524386623326203, "grad_norm": 0.1159677729010582, "learning_rate": 0.0005, "loss": 2.1058, "step": 145140 }, { "epoch": 0.552476724800743, "grad_norm": 0.1285521537065506, "learning_rate": 0.0005, "loss": 2.1253, "step": 145150 }, { "epoch": 0.5525147872688657, "grad_norm": 0.11685515195131302, "learning_rate": 0.0005, "loss": 2.1179, "step": 145160 }, { "epoch": 0.5525528497369884, "grad_norm": 0.14790181815624237, "learning_rate": 0.0005, "loss": 2.1146, "step": 145170 }, { "epoch": 0.552590912205111, "grad_norm": 0.14180521667003632, "learning_rate": 0.0005, "loss": 2.1194, "step": 145180 }, { "epoch": 0.5526289746732337, "grad_norm": 0.12453516572713852, "learning_rate": 0.0005, "loss": 2.1108, "step": 145190 }, { "epoch": 0.5526670371413563, "grad_norm": 0.1201147809624672, "learning_rate": 0.0005, "loss": 2.1368, "step": 145200 }, { "epoch": 0.5527050996094791, "grad_norm": 0.12104005366563797, "learning_rate": 0.0005, "loss": 2.1057, "step": 145210 }, { "epoch": 0.5527431620776018, "grad_norm": 0.11533419787883759, "learning_rate": 0.0005, "loss": 2.1068, "step": 145220 }, { "epoch": 0.5527812245457244, "grad_norm": 0.12507320940494537, "learning_rate": 0.0005, "loss": 2.1201, "step": 145230 }, { "epoch": 0.5528192870138471, "grad_norm": 0.12377454340457916, "learning_rate": 0.0005, "loss": 2.1006, "step": 145240 }, { "epoch": 0.5528573494819699, "grad_norm": 0.13272665441036224, "learning_rate": 0.0005, "loss": 2.1079, "step": 145250 }, { "epoch": 0.5528954119500925, "grad_norm": 0.12817463278770447, "learning_rate": 0.0005, "loss": 2.1238, "step": 145260 }, { "epoch": 0.5529334744182152, "grad_norm": 0.11743728816509247, "learning_rate": 0.0005, "loss": 2.1014, "step": 145270 }, { "epoch": 0.5529715368863378, "grad_norm": 0.13543175160884857, "learning_rate": 0.0005, "loss": 2.1161, "step": 145280 }, { "epoch": 0.5530095993544606, "grad_norm": 0.13341118395328522, "learning_rate": 0.0005, "loss": 2.1195, "step": 145290 }, { "epoch": 0.5530476618225832, "grad_norm": 0.1958242803812027, "learning_rate": 0.0005, "loss": 2.1158, "step": 145300 }, { "epoch": 0.5530857242907059, "grad_norm": 0.12311431765556335, "learning_rate": 0.0005, "loss": 2.122, "step": 145310 }, { "epoch": 0.5531237867588286, "grad_norm": 0.11194714903831482, "learning_rate": 0.0005, "loss": 2.1244, "step": 145320 }, { "epoch": 0.5531618492269512, "grad_norm": 0.1213444247841835, "learning_rate": 0.0005, "loss": 2.098, "step": 145330 }, { "epoch": 0.553199911695074, "grad_norm": 0.11784545332193375, "learning_rate": 0.0005, "loss": 2.1177, "step": 145340 }, { "epoch": 0.5532379741631966, "grad_norm": 0.1280663013458252, "learning_rate": 0.0005, "loss": 2.1182, "step": 145350 }, { "epoch": 0.5532760366313193, "grad_norm": 0.13075262308120728, "learning_rate": 0.0005, "loss": 2.117, "step": 145360 }, { "epoch": 0.553314099099442, "grad_norm": 0.14177240431308746, "learning_rate": 0.0005, "loss": 2.1138, "step": 145370 }, { "epoch": 0.5533521615675647, "grad_norm": 0.17335639894008636, "learning_rate": 0.0005, "loss": 2.1034, "step": 145380 }, { "epoch": 0.5533902240356874, "grad_norm": 0.12055764347314835, "learning_rate": 0.0005, "loss": 2.1119, "step": 145390 }, { "epoch": 0.55342828650381, "grad_norm": 0.12622298300266266, "learning_rate": 0.0005, "loss": 2.1094, "step": 145400 }, { "epoch": 0.5534663489719327, "grad_norm": 0.13050603866577148, "learning_rate": 0.0005, "loss": 2.1116, "step": 145410 }, { "epoch": 0.5535044114400555, "grad_norm": 0.12804704904556274, "learning_rate": 0.0005, "loss": 2.0984, "step": 145420 }, { "epoch": 0.5535424739081781, "grad_norm": 0.13104109466075897, "learning_rate": 0.0005, "loss": 2.1191, "step": 145430 }, { "epoch": 0.5535805363763008, "grad_norm": 0.11165295541286469, "learning_rate": 0.0005, "loss": 2.1112, "step": 145440 }, { "epoch": 0.5536185988444234, "grad_norm": 0.12978315353393555, "learning_rate": 0.0005, "loss": 2.0952, "step": 145450 }, { "epoch": 0.5536566613125462, "grad_norm": 0.1414872109889984, "learning_rate": 0.0005, "loss": 2.1171, "step": 145460 }, { "epoch": 0.5536947237806689, "grad_norm": 0.12297790497541428, "learning_rate": 0.0005, "loss": 2.109, "step": 145470 }, { "epoch": 0.5537327862487915, "grad_norm": 0.12759721279144287, "learning_rate": 0.0005, "loss": 2.1227, "step": 145480 }, { "epoch": 0.5537708487169142, "grad_norm": 0.11224611848592758, "learning_rate": 0.0005, "loss": 2.103, "step": 145490 }, { "epoch": 0.5538089111850368, "grad_norm": 0.13177062571048737, "learning_rate": 0.0005, "loss": 2.1145, "step": 145500 }, { "epoch": 0.5538469736531596, "grad_norm": 0.12307994067668915, "learning_rate": 0.0005, "loss": 2.1142, "step": 145510 }, { "epoch": 0.5538850361212823, "grad_norm": 0.13711430132389069, "learning_rate": 0.0005, "loss": 2.1087, "step": 145520 }, { "epoch": 0.5539230985894049, "grad_norm": 0.12840867042541504, "learning_rate": 0.0005, "loss": 2.1172, "step": 145530 }, { "epoch": 0.5539611610575276, "grad_norm": 0.14088504016399384, "learning_rate": 0.0005, "loss": 2.1149, "step": 145540 }, { "epoch": 0.5539992235256503, "grad_norm": 0.1343139261007309, "learning_rate": 0.0005, "loss": 2.1208, "step": 145550 }, { "epoch": 0.554037285993773, "grad_norm": 0.11925197392702103, "learning_rate": 0.0005, "loss": 2.1242, "step": 145560 }, { "epoch": 0.5540753484618957, "grad_norm": 0.12442201375961304, "learning_rate": 0.0005, "loss": 2.1059, "step": 145570 }, { "epoch": 0.5541134109300183, "grad_norm": 0.11571851372718811, "learning_rate": 0.0005, "loss": 2.1083, "step": 145580 }, { "epoch": 0.5541514733981411, "grad_norm": 0.12373834103345871, "learning_rate": 0.0005, "loss": 2.109, "step": 145590 }, { "epoch": 0.5541895358662637, "grad_norm": 0.1327885538339615, "learning_rate": 0.0005, "loss": 2.11, "step": 145600 }, { "epoch": 0.5542275983343864, "grad_norm": 0.13805131614208221, "learning_rate": 0.0005, "loss": 2.1285, "step": 145610 }, { "epoch": 0.554265660802509, "grad_norm": 0.12871071696281433, "learning_rate": 0.0005, "loss": 2.1163, "step": 145620 }, { "epoch": 0.5543037232706317, "grad_norm": 0.3121941387653351, "learning_rate": 0.0005, "loss": 2.1193, "step": 145630 }, { "epoch": 0.5543417857387545, "grad_norm": 0.12596552073955536, "learning_rate": 0.0005, "loss": 2.1153, "step": 145640 }, { "epoch": 0.5543798482068771, "grad_norm": 0.12760427594184875, "learning_rate": 0.0005, "loss": 2.0993, "step": 145650 }, { "epoch": 0.5544179106749998, "grad_norm": 0.11807743459939957, "learning_rate": 0.0005, "loss": 2.1218, "step": 145660 }, { "epoch": 0.5544559731431224, "grad_norm": 0.12287381291389465, "learning_rate": 0.0005, "loss": 2.1164, "step": 145670 }, { "epoch": 0.5544940356112452, "grad_norm": 0.12048117816448212, "learning_rate": 0.0005, "loss": 2.1225, "step": 145680 }, { "epoch": 0.5545320980793679, "grad_norm": 0.12428100407123566, "learning_rate": 0.0005, "loss": 2.1202, "step": 145690 }, { "epoch": 0.5545701605474905, "grad_norm": 0.11523248255252838, "learning_rate": 0.0005, "loss": 2.1015, "step": 145700 }, { "epoch": 0.5546082230156132, "grad_norm": 0.11993500590324402, "learning_rate": 0.0005, "loss": 2.117, "step": 145710 }, { "epoch": 0.554646285483736, "grad_norm": 0.12582233548164368, "learning_rate": 0.0005, "loss": 2.1165, "step": 145720 }, { "epoch": 0.5546843479518586, "grad_norm": 0.13026317954063416, "learning_rate": 0.0005, "loss": 2.118, "step": 145730 }, { "epoch": 0.5547224104199813, "grad_norm": 0.13820697367191315, "learning_rate": 0.0005, "loss": 2.1055, "step": 145740 }, { "epoch": 0.5547604728881039, "grad_norm": 0.12032381445169449, "learning_rate": 0.0005, "loss": 2.1044, "step": 145750 }, { "epoch": 0.5547985353562266, "grad_norm": 0.12701494991779327, "learning_rate": 0.0005, "loss": 2.1034, "step": 145760 }, { "epoch": 0.5548365978243494, "grad_norm": 0.12934359908103943, "learning_rate": 0.0005, "loss": 2.0975, "step": 145770 }, { "epoch": 0.554874660292472, "grad_norm": 0.1251780092716217, "learning_rate": 0.0005, "loss": 2.1138, "step": 145780 }, { "epoch": 0.5549127227605947, "grad_norm": 0.12064220756292343, "learning_rate": 0.0005, "loss": 2.1098, "step": 145790 }, { "epoch": 0.5549507852287173, "grad_norm": 0.12095960229635239, "learning_rate": 0.0005, "loss": 2.1008, "step": 145800 }, { "epoch": 0.5549888476968401, "grad_norm": 0.11686307191848755, "learning_rate": 0.0005, "loss": 2.1156, "step": 145810 }, { "epoch": 0.5550269101649628, "grad_norm": 0.13717177510261536, "learning_rate": 0.0005, "loss": 2.1127, "step": 145820 }, { "epoch": 0.5550649726330854, "grad_norm": 0.13584278523921967, "learning_rate": 0.0005, "loss": 2.1177, "step": 145830 }, { "epoch": 0.5551030351012081, "grad_norm": 0.14092442393302917, "learning_rate": 0.0005, "loss": 2.1326, "step": 145840 }, { "epoch": 0.5551410975693308, "grad_norm": 0.13244536519050598, "learning_rate": 0.0005, "loss": 2.1163, "step": 145850 }, { "epoch": 0.5551791600374535, "grad_norm": 0.12420519441366196, "learning_rate": 0.0005, "loss": 2.1157, "step": 145860 }, { "epoch": 0.5552172225055761, "grad_norm": 0.14705169200897217, "learning_rate": 0.0005, "loss": 2.1161, "step": 145870 }, { "epoch": 0.5552552849736988, "grad_norm": 0.12188060581684113, "learning_rate": 0.0005, "loss": 2.1126, "step": 145880 }, { "epoch": 0.5552933474418216, "grad_norm": 0.14049747586250305, "learning_rate": 0.0005, "loss": 2.121, "step": 145890 }, { "epoch": 0.5553314099099442, "grad_norm": 0.12535642087459564, "learning_rate": 0.0005, "loss": 2.1249, "step": 145900 }, { "epoch": 0.5553694723780669, "grad_norm": 0.11171269416809082, "learning_rate": 0.0005, "loss": 2.125, "step": 145910 }, { "epoch": 0.5554075348461895, "grad_norm": 0.1396142989397049, "learning_rate": 0.0005, "loss": 2.109, "step": 145920 }, { "epoch": 0.5554455973143122, "grad_norm": 0.1270921230316162, "learning_rate": 0.0005, "loss": 2.1123, "step": 145930 }, { "epoch": 0.555483659782435, "grad_norm": 0.11808069795370102, "learning_rate": 0.0005, "loss": 2.1162, "step": 145940 }, { "epoch": 0.5555217222505576, "grad_norm": 0.12320335954427719, "learning_rate": 0.0005, "loss": 2.1068, "step": 145950 }, { "epoch": 0.5555597847186803, "grad_norm": 0.12735216319561005, "learning_rate": 0.0005, "loss": 2.1139, "step": 145960 }, { "epoch": 0.5555978471868029, "grad_norm": 0.1367887407541275, "learning_rate": 0.0005, "loss": 2.1088, "step": 145970 }, { "epoch": 0.5556359096549257, "grad_norm": 0.1248806044459343, "learning_rate": 0.0005, "loss": 2.1259, "step": 145980 }, { "epoch": 0.5556739721230484, "grad_norm": 0.13130532205104828, "learning_rate": 0.0005, "loss": 2.101, "step": 145990 }, { "epoch": 0.555712034591171, "grad_norm": 0.13192979991436005, "learning_rate": 0.0005, "loss": 2.103, "step": 146000 }, { "epoch": 0.5557500970592937, "grad_norm": 0.12497913092374802, "learning_rate": 0.0005, "loss": 2.1091, "step": 146010 }, { "epoch": 0.5557881595274164, "grad_norm": 0.1344774216413498, "learning_rate": 0.0005, "loss": 2.1257, "step": 146020 }, { "epoch": 0.5558262219955391, "grad_norm": 0.11685974895954132, "learning_rate": 0.0005, "loss": 2.1256, "step": 146030 }, { "epoch": 0.5558642844636618, "grad_norm": 0.11300405114889145, "learning_rate": 0.0005, "loss": 2.1105, "step": 146040 }, { "epoch": 0.5559023469317844, "grad_norm": 0.11893622577190399, "learning_rate": 0.0005, "loss": 2.1107, "step": 146050 }, { "epoch": 0.5559404093999071, "grad_norm": 0.1360451728105545, "learning_rate": 0.0005, "loss": 2.122, "step": 146060 }, { "epoch": 0.5559784718680298, "grad_norm": 0.1295030117034912, "learning_rate": 0.0005, "loss": 2.1229, "step": 146070 }, { "epoch": 0.5560165343361525, "grad_norm": 0.13259471952915192, "learning_rate": 0.0005, "loss": 2.1185, "step": 146080 }, { "epoch": 0.5560545968042752, "grad_norm": 0.1290517896413803, "learning_rate": 0.0005, "loss": 2.1322, "step": 146090 }, { "epoch": 0.5560926592723978, "grad_norm": 0.1371571272611618, "learning_rate": 0.0005, "loss": 2.1255, "step": 146100 }, { "epoch": 0.5561307217405206, "grad_norm": 0.1228359118103981, "learning_rate": 0.0005, "loss": 2.1103, "step": 146110 }, { "epoch": 0.5561687842086432, "grad_norm": 0.13620351254940033, "learning_rate": 0.0005, "loss": 2.1122, "step": 146120 }, { "epoch": 0.5562068466767659, "grad_norm": 0.12080255150794983, "learning_rate": 0.0005, "loss": 2.1154, "step": 146130 }, { "epoch": 0.5562449091448886, "grad_norm": 0.13528335094451904, "learning_rate": 0.0005, "loss": 2.1178, "step": 146140 }, { "epoch": 0.5562829716130113, "grad_norm": 0.1401769369840622, "learning_rate": 0.0005, "loss": 2.1158, "step": 146150 }, { "epoch": 0.556321034081134, "grad_norm": 0.14545312523841858, "learning_rate": 0.0005, "loss": 2.1173, "step": 146160 }, { "epoch": 0.5563590965492566, "grad_norm": 0.12032944709062576, "learning_rate": 0.0005, "loss": 2.1051, "step": 146170 }, { "epoch": 0.5563971590173793, "grad_norm": 0.13641439378261566, "learning_rate": 0.0005, "loss": 2.1183, "step": 146180 }, { "epoch": 0.556435221485502, "grad_norm": 0.12659136950969696, "learning_rate": 0.0005, "loss": 2.1212, "step": 146190 }, { "epoch": 0.5564732839536247, "grad_norm": 0.13044819235801697, "learning_rate": 0.0005, "loss": 2.1188, "step": 146200 }, { "epoch": 0.5565113464217474, "grad_norm": 0.12508030235767365, "learning_rate": 0.0005, "loss": 2.1136, "step": 146210 }, { "epoch": 0.55654940888987, "grad_norm": 0.12736937403678894, "learning_rate": 0.0005, "loss": 2.1061, "step": 146220 }, { "epoch": 0.5565874713579927, "grad_norm": 0.11774712800979614, "learning_rate": 0.0005, "loss": 2.1155, "step": 146230 }, { "epoch": 0.5566255338261155, "grad_norm": 0.1205480620265007, "learning_rate": 0.0005, "loss": 2.121, "step": 146240 }, { "epoch": 0.5566635962942381, "grad_norm": 0.13173139095306396, "learning_rate": 0.0005, "loss": 2.0966, "step": 146250 }, { "epoch": 0.5567016587623608, "grad_norm": 0.11364980787038803, "learning_rate": 0.0005, "loss": 2.1019, "step": 146260 }, { "epoch": 0.5567397212304834, "grad_norm": 0.12746919691562653, "learning_rate": 0.0005, "loss": 2.1155, "step": 146270 }, { "epoch": 0.5567777836986062, "grad_norm": 0.11526639014482498, "learning_rate": 0.0005, "loss": 2.0978, "step": 146280 }, { "epoch": 0.5568158461667289, "grad_norm": 0.12240004539489746, "learning_rate": 0.0005, "loss": 2.1107, "step": 146290 }, { "epoch": 0.5568539086348515, "grad_norm": 0.17326214909553528, "learning_rate": 0.0005, "loss": 2.1311, "step": 146300 }, { "epoch": 0.5568919711029742, "grad_norm": 0.12917761504650116, "learning_rate": 0.0005, "loss": 2.1097, "step": 146310 }, { "epoch": 0.5569300335710969, "grad_norm": 0.13682439923286438, "learning_rate": 0.0005, "loss": 2.0972, "step": 146320 }, { "epoch": 0.5569680960392196, "grad_norm": 0.12188564985990524, "learning_rate": 0.0005, "loss": 2.098, "step": 146330 }, { "epoch": 0.5570061585073423, "grad_norm": 0.14334385097026825, "learning_rate": 0.0005, "loss": 2.1091, "step": 146340 }, { "epoch": 0.5570442209754649, "grad_norm": 0.12378527969121933, "learning_rate": 0.0005, "loss": 2.0958, "step": 146350 }, { "epoch": 0.5570822834435876, "grad_norm": 0.11779329925775528, "learning_rate": 0.0005, "loss": 2.1174, "step": 146360 }, { "epoch": 0.5571203459117103, "grad_norm": 0.12320224195718765, "learning_rate": 0.0005, "loss": 2.1087, "step": 146370 }, { "epoch": 0.557158408379833, "grad_norm": 0.13029901683330536, "learning_rate": 0.0005, "loss": 2.0994, "step": 146380 }, { "epoch": 0.5571964708479556, "grad_norm": 0.12013163417577744, "learning_rate": 0.0005, "loss": 2.1112, "step": 146390 }, { "epoch": 0.5572345333160783, "grad_norm": 0.11994480341672897, "learning_rate": 0.0005, "loss": 2.1187, "step": 146400 }, { "epoch": 0.5572725957842011, "grad_norm": 0.1187630295753479, "learning_rate": 0.0005, "loss": 2.1109, "step": 146410 }, { "epoch": 0.5573106582523237, "grad_norm": 0.1293669044971466, "learning_rate": 0.0005, "loss": 2.1082, "step": 146420 }, { "epoch": 0.5573487207204464, "grad_norm": 0.11685261875391006, "learning_rate": 0.0005, "loss": 2.1212, "step": 146430 }, { "epoch": 0.557386783188569, "grad_norm": 0.12134901434183121, "learning_rate": 0.0005, "loss": 2.1298, "step": 146440 }, { "epoch": 0.5574248456566918, "grad_norm": 0.13090312480926514, "learning_rate": 0.0005, "loss": 2.114, "step": 146450 }, { "epoch": 0.5574629081248145, "grad_norm": 0.12619557976722717, "learning_rate": 0.0005, "loss": 2.1305, "step": 146460 }, { "epoch": 0.5575009705929371, "grad_norm": 0.13939683139324188, "learning_rate": 0.0005, "loss": 2.1169, "step": 146470 }, { "epoch": 0.5575390330610598, "grad_norm": 0.1275961995124817, "learning_rate": 0.0005, "loss": 2.1178, "step": 146480 }, { "epoch": 0.5575770955291824, "grad_norm": 0.13163341581821442, "learning_rate": 0.0005, "loss": 2.115, "step": 146490 }, { "epoch": 0.5576151579973052, "grad_norm": 0.1348702758550644, "learning_rate": 0.0005, "loss": 2.1176, "step": 146500 }, { "epoch": 0.5576532204654279, "grad_norm": 0.12150271236896515, "learning_rate": 0.0005, "loss": 2.1189, "step": 146510 }, { "epoch": 0.5576912829335505, "grad_norm": 0.12395389378070831, "learning_rate": 0.0005, "loss": 2.1073, "step": 146520 }, { "epoch": 0.5577293454016732, "grad_norm": 0.1396559327840805, "learning_rate": 0.0005, "loss": 2.1157, "step": 146530 }, { "epoch": 0.557767407869796, "grad_norm": 0.1329929679632187, "learning_rate": 0.0005, "loss": 2.1068, "step": 146540 }, { "epoch": 0.5578054703379186, "grad_norm": 0.12346814572811127, "learning_rate": 0.0005, "loss": 2.1296, "step": 146550 }, { "epoch": 0.5578435328060413, "grad_norm": 0.12914222478866577, "learning_rate": 0.0005, "loss": 2.1166, "step": 146560 }, { "epoch": 0.5578815952741639, "grad_norm": 0.14826074242591858, "learning_rate": 0.0005, "loss": 2.1158, "step": 146570 }, { "epoch": 0.5579196577422867, "grad_norm": 0.13070404529571533, "learning_rate": 0.0005, "loss": 2.1221, "step": 146580 }, { "epoch": 0.5579577202104093, "grad_norm": 0.12242259830236435, "learning_rate": 0.0005, "loss": 2.1082, "step": 146590 }, { "epoch": 0.557995782678532, "grad_norm": 0.12169618159532547, "learning_rate": 0.0005, "loss": 2.0966, "step": 146600 }, { "epoch": 0.5580338451466547, "grad_norm": 0.1209753081202507, "learning_rate": 0.0005, "loss": 2.1084, "step": 146610 }, { "epoch": 0.5580719076147773, "grad_norm": 0.12649326026439667, "learning_rate": 0.0005, "loss": 2.1332, "step": 146620 }, { "epoch": 0.5581099700829001, "grad_norm": 0.12578131258487701, "learning_rate": 0.0005, "loss": 2.1021, "step": 146630 }, { "epoch": 0.5581480325510227, "grad_norm": 0.12273520231246948, "learning_rate": 0.0005, "loss": 2.115, "step": 146640 }, { "epoch": 0.5581860950191454, "grad_norm": 0.12749134004116058, "learning_rate": 0.0005, "loss": 2.1078, "step": 146650 }, { "epoch": 0.5582241574872681, "grad_norm": 0.11976244300603867, "learning_rate": 0.0005, "loss": 2.1083, "step": 146660 }, { "epoch": 0.5582622199553908, "grad_norm": 0.12862452864646912, "learning_rate": 0.0005, "loss": 2.1093, "step": 146670 }, { "epoch": 0.5583002824235135, "grad_norm": 0.13949202001094818, "learning_rate": 0.0005, "loss": 2.1096, "step": 146680 }, { "epoch": 0.5583383448916361, "grad_norm": 0.13123486936092377, "learning_rate": 0.0005, "loss": 2.1114, "step": 146690 }, { "epoch": 0.5583764073597588, "grad_norm": 0.1350037157535553, "learning_rate": 0.0005, "loss": 2.1262, "step": 146700 }, { "epoch": 0.5584144698278816, "grad_norm": 0.1257314532995224, "learning_rate": 0.0005, "loss": 2.1005, "step": 146710 }, { "epoch": 0.5584525322960042, "grad_norm": 0.12179521471261978, "learning_rate": 0.0005, "loss": 2.1161, "step": 146720 }, { "epoch": 0.5584905947641269, "grad_norm": 0.11762476712465286, "learning_rate": 0.0005, "loss": 2.1059, "step": 146730 }, { "epoch": 0.5585286572322495, "grad_norm": 0.13207073509693146, "learning_rate": 0.0005, "loss": 2.122, "step": 146740 }, { "epoch": 0.5585667197003723, "grad_norm": 0.12246715277433395, "learning_rate": 0.0005, "loss": 2.1143, "step": 146750 }, { "epoch": 0.558604782168495, "grad_norm": 0.1236710399389267, "learning_rate": 0.0005, "loss": 2.1089, "step": 146760 }, { "epoch": 0.5586428446366176, "grad_norm": 0.1190788596868515, "learning_rate": 0.0005, "loss": 2.103, "step": 146770 }, { "epoch": 0.5586809071047403, "grad_norm": 0.11813774704933167, "learning_rate": 0.0005, "loss": 2.1182, "step": 146780 }, { "epoch": 0.5587189695728629, "grad_norm": 0.13377505540847778, "learning_rate": 0.0005, "loss": 2.1131, "step": 146790 }, { "epoch": 0.5587570320409857, "grad_norm": 0.12410465627908707, "learning_rate": 0.0005, "loss": 2.1125, "step": 146800 }, { "epoch": 0.5587950945091084, "grad_norm": 0.1365984082221985, "learning_rate": 0.0005, "loss": 2.1151, "step": 146810 }, { "epoch": 0.558833156977231, "grad_norm": 0.12833617627620697, "learning_rate": 0.0005, "loss": 2.1105, "step": 146820 }, { "epoch": 0.5588712194453537, "grad_norm": 0.12997734546661377, "learning_rate": 0.0005, "loss": 2.1125, "step": 146830 }, { "epoch": 0.5589092819134764, "grad_norm": 0.11692407727241516, "learning_rate": 0.0005, "loss": 2.109, "step": 146840 }, { "epoch": 0.5589473443815991, "grad_norm": 0.1247381642460823, "learning_rate": 0.0005, "loss": 2.1071, "step": 146850 }, { "epoch": 0.5589854068497218, "grad_norm": 0.12471222877502441, "learning_rate": 0.0005, "loss": 2.1134, "step": 146860 }, { "epoch": 0.5590234693178444, "grad_norm": 0.1211012452840805, "learning_rate": 0.0005, "loss": 2.1108, "step": 146870 }, { "epoch": 0.5590615317859672, "grad_norm": 0.13737298548221588, "learning_rate": 0.0005, "loss": 2.1081, "step": 146880 }, { "epoch": 0.5590995942540898, "grad_norm": 0.13684974610805511, "learning_rate": 0.0005, "loss": 2.1103, "step": 146890 }, { "epoch": 0.5591376567222125, "grad_norm": 0.12505057454109192, "learning_rate": 0.0005, "loss": 2.1063, "step": 146900 }, { "epoch": 0.5591757191903352, "grad_norm": 0.1270734667778015, "learning_rate": 0.0005, "loss": 2.1143, "step": 146910 }, { "epoch": 0.5592137816584578, "grad_norm": 0.13607655465602875, "learning_rate": 0.0005, "loss": 2.1092, "step": 146920 }, { "epoch": 0.5592518441265806, "grad_norm": 0.13621363043785095, "learning_rate": 0.0005, "loss": 2.1134, "step": 146930 }, { "epoch": 0.5592899065947032, "grad_norm": 0.13767369091510773, "learning_rate": 0.0005, "loss": 2.1088, "step": 146940 }, { "epoch": 0.5593279690628259, "grad_norm": 0.12254630774259567, "learning_rate": 0.0005, "loss": 2.1143, "step": 146950 }, { "epoch": 0.5593660315309485, "grad_norm": 0.1332453489303589, "learning_rate": 0.0005, "loss": 2.108, "step": 146960 }, { "epoch": 0.5594040939990713, "grad_norm": 0.1357196569442749, "learning_rate": 0.0005, "loss": 2.1225, "step": 146970 }, { "epoch": 0.559442156467194, "grad_norm": 0.1272253692150116, "learning_rate": 0.0005, "loss": 2.1156, "step": 146980 }, { "epoch": 0.5594802189353166, "grad_norm": 0.11661429703235626, "learning_rate": 0.0005, "loss": 2.0916, "step": 146990 }, { "epoch": 0.5595182814034393, "grad_norm": 0.14878034591674805, "learning_rate": 0.0005, "loss": 2.1111, "step": 147000 }, { "epoch": 0.5595563438715621, "grad_norm": 0.12362154573202133, "learning_rate": 0.0005, "loss": 2.1109, "step": 147010 }, { "epoch": 0.5595944063396847, "grad_norm": 0.1282384991645813, "learning_rate": 0.0005, "loss": 2.1191, "step": 147020 }, { "epoch": 0.5596324688078074, "grad_norm": 0.13063131272792816, "learning_rate": 0.0005, "loss": 2.1238, "step": 147030 }, { "epoch": 0.55967053127593, "grad_norm": 0.1230950877070427, "learning_rate": 0.0005, "loss": 2.1279, "step": 147040 }, { "epoch": 0.5597085937440527, "grad_norm": 0.12868686020374298, "learning_rate": 0.0005, "loss": 2.1162, "step": 147050 }, { "epoch": 0.5597466562121755, "grad_norm": 0.11605887115001678, "learning_rate": 0.0005, "loss": 2.1134, "step": 147060 }, { "epoch": 0.5597847186802981, "grad_norm": 0.12333370745182037, "learning_rate": 0.0005, "loss": 2.115, "step": 147070 }, { "epoch": 0.5598227811484208, "grad_norm": 0.13515153527259827, "learning_rate": 0.0005, "loss": 2.1224, "step": 147080 }, { "epoch": 0.5598608436165434, "grad_norm": 0.12148472666740417, "learning_rate": 0.0005, "loss": 2.1164, "step": 147090 }, { "epoch": 0.5598989060846662, "grad_norm": 0.13153775036334991, "learning_rate": 0.0005, "loss": 2.128, "step": 147100 }, { "epoch": 0.5599369685527888, "grad_norm": 0.2740803360939026, "learning_rate": 0.0005, "loss": 2.1205, "step": 147110 }, { "epoch": 0.5599750310209115, "grad_norm": 0.13909249007701874, "learning_rate": 0.0005, "loss": 2.1001, "step": 147120 }, { "epoch": 0.5600130934890342, "grad_norm": 0.12917034327983856, "learning_rate": 0.0005, "loss": 2.109, "step": 147130 }, { "epoch": 0.5600511559571569, "grad_norm": 0.12254467606544495, "learning_rate": 0.0005, "loss": 2.123, "step": 147140 }, { "epoch": 0.5600892184252796, "grad_norm": 0.12786157429218292, "learning_rate": 0.0005, "loss": 2.1156, "step": 147150 }, { "epoch": 0.5601272808934022, "grad_norm": 0.12340975552797318, "learning_rate": 0.0005, "loss": 2.1073, "step": 147160 }, { "epoch": 0.5601653433615249, "grad_norm": 0.12096070498228073, "learning_rate": 0.0005, "loss": 2.1316, "step": 147170 }, { "epoch": 0.5602034058296477, "grad_norm": 0.11434274911880493, "learning_rate": 0.0005, "loss": 2.1036, "step": 147180 }, { "epoch": 0.5602414682977703, "grad_norm": 0.1330031007528305, "learning_rate": 0.0005, "loss": 2.1144, "step": 147190 }, { "epoch": 0.560279530765893, "grad_norm": 0.12533064186573029, "learning_rate": 0.0005, "loss": 2.1016, "step": 147200 }, { "epoch": 0.5603175932340156, "grad_norm": 0.12945376336574554, "learning_rate": 0.0005, "loss": 2.1107, "step": 147210 }, { "epoch": 0.5603556557021383, "grad_norm": 0.12761011719703674, "learning_rate": 0.0005, "loss": 2.1156, "step": 147220 }, { "epoch": 0.5603937181702611, "grad_norm": 0.1295965164899826, "learning_rate": 0.0005, "loss": 2.1077, "step": 147230 }, { "epoch": 0.5604317806383837, "grad_norm": 0.12540018558502197, "learning_rate": 0.0005, "loss": 2.1288, "step": 147240 }, { "epoch": 0.5604698431065064, "grad_norm": 0.11819574236869812, "learning_rate": 0.0005, "loss": 2.1081, "step": 147250 }, { "epoch": 0.560507905574629, "grad_norm": 0.12074295431375504, "learning_rate": 0.0005, "loss": 2.1047, "step": 147260 }, { "epoch": 0.5605459680427518, "grad_norm": 0.1224203109741211, "learning_rate": 0.0005, "loss": 2.1053, "step": 147270 }, { "epoch": 0.5605840305108745, "grad_norm": 0.11983948200941086, "learning_rate": 0.0005, "loss": 2.1305, "step": 147280 }, { "epoch": 0.5606220929789971, "grad_norm": 0.12321861833333969, "learning_rate": 0.0005, "loss": 2.1106, "step": 147290 }, { "epoch": 0.5606601554471198, "grad_norm": 0.1276361495256424, "learning_rate": 0.0005, "loss": 2.1204, "step": 147300 }, { "epoch": 0.5606982179152425, "grad_norm": 0.1272909790277481, "learning_rate": 0.0005, "loss": 2.0989, "step": 147310 }, { "epoch": 0.5607362803833652, "grad_norm": 0.1353839486837387, "learning_rate": 0.0005, "loss": 2.1207, "step": 147320 }, { "epoch": 0.5607743428514879, "grad_norm": 0.11415134370326996, "learning_rate": 0.0005, "loss": 2.1144, "step": 147330 }, { "epoch": 0.5608124053196105, "grad_norm": 0.13879957795143127, "learning_rate": 0.0005, "loss": 2.1093, "step": 147340 }, { "epoch": 0.5608504677877332, "grad_norm": 0.11635793745517731, "learning_rate": 0.0005, "loss": 2.0991, "step": 147350 }, { "epoch": 0.5608885302558559, "grad_norm": 0.12324373424053192, "learning_rate": 0.0005, "loss": 2.1266, "step": 147360 }, { "epoch": 0.5609265927239786, "grad_norm": 0.12618057429790497, "learning_rate": 0.0005, "loss": 2.1105, "step": 147370 }, { "epoch": 0.5609646551921013, "grad_norm": 0.13643816113471985, "learning_rate": 0.0005, "loss": 2.098, "step": 147380 }, { "epoch": 0.5610027176602239, "grad_norm": 0.11734917014837265, "learning_rate": 0.0005, "loss": 2.1145, "step": 147390 }, { "epoch": 0.5610407801283467, "grad_norm": 0.12413015961647034, "learning_rate": 0.0005, "loss": 2.125, "step": 147400 }, { "epoch": 0.5610788425964693, "grad_norm": 0.11574844270944595, "learning_rate": 0.0005, "loss": 2.1213, "step": 147410 }, { "epoch": 0.561116905064592, "grad_norm": 0.11728766560554504, "learning_rate": 0.0005, "loss": 2.1137, "step": 147420 }, { "epoch": 0.5611549675327147, "grad_norm": 0.1308804303407669, "learning_rate": 0.0005, "loss": 2.1108, "step": 147430 }, { "epoch": 0.5611930300008374, "grad_norm": 0.12309185415506363, "learning_rate": 0.0005, "loss": 2.115, "step": 147440 }, { "epoch": 0.5612310924689601, "grad_norm": 0.13514117896556854, "learning_rate": 0.0005, "loss": 2.1238, "step": 147450 }, { "epoch": 0.5612691549370827, "grad_norm": 0.14108632504940033, "learning_rate": 0.0005, "loss": 2.1028, "step": 147460 }, { "epoch": 0.5613072174052054, "grad_norm": 0.12130303680896759, "learning_rate": 0.0005, "loss": 2.1111, "step": 147470 }, { "epoch": 0.561345279873328, "grad_norm": 0.124043770134449, "learning_rate": 0.0005, "loss": 2.1331, "step": 147480 }, { "epoch": 0.5613833423414508, "grad_norm": 0.12163038551807404, "learning_rate": 0.0005, "loss": 2.1033, "step": 147490 }, { "epoch": 0.5614214048095735, "grad_norm": 0.11444005370140076, "learning_rate": 0.0005, "loss": 2.1289, "step": 147500 }, { "epoch": 0.5614594672776961, "grad_norm": 0.12023451179265976, "learning_rate": 0.0005, "loss": 2.0972, "step": 147510 }, { "epoch": 0.5614975297458188, "grad_norm": 0.12952552735805511, "learning_rate": 0.0005, "loss": 2.1243, "step": 147520 }, { "epoch": 0.5615355922139416, "grad_norm": 0.14416095614433289, "learning_rate": 0.0005, "loss": 2.1169, "step": 147530 }, { "epoch": 0.5615736546820642, "grad_norm": 0.14359638094902039, "learning_rate": 0.0005, "loss": 2.1129, "step": 147540 }, { "epoch": 0.5616117171501869, "grad_norm": 0.13709759712219238, "learning_rate": 0.0005, "loss": 2.1088, "step": 147550 }, { "epoch": 0.5616497796183095, "grad_norm": 0.11415136605501175, "learning_rate": 0.0005, "loss": 2.1193, "step": 147560 }, { "epoch": 0.5616878420864323, "grad_norm": 0.13422194123268127, "learning_rate": 0.0005, "loss": 2.1172, "step": 147570 }, { "epoch": 0.561725904554555, "grad_norm": 0.12092873454093933, "learning_rate": 0.0005, "loss": 2.1048, "step": 147580 }, { "epoch": 0.5617639670226776, "grad_norm": 0.12244142591953278, "learning_rate": 0.0005, "loss": 2.1177, "step": 147590 }, { "epoch": 0.5618020294908003, "grad_norm": 0.12528356909751892, "learning_rate": 0.0005, "loss": 2.1057, "step": 147600 }, { "epoch": 0.561840091958923, "grad_norm": 0.12519213557243347, "learning_rate": 0.0005, "loss": 2.1087, "step": 147610 }, { "epoch": 0.5618781544270457, "grad_norm": 0.13673320412635803, "learning_rate": 0.0005, "loss": 2.1253, "step": 147620 }, { "epoch": 0.5619162168951684, "grad_norm": 0.12346883863210678, "learning_rate": 0.0005, "loss": 2.1188, "step": 147630 }, { "epoch": 0.561954279363291, "grad_norm": 0.1161339059472084, "learning_rate": 0.0005, "loss": 2.1205, "step": 147640 }, { "epoch": 0.5619923418314137, "grad_norm": 0.11981845647096634, "learning_rate": 0.0005, "loss": 2.1268, "step": 147650 }, { "epoch": 0.5620304042995364, "grad_norm": 0.12061472982168198, "learning_rate": 0.0005, "loss": 2.122, "step": 147660 }, { "epoch": 0.5620684667676591, "grad_norm": 0.12434431165456772, "learning_rate": 0.0005, "loss": 2.097, "step": 147670 }, { "epoch": 0.5621065292357817, "grad_norm": 0.11918957531452179, "learning_rate": 0.0005, "loss": 2.1112, "step": 147680 }, { "epoch": 0.5621445917039044, "grad_norm": 0.12601889669895172, "learning_rate": 0.0005, "loss": 2.1045, "step": 147690 }, { "epoch": 0.5621826541720272, "grad_norm": 0.12405750900506973, "learning_rate": 0.0005, "loss": 2.1171, "step": 147700 }, { "epoch": 0.5622207166401498, "grad_norm": 0.14284063875675201, "learning_rate": 0.0005, "loss": 2.1178, "step": 147710 }, { "epoch": 0.5622587791082725, "grad_norm": 0.11972171068191528, "learning_rate": 0.0005, "loss": 2.1224, "step": 147720 }, { "epoch": 0.5622968415763951, "grad_norm": 0.1217430904507637, "learning_rate": 0.0005, "loss": 2.13, "step": 147730 }, { "epoch": 0.5623349040445179, "grad_norm": 0.11169090121984482, "learning_rate": 0.0005, "loss": 2.1213, "step": 147740 }, { "epoch": 0.5623729665126406, "grad_norm": 0.1246361956000328, "learning_rate": 0.0005, "loss": 2.0933, "step": 147750 }, { "epoch": 0.5624110289807632, "grad_norm": 0.11976980417966843, "learning_rate": 0.0005, "loss": 2.1027, "step": 147760 }, { "epoch": 0.5624490914488859, "grad_norm": 0.14023859798908234, "learning_rate": 0.0005, "loss": 2.1142, "step": 147770 }, { "epoch": 0.5624871539170085, "grad_norm": 0.12163596600294113, "learning_rate": 0.0005, "loss": 2.1109, "step": 147780 }, { "epoch": 0.5625252163851313, "grad_norm": 0.11780829727649689, "learning_rate": 0.0005, "loss": 2.1295, "step": 147790 }, { "epoch": 0.562563278853254, "grad_norm": 0.12326356023550034, "learning_rate": 0.0005, "loss": 2.1083, "step": 147800 }, { "epoch": 0.5626013413213766, "grad_norm": 0.13496778905391693, "learning_rate": 0.0005, "loss": 2.1013, "step": 147810 }, { "epoch": 0.5626394037894993, "grad_norm": 0.12595048546791077, "learning_rate": 0.0005, "loss": 2.1026, "step": 147820 }, { "epoch": 0.562677466257622, "grad_norm": 0.13264413177967072, "learning_rate": 0.0005, "loss": 2.0981, "step": 147830 }, { "epoch": 0.5627155287257447, "grad_norm": 0.13527928292751312, "learning_rate": 0.0005, "loss": 2.1285, "step": 147840 }, { "epoch": 0.5627535911938674, "grad_norm": 0.12819808721542358, "learning_rate": 0.0005, "loss": 2.0945, "step": 147850 }, { "epoch": 0.56279165366199, "grad_norm": 0.12501229345798492, "learning_rate": 0.0005, "loss": 2.1227, "step": 147860 }, { "epoch": 0.5628297161301128, "grad_norm": 0.12441595643758774, "learning_rate": 0.0005, "loss": 2.1275, "step": 147870 }, { "epoch": 0.5628677785982354, "grad_norm": 0.12710030376911163, "learning_rate": 0.0005, "loss": 2.1124, "step": 147880 }, { "epoch": 0.5629058410663581, "grad_norm": 0.14186739921569824, "learning_rate": 0.0005, "loss": 2.1331, "step": 147890 }, { "epoch": 0.5629439035344808, "grad_norm": 0.14794635772705078, "learning_rate": 0.0005, "loss": 2.1152, "step": 147900 }, { "epoch": 0.5629819660026034, "grad_norm": 0.13675430417060852, "learning_rate": 0.0005, "loss": 2.1037, "step": 147910 }, { "epoch": 0.5630200284707262, "grad_norm": 0.11546150594949722, "learning_rate": 0.0005, "loss": 2.1229, "step": 147920 }, { "epoch": 0.5630580909388488, "grad_norm": 0.1285984367132187, "learning_rate": 0.0005, "loss": 2.12, "step": 147930 }, { "epoch": 0.5630961534069715, "grad_norm": 0.12132029980421066, "learning_rate": 0.0005, "loss": 2.1184, "step": 147940 }, { "epoch": 0.5631342158750942, "grad_norm": 0.12189318239688873, "learning_rate": 0.0005, "loss": 2.1106, "step": 147950 }, { "epoch": 0.5631722783432169, "grad_norm": 0.11721985042095184, "learning_rate": 0.0005, "loss": 2.0968, "step": 147960 }, { "epoch": 0.5632103408113396, "grad_norm": 0.11647754162549973, "learning_rate": 0.0005, "loss": 2.1039, "step": 147970 }, { "epoch": 0.5632484032794622, "grad_norm": 0.12019306421279907, "learning_rate": 0.0005, "loss": 2.1078, "step": 147980 }, { "epoch": 0.5632864657475849, "grad_norm": 0.12273525446653366, "learning_rate": 0.0005, "loss": 2.1201, "step": 147990 }, { "epoch": 0.5633245282157077, "grad_norm": 0.13190962374210358, "learning_rate": 0.0005, "loss": 2.1093, "step": 148000 }, { "epoch": 0.5633625906838303, "grad_norm": 0.12351113557815552, "learning_rate": 0.0005, "loss": 2.1193, "step": 148010 }, { "epoch": 0.563400653151953, "grad_norm": 0.1404343992471695, "learning_rate": 0.0005, "loss": 2.1205, "step": 148020 }, { "epoch": 0.5634387156200756, "grad_norm": 0.11794932931661606, "learning_rate": 0.0005, "loss": 2.1108, "step": 148030 }, { "epoch": 0.5634767780881984, "grad_norm": 0.13164691627025604, "learning_rate": 0.0005, "loss": 2.117, "step": 148040 }, { "epoch": 0.5635148405563211, "grad_norm": 0.10908240079879761, "learning_rate": 0.0005, "loss": 2.0936, "step": 148050 }, { "epoch": 0.5635529030244437, "grad_norm": 0.12172287702560425, "learning_rate": 0.0005, "loss": 2.1258, "step": 148060 }, { "epoch": 0.5635909654925664, "grad_norm": 0.12808212637901306, "learning_rate": 0.0005, "loss": 2.1149, "step": 148070 }, { "epoch": 0.563629027960689, "grad_norm": 0.11926170438528061, "learning_rate": 0.0005, "loss": 2.1112, "step": 148080 }, { "epoch": 0.5636670904288118, "grad_norm": 0.13597628474235535, "learning_rate": 0.0005, "loss": 2.1206, "step": 148090 }, { "epoch": 0.5637051528969345, "grad_norm": 0.1296033412218094, "learning_rate": 0.0005, "loss": 2.1163, "step": 148100 }, { "epoch": 0.5637432153650571, "grad_norm": 0.12396147847175598, "learning_rate": 0.0005, "loss": 2.0986, "step": 148110 }, { "epoch": 0.5637812778331798, "grad_norm": 0.11653433740139008, "learning_rate": 0.0005, "loss": 2.0996, "step": 148120 }, { "epoch": 0.5638193403013025, "grad_norm": 0.14345014095306396, "learning_rate": 0.0005, "loss": 2.1143, "step": 148130 }, { "epoch": 0.5638574027694252, "grad_norm": 0.11987555772066116, "learning_rate": 0.0005, "loss": 2.105, "step": 148140 }, { "epoch": 0.5638954652375479, "grad_norm": 0.12629404664039612, "learning_rate": 0.0005, "loss": 2.1073, "step": 148150 }, { "epoch": 0.5639335277056705, "grad_norm": 0.12913820147514343, "learning_rate": 0.0005, "loss": 2.1192, "step": 148160 }, { "epoch": 0.5639715901737933, "grad_norm": 0.1157715693116188, "learning_rate": 0.0005, "loss": 2.1167, "step": 148170 }, { "epoch": 0.5640096526419159, "grad_norm": 0.12015217542648315, "learning_rate": 0.0005, "loss": 2.1176, "step": 148180 }, { "epoch": 0.5640477151100386, "grad_norm": 0.12450938671827316, "learning_rate": 0.0005, "loss": 2.1004, "step": 148190 }, { "epoch": 0.5640857775781613, "grad_norm": 0.12859109044075012, "learning_rate": 0.0005, "loss": 2.116, "step": 148200 }, { "epoch": 0.5641238400462839, "grad_norm": 0.11546629667282104, "learning_rate": 0.0005, "loss": 2.1132, "step": 148210 }, { "epoch": 0.5641619025144067, "grad_norm": 0.1237310841679573, "learning_rate": 0.0005, "loss": 2.1241, "step": 148220 }, { "epoch": 0.5641999649825293, "grad_norm": 0.1317415088415146, "learning_rate": 0.0005, "loss": 2.1128, "step": 148230 }, { "epoch": 0.564238027450652, "grad_norm": 0.12628450989723206, "learning_rate": 0.0005, "loss": 2.1202, "step": 148240 }, { "epoch": 0.5642760899187746, "grad_norm": 0.12909241020679474, "learning_rate": 0.0005, "loss": 2.113, "step": 148250 }, { "epoch": 0.5643141523868974, "grad_norm": 0.13340359926223755, "learning_rate": 0.0005, "loss": 2.1022, "step": 148260 }, { "epoch": 0.5643522148550201, "grad_norm": 0.14159567654132843, "learning_rate": 0.0005, "loss": 2.1084, "step": 148270 }, { "epoch": 0.5643902773231427, "grad_norm": 0.13016550242900848, "learning_rate": 0.0005, "loss": 2.1097, "step": 148280 }, { "epoch": 0.5644283397912654, "grad_norm": 0.11598911881446838, "learning_rate": 0.0005, "loss": 2.1154, "step": 148290 }, { "epoch": 0.5644664022593882, "grad_norm": 0.12047997862100601, "learning_rate": 0.0005, "loss": 2.094, "step": 148300 }, { "epoch": 0.5645044647275108, "grad_norm": 0.12963908910751343, "learning_rate": 0.0005, "loss": 2.1072, "step": 148310 }, { "epoch": 0.5645425271956335, "grad_norm": 0.12290691584348679, "learning_rate": 0.0005, "loss": 2.1198, "step": 148320 }, { "epoch": 0.5645805896637561, "grad_norm": 0.11939645558595657, "learning_rate": 0.0005, "loss": 2.1219, "step": 148330 }, { "epoch": 0.5646186521318788, "grad_norm": 0.11704593151807785, "learning_rate": 0.0005, "loss": 2.1256, "step": 148340 }, { "epoch": 0.5646567146000016, "grad_norm": 0.12034741789102554, "learning_rate": 0.0005, "loss": 2.1179, "step": 148350 }, { "epoch": 0.5646947770681242, "grad_norm": 0.12463753670454025, "learning_rate": 0.0005, "loss": 2.109, "step": 148360 }, { "epoch": 0.5647328395362469, "grad_norm": 0.12364073097705841, "learning_rate": 0.0005, "loss": 2.1111, "step": 148370 }, { "epoch": 0.5647709020043695, "grad_norm": 0.11811131983995438, "learning_rate": 0.0005, "loss": 2.1076, "step": 148380 }, { "epoch": 0.5648089644724923, "grad_norm": 0.12201013416051865, "learning_rate": 0.0005, "loss": 2.1237, "step": 148390 }, { "epoch": 0.564847026940615, "grad_norm": 0.11526204645633698, "learning_rate": 0.0005, "loss": 2.0983, "step": 148400 }, { "epoch": 0.5648850894087376, "grad_norm": 0.6108805537223816, "learning_rate": 0.0005, "loss": 2.1038, "step": 148410 }, { "epoch": 0.5649231518768603, "grad_norm": 0.12102359533309937, "learning_rate": 0.0005, "loss": 2.0993, "step": 148420 }, { "epoch": 0.564961214344983, "grad_norm": 0.12056120485067368, "learning_rate": 0.0005, "loss": 2.1137, "step": 148430 }, { "epoch": 0.5649992768131057, "grad_norm": 0.1187172681093216, "learning_rate": 0.0005, "loss": 2.1064, "step": 148440 }, { "epoch": 0.5650373392812283, "grad_norm": 0.11827126145362854, "learning_rate": 0.0005, "loss": 2.1144, "step": 148450 }, { "epoch": 0.565075401749351, "grad_norm": 0.11755429953336716, "learning_rate": 0.0005, "loss": 2.1228, "step": 148460 }, { "epoch": 0.5651134642174738, "grad_norm": 0.12420319765806198, "learning_rate": 0.0005, "loss": 2.1099, "step": 148470 }, { "epoch": 0.5651515266855964, "grad_norm": 0.1221015527844429, "learning_rate": 0.0005, "loss": 2.1131, "step": 148480 }, { "epoch": 0.5651895891537191, "grad_norm": 0.14070142805576324, "learning_rate": 0.0005, "loss": 2.1076, "step": 148490 }, { "epoch": 0.5652276516218417, "grad_norm": 0.12251606583595276, "learning_rate": 0.0005, "loss": 2.1088, "step": 148500 }, { "epoch": 0.5652657140899644, "grad_norm": 0.11810902506113052, "learning_rate": 0.0005, "loss": 2.1153, "step": 148510 }, { "epoch": 0.5653037765580872, "grad_norm": 0.13781926035881042, "learning_rate": 0.0005, "loss": 2.0982, "step": 148520 }, { "epoch": 0.5653418390262098, "grad_norm": 0.1113312691450119, "learning_rate": 0.0005, "loss": 2.1193, "step": 148530 }, { "epoch": 0.5653799014943325, "grad_norm": 0.12936291098594666, "learning_rate": 0.0005, "loss": 2.1276, "step": 148540 }, { "epoch": 0.5654179639624551, "grad_norm": 0.1260511428117752, "learning_rate": 0.0005, "loss": 2.113, "step": 148550 }, { "epoch": 0.5654560264305779, "grad_norm": 0.12644599378108978, "learning_rate": 0.0005, "loss": 2.1262, "step": 148560 }, { "epoch": 0.5654940888987006, "grad_norm": 0.11645796149969101, "learning_rate": 0.0005, "loss": 2.1234, "step": 148570 }, { "epoch": 0.5655321513668232, "grad_norm": 0.1230463981628418, "learning_rate": 0.0005, "loss": 2.1197, "step": 148580 }, { "epoch": 0.5655702138349459, "grad_norm": 0.128582164645195, "learning_rate": 0.0005, "loss": 2.1094, "step": 148590 }, { "epoch": 0.5656082763030686, "grad_norm": 0.11796011030673981, "learning_rate": 0.0005, "loss": 2.1031, "step": 148600 }, { "epoch": 0.5656463387711913, "grad_norm": 0.12276263535022736, "learning_rate": 0.0005, "loss": 2.1161, "step": 148610 }, { "epoch": 0.565684401239314, "grad_norm": 0.12811756134033203, "learning_rate": 0.0005, "loss": 2.1209, "step": 148620 }, { "epoch": 0.5657224637074366, "grad_norm": 0.12802930176258087, "learning_rate": 0.0005, "loss": 2.1228, "step": 148630 }, { "epoch": 0.5657605261755593, "grad_norm": 0.12110596895217896, "learning_rate": 0.0005, "loss": 2.1204, "step": 148640 }, { "epoch": 0.565798588643682, "grad_norm": 0.12279154360294342, "learning_rate": 0.0005, "loss": 2.1076, "step": 148650 }, { "epoch": 0.5658366511118047, "grad_norm": 0.1164965108036995, "learning_rate": 0.0005, "loss": 2.1232, "step": 148660 }, { "epoch": 0.5658747135799274, "grad_norm": 0.12058350443840027, "learning_rate": 0.0005, "loss": 2.1271, "step": 148670 }, { "epoch": 0.56591277604805, "grad_norm": 0.12220282107591629, "learning_rate": 0.0005, "loss": 2.0994, "step": 148680 }, { "epoch": 0.5659508385161728, "grad_norm": 0.11955209821462631, "learning_rate": 0.0005, "loss": 2.1148, "step": 148690 }, { "epoch": 0.5659889009842954, "grad_norm": 0.12738743424415588, "learning_rate": 0.0005, "loss": 2.1254, "step": 148700 }, { "epoch": 0.5660269634524181, "grad_norm": 0.13051724433898926, "learning_rate": 0.0005, "loss": 2.1272, "step": 148710 }, { "epoch": 0.5660650259205408, "grad_norm": 0.12039772421121597, "learning_rate": 0.0005, "loss": 2.1107, "step": 148720 }, { "epoch": 0.5661030883886635, "grad_norm": 0.11901012808084488, "learning_rate": 0.0005, "loss": 2.105, "step": 148730 }, { "epoch": 0.5661411508567862, "grad_norm": 0.11077872663736343, "learning_rate": 0.0005, "loss": 2.1133, "step": 148740 }, { "epoch": 0.5661792133249088, "grad_norm": 0.14071735739707947, "learning_rate": 0.0005, "loss": 2.1193, "step": 148750 }, { "epoch": 0.5662172757930315, "grad_norm": 0.14127027988433838, "learning_rate": 0.0005, "loss": 2.1296, "step": 148760 }, { "epoch": 0.5662553382611542, "grad_norm": 0.1209738478064537, "learning_rate": 0.0005, "loss": 2.1076, "step": 148770 }, { "epoch": 0.5662934007292769, "grad_norm": 0.13998672366142273, "learning_rate": 0.0005, "loss": 2.1387, "step": 148780 }, { "epoch": 0.5663314631973996, "grad_norm": 0.14784862101078033, "learning_rate": 0.0005, "loss": 2.1222, "step": 148790 }, { "epoch": 0.5663695256655222, "grad_norm": 0.11674334108829498, "learning_rate": 0.0005, "loss": 2.1039, "step": 148800 }, { "epoch": 0.5664075881336449, "grad_norm": 0.1463528871536255, "learning_rate": 0.0005, "loss": 2.1128, "step": 148810 }, { "epoch": 0.5664456506017677, "grad_norm": 0.1308635026216507, "learning_rate": 0.0005, "loss": 2.1217, "step": 148820 }, { "epoch": 0.5664837130698903, "grad_norm": 0.12974272668361664, "learning_rate": 0.0005, "loss": 2.1115, "step": 148830 }, { "epoch": 0.566521775538013, "grad_norm": 0.1228189468383789, "learning_rate": 0.0005, "loss": 2.1115, "step": 148840 }, { "epoch": 0.5665598380061356, "grad_norm": 0.1328430473804474, "learning_rate": 0.0005, "loss": 2.1238, "step": 148850 }, { "epoch": 0.5665979004742584, "grad_norm": 0.12181210517883301, "learning_rate": 0.0005, "loss": 2.1098, "step": 148860 }, { "epoch": 0.566635962942381, "grad_norm": 0.13458064198493958, "learning_rate": 0.0005, "loss": 2.1121, "step": 148870 }, { "epoch": 0.5666740254105037, "grad_norm": 0.12336578220129013, "learning_rate": 0.0005, "loss": 2.1201, "step": 148880 }, { "epoch": 0.5667120878786264, "grad_norm": 0.11937737464904785, "learning_rate": 0.0005, "loss": 2.1216, "step": 148890 }, { "epoch": 0.5667501503467491, "grad_norm": 0.13529595732688904, "learning_rate": 0.0005, "loss": 2.0974, "step": 148900 }, { "epoch": 0.5667882128148718, "grad_norm": 0.1208646222949028, "learning_rate": 0.0005, "loss": 2.1084, "step": 148910 }, { "epoch": 0.5668262752829945, "grad_norm": 0.13353469967842102, "learning_rate": 0.0005, "loss": 2.1085, "step": 148920 }, { "epoch": 0.5668643377511171, "grad_norm": 0.13542310893535614, "learning_rate": 0.0005, "loss": 2.1164, "step": 148930 }, { "epoch": 0.5669024002192398, "grad_norm": 0.11974187195301056, "learning_rate": 0.0005, "loss": 2.1123, "step": 148940 }, { "epoch": 0.5669404626873625, "grad_norm": 0.11809533834457397, "learning_rate": 0.0005, "loss": 2.0951, "step": 148950 }, { "epoch": 0.5669785251554852, "grad_norm": 0.12082237005233765, "learning_rate": 0.0005, "loss": 2.1193, "step": 148960 }, { "epoch": 0.5670165876236078, "grad_norm": 0.13240136206150055, "learning_rate": 0.0005, "loss": 2.1123, "step": 148970 }, { "epoch": 0.5670546500917305, "grad_norm": 0.13582897186279297, "learning_rate": 0.0005, "loss": 2.1091, "step": 148980 }, { "epoch": 0.5670927125598533, "grad_norm": 0.12459953874349594, "learning_rate": 0.0005, "loss": 2.1145, "step": 148990 }, { "epoch": 0.5671307750279759, "grad_norm": 0.13695721328258514, "learning_rate": 0.0005, "loss": 2.1181, "step": 149000 }, { "epoch": 0.5671688374960986, "grad_norm": 0.12011035531759262, "learning_rate": 0.0005, "loss": 2.0968, "step": 149010 }, { "epoch": 0.5672068999642212, "grad_norm": 0.12467087060213089, "learning_rate": 0.0005, "loss": 2.1139, "step": 149020 }, { "epoch": 0.567244962432344, "grad_norm": 0.1225501075387001, "learning_rate": 0.0005, "loss": 2.1022, "step": 149030 }, { "epoch": 0.5672830249004667, "grad_norm": 0.1462046056985855, "learning_rate": 0.0005, "loss": 2.1083, "step": 149040 }, { "epoch": 0.5673210873685893, "grad_norm": 0.1293679028749466, "learning_rate": 0.0005, "loss": 2.1202, "step": 149050 }, { "epoch": 0.567359149836712, "grad_norm": 0.13014128804206848, "learning_rate": 0.0005, "loss": 2.1199, "step": 149060 }, { "epoch": 0.5673972123048346, "grad_norm": 0.13542695343494415, "learning_rate": 0.0005, "loss": 2.1198, "step": 149070 }, { "epoch": 0.5674352747729574, "grad_norm": 0.12484169751405716, "learning_rate": 0.0005, "loss": 2.1157, "step": 149080 }, { "epoch": 0.5674733372410801, "grad_norm": 0.1358722299337387, "learning_rate": 0.0005, "loss": 2.117, "step": 149090 }, { "epoch": 0.5675113997092027, "grad_norm": 0.12429749220609665, "learning_rate": 0.0005, "loss": 2.1111, "step": 149100 }, { "epoch": 0.5675494621773254, "grad_norm": 0.12064723670482635, "learning_rate": 0.0005, "loss": 2.1068, "step": 149110 }, { "epoch": 0.5675875246454481, "grad_norm": 0.13320963084697723, "learning_rate": 0.0005, "loss": 2.1074, "step": 149120 }, { "epoch": 0.5676255871135708, "grad_norm": 0.12744002044200897, "learning_rate": 0.0005, "loss": 2.1193, "step": 149130 }, { "epoch": 0.5676636495816935, "grad_norm": 0.12755407392978668, "learning_rate": 0.0005, "loss": 2.1081, "step": 149140 }, { "epoch": 0.5677017120498161, "grad_norm": 0.13250961899757385, "learning_rate": 0.0005, "loss": 2.1238, "step": 149150 }, { "epoch": 0.5677397745179389, "grad_norm": 0.1300535649061203, "learning_rate": 0.0005, "loss": 2.1137, "step": 149160 }, { "epoch": 0.5677778369860615, "grad_norm": 0.11809708178043365, "learning_rate": 0.0005, "loss": 2.1177, "step": 149170 }, { "epoch": 0.5678158994541842, "grad_norm": 0.11867474019527435, "learning_rate": 0.0005, "loss": 2.1068, "step": 149180 }, { "epoch": 0.5678539619223069, "grad_norm": 0.13715608417987823, "learning_rate": 0.0005, "loss": 2.1132, "step": 149190 }, { "epoch": 0.5678920243904296, "grad_norm": 0.12556719779968262, "learning_rate": 0.0005, "loss": 2.1153, "step": 149200 }, { "epoch": 0.5679300868585523, "grad_norm": 0.12606534361839294, "learning_rate": 0.0005, "loss": 2.116, "step": 149210 }, { "epoch": 0.5679681493266749, "grad_norm": 0.13479286432266235, "learning_rate": 0.0005, "loss": 2.116, "step": 149220 }, { "epoch": 0.5680062117947976, "grad_norm": 0.12721773982048035, "learning_rate": 0.0005, "loss": 2.1183, "step": 149230 }, { "epoch": 0.5680442742629203, "grad_norm": 0.12476199865341187, "learning_rate": 0.0005, "loss": 2.1213, "step": 149240 }, { "epoch": 0.568082336731043, "grad_norm": 0.13572070002555847, "learning_rate": 0.0005, "loss": 2.1132, "step": 149250 }, { "epoch": 0.5681203991991657, "grad_norm": 0.11942504346370697, "learning_rate": 0.0005, "loss": 2.1142, "step": 149260 }, { "epoch": 0.5681584616672883, "grad_norm": 0.1328095942735672, "learning_rate": 0.0005, "loss": 2.1193, "step": 149270 }, { "epoch": 0.568196524135411, "grad_norm": 0.12975753843784332, "learning_rate": 0.0005, "loss": 2.1084, "step": 149280 }, { "epoch": 0.5682345866035338, "grad_norm": 0.12333561480045319, "learning_rate": 0.0005, "loss": 2.1069, "step": 149290 }, { "epoch": 0.5682726490716564, "grad_norm": 0.1283501535654068, "learning_rate": 0.0005, "loss": 2.1188, "step": 149300 }, { "epoch": 0.5683107115397791, "grad_norm": 0.11721879988908768, "learning_rate": 0.0005, "loss": 2.1133, "step": 149310 }, { "epoch": 0.5683487740079017, "grad_norm": 0.11698118597269058, "learning_rate": 0.0005, "loss": 2.1249, "step": 149320 }, { "epoch": 0.5683868364760245, "grad_norm": 0.11460036784410477, "learning_rate": 0.0005, "loss": 2.1097, "step": 149330 }, { "epoch": 0.5684248989441472, "grad_norm": 0.12026268243789673, "learning_rate": 0.0005, "loss": 2.1012, "step": 149340 }, { "epoch": 0.5684629614122698, "grad_norm": 0.12232708930969238, "learning_rate": 0.0005, "loss": 2.105, "step": 149350 }, { "epoch": 0.5685010238803925, "grad_norm": 0.12763771414756775, "learning_rate": 0.0005, "loss": 2.1059, "step": 149360 }, { "epoch": 0.5685390863485151, "grad_norm": 0.12021525949239731, "learning_rate": 0.0005, "loss": 2.1171, "step": 149370 }, { "epoch": 0.5685771488166379, "grad_norm": 0.11759550124406815, "learning_rate": 0.0005, "loss": 2.1115, "step": 149380 }, { "epoch": 0.5686152112847606, "grad_norm": 0.12076186388731003, "learning_rate": 0.0005, "loss": 2.1141, "step": 149390 }, { "epoch": 0.5686532737528832, "grad_norm": 0.12155576795339584, "learning_rate": 0.0005, "loss": 2.0969, "step": 149400 }, { "epoch": 0.5686913362210059, "grad_norm": 0.11553096771240234, "learning_rate": 0.0005, "loss": 2.1106, "step": 149410 }, { "epoch": 0.5687293986891286, "grad_norm": 0.11827583611011505, "learning_rate": 0.0005, "loss": 2.1118, "step": 149420 }, { "epoch": 0.5687674611572513, "grad_norm": 0.12528096139431, "learning_rate": 0.0005, "loss": 2.1303, "step": 149430 }, { "epoch": 0.568805523625374, "grad_norm": 0.12339337915182114, "learning_rate": 0.0005, "loss": 2.1175, "step": 149440 }, { "epoch": 0.5688435860934966, "grad_norm": 0.12378998845815659, "learning_rate": 0.0005, "loss": 2.1062, "step": 149450 }, { "epoch": 0.5688816485616194, "grad_norm": 0.14907345175743103, "learning_rate": 0.0005, "loss": 2.1053, "step": 149460 }, { "epoch": 0.568919711029742, "grad_norm": 0.11773096024990082, "learning_rate": 0.0005, "loss": 2.1067, "step": 149470 }, { "epoch": 0.5689577734978647, "grad_norm": 0.12182561308145523, "learning_rate": 0.0005, "loss": 2.1136, "step": 149480 }, { "epoch": 0.5689958359659874, "grad_norm": 0.11594950407743454, "learning_rate": 0.0005, "loss": 2.1113, "step": 149490 }, { "epoch": 0.56903389843411, "grad_norm": 0.1339421570301056, "learning_rate": 0.0005, "loss": 2.1283, "step": 149500 }, { "epoch": 0.5690719609022328, "grad_norm": 0.1360144019126892, "learning_rate": 0.0005, "loss": 2.1084, "step": 149510 }, { "epoch": 0.5691100233703554, "grad_norm": 0.21229194104671478, "learning_rate": 0.0005, "loss": 2.1137, "step": 149520 }, { "epoch": 0.5691480858384781, "grad_norm": 0.11902911961078644, "learning_rate": 0.0005, "loss": 2.1136, "step": 149530 }, { "epoch": 0.5691861483066007, "grad_norm": 0.1262020617723465, "learning_rate": 0.0005, "loss": 2.1316, "step": 149540 }, { "epoch": 0.5692242107747235, "grad_norm": 0.130680650472641, "learning_rate": 0.0005, "loss": 2.1114, "step": 149550 }, { "epoch": 0.5692622732428462, "grad_norm": 0.1344127506017685, "learning_rate": 0.0005, "loss": 2.1051, "step": 149560 }, { "epoch": 0.5693003357109688, "grad_norm": 0.1387372463941574, "learning_rate": 0.0005, "loss": 2.1034, "step": 149570 }, { "epoch": 0.5693383981790915, "grad_norm": 0.12453319132328033, "learning_rate": 0.0005, "loss": 2.1098, "step": 149580 }, { "epoch": 0.5693764606472143, "grad_norm": 0.11809080839157104, "learning_rate": 0.0005, "loss": 2.1145, "step": 149590 }, { "epoch": 0.5694145231153369, "grad_norm": 0.11634735763072968, "learning_rate": 0.0005, "loss": 2.0968, "step": 149600 }, { "epoch": 0.5694525855834596, "grad_norm": 0.13118545711040497, "learning_rate": 0.0005, "loss": 2.1052, "step": 149610 }, { "epoch": 0.5694906480515822, "grad_norm": 0.13438338041305542, "learning_rate": 0.0005, "loss": 2.1361, "step": 149620 }, { "epoch": 0.569528710519705, "grad_norm": 0.12148258090019226, "learning_rate": 0.0005, "loss": 2.1128, "step": 149630 }, { "epoch": 0.5695667729878277, "grad_norm": 0.1469801664352417, "learning_rate": 0.0005, "loss": 2.1096, "step": 149640 }, { "epoch": 0.5696048354559503, "grad_norm": 0.11927749961614609, "learning_rate": 0.0005, "loss": 2.1151, "step": 149650 }, { "epoch": 0.569642897924073, "grad_norm": 0.11742527037858963, "learning_rate": 0.0005, "loss": 2.1045, "step": 149660 }, { "epoch": 0.5696809603921956, "grad_norm": 0.1266251653432846, "learning_rate": 0.0005, "loss": 2.1072, "step": 149670 }, { "epoch": 0.5697190228603184, "grad_norm": 0.13379418849945068, "learning_rate": 0.0005, "loss": 2.1024, "step": 149680 }, { "epoch": 0.569757085328441, "grad_norm": 0.120334193110466, "learning_rate": 0.0005, "loss": 2.114, "step": 149690 }, { "epoch": 0.5697951477965637, "grad_norm": 0.1235523521900177, "learning_rate": 0.0005, "loss": 2.0905, "step": 149700 }, { "epoch": 0.5698332102646864, "grad_norm": 0.12247676402330399, "learning_rate": 0.0005, "loss": 2.0915, "step": 149710 }, { "epoch": 0.5698712727328091, "grad_norm": 0.12162892520427704, "learning_rate": 0.0005, "loss": 2.1004, "step": 149720 }, { "epoch": 0.5699093352009318, "grad_norm": 0.12016452848911285, "learning_rate": 0.0005, "loss": 2.1113, "step": 149730 }, { "epoch": 0.5699473976690544, "grad_norm": 0.1280735582113266, "learning_rate": 0.0005, "loss": 2.1278, "step": 149740 }, { "epoch": 0.5699854601371771, "grad_norm": 0.1145968809723854, "learning_rate": 0.0005, "loss": 2.1026, "step": 149750 }, { "epoch": 0.5700235226052999, "grad_norm": 0.12235980480909348, "learning_rate": 0.0005, "loss": 2.1146, "step": 149760 }, { "epoch": 0.5700615850734225, "grad_norm": 0.13232506811618805, "learning_rate": 0.0005, "loss": 2.1001, "step": 149770 }, { "epoch": 0.5700996475415452, "grad_norm": 0.1253451555967331, "learning_rate": 0.0005, "loss": 2.124, "step": 149780 }, { "epoch": 0.5701377100096678, "grad_norm": 0.12646843492984772, "learning_rate": 0.0005, "loss": 2.1181, "step": 149790 }, { "epoch": 0.5701757724777905, "grad_norm": 0.13931933045387268, "learning_rate": 0.0005, "loss": 2.1222, "step": 149800 }, { "epoch": 0.5702138349459133, "grad_norm": 0.11552035808563232, "learning_rate": 0.0005, "loss": 2.1071, "step": 149810 }, { "epoch": 0.5702518974140359, "grad_norm": 0.12433992326259613, "learning_rate": 0.0005, "loss": 2.1341, "step": 149820 }, { "epoch": 0.5702899598821586, "grad_norm": 0.13327881693840027, "learning_rate": 0.0005, "loss": 2.1236, "step": 149830 }, { "epoch": 0.5703280223502812, "grad_norm": 0.13314664363861084, "learning_rate": 0.0005, "loss": 2.1049, "step": 149840 }, { "epoch": 0.570366084818404, "grad_norm": 0.906789243221283, "learning_rate": 0.0005, "loss": 2.1101, "step": 149850 }, { "epoch": 0.5704041472865267, "grad_norm": 0.11846122145652771, "learning_rate": 0.0005, "loss": 2.1043, "step": 149860 }, { "epoch": 0.5704422097546493, "grad_norm": 0.12619948387145996, "learning_rate": 0.0005, "loss": 2.1176, "step": 149870 }, { "epoch": 0.570480272222772, "grad_norm": 0.11806615442037582, "learning_rate": 0.0005, "loss": 2.1205, "step": 149880 }, { "epoch": 0.5705183346908947, "grad_norm": 0.1296517252922058, "learning_rate": 0.0005, "loss": 2.1078, "step": 149890 }, { "epoch": 0.5705563971590174, "grad_norm": 0.12680789828300476, "learning_rate": 0.0005, "loss": 2.1108, "step": 149900 }, { "epoch": 0.5705944596271401, "grad_norm": 0.12104959785938263, "learning_rate": 0.0005, "loss": 2.1083, "step": 149910 }, { "epoch": 0.5706325220952627, "grad_norm": 0.12184807658195496, "learning_rate": 0.0005, "loss": 2.1067, "step": 149920 }, { "epoch": 0.5706705845633854, "grad_norm": 0.12121890485286713, "learning_rate": 0.0005, "loss": 2.1009, "step": 149930 }, { "epoch": 0.5707086470315081, "grad_norm": 0.13234730064868927, "learning_rate": 0.0005, "loss": 2.1079, "step": 149940 }, { "epoch": 0.5707467094996308, "grad_norm": 0.13042353093624115, "learning_rate": 0.0005, "loss": 2.1125, "step": 149950 }, { "epoch": 0.5707847719677535, "grad_norm": 0.12504348158836365, "learning_rate": 0.0005, "loss": 2.1207, "step": 149960 }, { "epoch": 0.5708228344358761, "grad_norm": 0.1169368252158165, "learning_rate": 0.0005, "loss": 2.1316, "step": 149970 }, { "epoch": 0.5708608969039989, "grad_norm": 0.13686639070510864, "learning_rate": 0.0005, "loss": 2.1016, "step": 149980 }, { "epoch": 0.5708989593721215, "grad_norm": 0.12890039384365082, "learning_rate": 0.0005, "loss": 2.108, "step": 149990 }, { "epoch": 0.5709370218402442, "grad_norm": 0.13867227733135223, "learning_rate": 0.0005, "loss": 2.1117, "step": 150000 }, { "epoch": 0.5709750843083669, "grad_norm": 0.1201515644788742, "learning_rate": 0.0005, "loss": 2.1221, "step": 150010 }, { "epoch": 0.5710131467764896, "grad_norm": 0.13026781380176544, "learning_rate": 0.0005, "loss": 2.117, "step": 150020 }, { "epoch": 0.5710512092446123, "grad_norm": 0.1175355464220047, "learning_rate": 0.0005, "loss": 2.1303, "step": 150030 }, { "epoch": 0.5710892717127349, "grad_norm": 0.11978862434625626, "learning_rate": 0.0005, "loss": 2.1079, "step": 150040 }, { "epoch": 0.5711273341808576, "grad_norm": 0.12562698125839233, "learning_rate": 0.0005, "loss": 2.106, "step": 150050 }, { "epoch": 0.5711653966489804, "grad_norm": 0.14393503963947296, "learning_rate": 0.0005, "loss": 2.124, "step": 150060 }, { "epoch": 0.571203459117103, "grad_norm": 0.11995064467191696, "learning_rate": 0.0005, "loss": 2.0987, "step": 150070 }, { "epoch": 0.5712415215852257, "grad_norm": 0.15165968239307404, "learning_rate": 0.0005, "loss": 2.1092, "step": 150080 }, { "epoch": 0.5712795840533483, "grad_norm": 0.1265028566122055, "learning_rate": 0.0005, "loss": 2.114, "step": 150090 }, { "epoch": 0.571317646521471, "grad_norm": 0.13700735569000244, "learning_rate": 0.0005, "loss": 2.1153, "step": 150100 }, { "epoch": 0.5713557089895938, "grad_norm": 0.11874990165233612, "learning_rate": 0.0005, "loss": 2.1165, "step": 150110 }, { "epoch": 0.5713937714577164, "grad_norm": 0.1361815482378006, "learning_rate": 0.0005, "loss": 2.1199, "step": 150120 }, { "epoch": 0.5714318339258391, "grad_norm": 0.1455676257610321, "learning_rate": 0.0005, "loss": 2.1049, "step": 150130 }, { "epoch": 0.5714698963939617, "grad_norm": 0.1240849643945694, "learning_rate": 0.0005, "loss": 2.1116, "step": 150140 }, { "epoch": 0.5715079588620845, "grad_norm": 0.1253289133310318, "learning_rate": 0.0005, "loss": 2.0927, "step": 150150 }, { "epoch": 0.5715460213302072, "grad_norm": 0.11646021902561188, "learning_rate": 0.0005, "loss": 2.1064, "step": 150160 }, { "epoch": 0.5715840837983298, "grad_norm": 0.12506163120269775, "learning_rate": 0.0005, "loss": 2.1121, "step": 150170 }, { "epoch": 0.5716221462664525, "grad_norm": 0.12780094146728516, "learning_rate": 0.0005, "loss": 2.1125, "step": 150180 }, { "epoch": 0.5716602087345752, "grad_norm": 0.13137106597423553, "learning_rate": 0.0005, "loss": 2.1089, "step": 150190 }, { "epoch": 0.5716982712026979, "grad_norm": 0.1429845094680786, "learning_rate": 0.0005, "loss": 2.1045, "step": 150200 }, { "epoch": 0.5717363336708206, "grad_norm": 0.1283874809741974, "learning_rate": 0.0005, "loss": 2.1172, "step": 150210 }, { "epoch": 0.5717743961389432, "grad_norm": 0.13848842680454254, "learning_rate": 0.0005, "loss": 2.1078, "step": 150220 }, { "epoch": 0.5718124586070659, "grad_norm": 0.1264006644487381, "learning_rate": 0.0005, "loss": 2.0935, "step": 150230 }, { "epoch": 0.5718505210751886, "grad_norm": 0.12379968911409378, "learning_rate": 0.0005, "loss": 2.1204, "step": 150240 }, { "epoch": 0.5718885835433113, "grad_norm": 0.11110774427652359, "learning_rate": 0.0005, "loss": 2.1089, "step": 150250 }, { "epoch": 0.571926646011434, "grad_norm": 0.12956595420837402, "learning_rate": 0.0005, "loss": 2.0975, "step": 150260 }, { "epoch": 0.5719647084795566, "grad_norm": 0.1179521456360817, "learning_rate": 0.0005, "loss": 2.1082, "step": 150270 }, { "epoch": 0.5720027709476794, "grad_norm": 0.13738635182380676, "learning_rate": 0.0005, "loss": 2.1126, "step": 150280 }, { "epoch": 0.572040833415802, "grad_norm": 0.13342367112636566, "learning_rate": 0.0005, "loss": 2.0974, "step": 150290 }, { "epoch": 0.5720788958839247, "grad_norm": 0.12412730604410172, "learning_rate": 0.0005, "loss": 2.1177, "step": 150300 }, { "epoch": 0.5721169583520473, "grad_norm": 0.1309875100851059, "learning_rate": 0.0005, "loss": 2.1203, "step": 150310 }, { "epoch": 0.5721550208201701, "grad_norm": 0.1166936457157135, "learning_rate": 0.0005, "loss": 2.1044, "step": 150320 }, { "epoch": 0.5721930832882928, "grad_norm": 0.12807297706604004, "learning_rate": 0.0005, "loss": 2.1122, "step": 150330 }, { "epoch": 0.5722311457564154, "grad_norm": 0.12080655992031097, "learning_rate": 0.0005, "loss": 2.115, "step": 150340 }, { "epoch": 0.5722692082245381, "grad_norm": 0.12636420130729675, "learning_rate": 0.0005, "loss": 2.101, "step": 150350 }, { "epoch": 0.5723072706926607, "grad_norm": 0.12918971478939056, "learning_rate": 0.0005, "loss": 2.1141, "step": 150360 }, { "epoch": 0.5723453331607835, "grad_norm": 0.1290965974330902, "learning_rate": 0.0005, "loss": 2.1107, "step": 150370 }, { "epoch": 0.5723833956289062, "grad_norm": 0.12524893879890442, "learning_rate": 0.0005, "loss": 2.104, "step": 150380 }, { "epoch": 0.5724214580970288, "grad_norm": 0.12515844404697418, "learning_rate": 0.0005, "loss": 2.1064, "step": 150390 }, { "epoch": 0.5724595205651515, "grad_norm": 0.1358836144208908, "learning_rate": 0.0005, "loss": 2.1299, "step": 150400 }, { "epoch": 0.5724975830332742, "grad_norm": 0.13036629557609558, "learning_rate": 0.0005, "loss": 2.1276, "step": 150410 }, { "epoch": 0.5725356455013969, "grad_norm": 0.12266593426465988, "learning_rate": 0.0005, "loss": 2.1236, "step": 150420 }, { "epoch": 0.5725737079695196, "grad_norm": 0.12452898919582367, "learning_rate": 0.0005, "loss": 2.1226, "step": 150430 }, { "epoch": 0.5726117704376422, "grad_norm": 0.152368426322937, "learning_rate": 0.0005, "loss": 2.1178, "step": 150440 }, { "epoch": 0.572649832905765, "grad_norm": 0.13339364528656006, "learning_rate": 0.0005, "loss": 2.1103, "step": 150450 }, { "epoch": 0.5726878953738876, "grad_norm": 0.12512865662574768, "learning_rate": 0.0005, "loss": 2.1188, "step": 150460 }, { "epoch": 0.5727259578420103, "grad_norm": 0.12616005539894104, "learning_rate": 0.0005, "loss": 2.1172, "step": 150470 }, { "epoch": 0.572764020310133, "grad_norm": 0.12529578804969788, "learning_rate": 0.0005, "loss": 2.1161, "step": 150480 }, { "epoch": 0.5728020827782557, "grad_norm": 0.11737383157014847, "learning_rate": 0.0005, "loss": 2.1162, "step": 150490 }, { "epoch": 0.5728401452463784, "grad_norm": 0.1222836822271347, "learning_rate": 0.0005, "loss": 2.1318, "step": 150500 }, { "epoch": 0.572878207714501, "grad_norm": 0.1139824390411377, "learning_rate": 0.0005, "loss": 2.1196, "step": 150510 }, { "epoch": 0.5729162701826237, "grad_norm": 0.11608431488275528, "learning_rate": 0.0005, "loss": 2.1204, "step": 150520 }, { "epoch": 0.5729543326507464, "grad_norm": 0.136452317237854, "learning_rate": 0.0005, "loss": 2.1015, "step": 150530 }, { "epoch": 0.5729923951188691, "grad_norm": 0.12363389134407043, "learning_rate": 0.0005, "loss": 2.1153, "step": 150540 }, { "epoch": 0.5730304575869918, "grad_norm": 0.13430823385715485, "learning_rate": 0.0005, "loss": 2.1241, "step": 150550 }, { "epoch": 0.5730685200551144, "grad_norm": 0.12809452414512634, "learning_rate": 0.0005, "loss": 2.1052, "step": 150560 }, { "epoch": 0.5731065825232371, "grad_norm": 0.1187937781214714, "learning_rate": 0.0005, "loss": 2.1172, "step": 150570 }, { "epoch": 0.5731446449913599, "grad_norm": 0.12435567378997803, "learning_rate": 0.0005, "loss": 2.1122, "step": 150580 }, { "epoch": 0.5731827074594825, "grad_norm": 0.13549238443374634, "learning_rate": 0.0005, "loss": 2.1151, "step": 150590 }, { "epoch": 0.5732207699276052, "grad_norm": 0.13395282626152039, "learning_rate": 0.0005, "loss": 2.1089, "step": 150600 }, { "epoch": 0.5732588323957278, "grad_norm": 0.1308007836341858, "learning_rate": 0.0005, "loss": 2.0858, "step": 150610 }, { "epoch": 0.5732968948638506, "grad_norm": 0.13134954869747162, "learning_rate": 0.0005, "loss": 2.1118, "step": 150620 }, { "epoch": 0.5733349573319733, "grad_norm": 0.11117885261774063, "learning_rate": 0.0005, "loss": 2.1168, "step": 150630 }, { "epoch": 0.5733730198000959, "grad_norm": 0.13431069254875183, "learning_rate": 0.0005, "loss": 2.1122, "step": 150640 }, { "epoch": 0.5734110822682186, "grad_norm": 0.1294468343257904, "learning_rate": 0.0005, "loss": 2.114, "step": 150650 }, { "epoch": 0.5734491447363412, "grad_norm": 0.107212133705616, "learning_rate": 0.0005, "loss": 2.1082, "step": 150660 }, { "epoch": 0.573487207204464, "grad_norm": 0.1195271760225296, "learning_rate": 0.0005, "loss": 2.1074, "step": 150670 }, { "epoch": 0.5735252696725867, "grad_norm": 0.12692059576511383, "learning_rate": 0.0005, "loss": 2.1214, "step": 150680 }, { "epoch": 0.5735633321407093, "grad_norm": 0.1342695653438568, "learning_rate": 0.0005, "loss": 2.1081, "step": 150690 }, { "epoch": 0.573601394608832, "grad_norm": 0.12712764739990234, "learning_rate": 0.0005, "loss": 2.1084, "step": 150700 }, { "epoch": 0.5736394570769547, "grad_norm": 0.12305767834186554, "learning_rate": 0.0005, "loss": 2.1059, "step": 150710 }, { "epoch": 0.5736775195450774, "grad_norm": 0.11675681918859482, "learning_rate": 0.0005, "loss": 2.0955, "step": 150720 }, { "epoch": 0.5737155820132, "grad_norm": 0.12459848076105118, "learning_rate": 0.0005, "loss": 2.1125, "step": 150730 }, { "epoch": 0.5737536444813227, "grad_norm": 0.12979640066623688, "learning_rate": 0.0005, "loss": 2.1299, "step": 150740 }, { "epoch": 0.5737917069494455, "grad_norm": 0.11147118359804153, "learning_rate": 0.0005, "loss": 2.1212, "step": 150750 }, { "epoch": 0.5738297694175681, "grad_norm": 0.1222359836101532, "learning_rate": 0.0005, "loss": 2.1125, "step": 150760 }, { "epoch": 0.5738678318856908, "grad_norm": 0.11825738102197647, "learning_rate": 0.0005, "loss": 2.1195, "step": 150770 }, { "epoch": 0.5739058943538135, "grad_norm": 0.1274542659521103, "learning_rate": 0.0005, "loss": 2.1153, "step": 150780 }, { "epoch": 0.5739439568219361, "grad_norm": 0.12503868341445923, "learning_rate": 0.0005, "loss": 2.1009, "step": 150790 }, { "epoch": 0.5739820192900589, "grad_norm": 0.12383761256933212, "learning_rate": 0.0005, "loss": 2.1043, "step": 150800 }, { "epoch": 0.5740200817581815, "grad_norm": 0.17884153127670288, "learning_rate": 0.0005, "loss": 2.1208, "step": 150810 }, { "epoch": 0.5740581442263042, "grad_norm": 0.13055235147476196, "learning_rate": 0.0005, "loss": 2.1274, "step": 150820 }, { "epoch": 0.5740962066944268, "grad_norm": 0.11566507816314697, "learning_rate": 0.0005, "loss": 2.1114, "step": 150830 }, { "epoch": 0.5741342691625496, "grad_norm": 0.11708749830722809, "learning_rate": 0.0005, "loss": 2.1187, "step": 150840 }, { "epoch": 0.5741723316306723, "grad_norm": 0.1316896378993988, "learning_rate": 0.0005, "loss": 2.1017, "step": 150850 }, { "epoch": 0.5742103940987949, "grad_norm": 0.12467554956674576, "learning_rate": 0.0005, "loss": 2.1085, "step": 150860 }, { "epoch": 0.5742484565669176, "grad_norm": 0.13408036530017853, "learning_rate": 0.0005, "loss": 2.1145, "step": 150870 }, { "epoch": 0.5742865190350404, "grad_norm": 0.123175248503685, "learning_rate": 0.0005, "loss": 2.1153, "step": 150880 }, { "epoch": 0.574324581503163, "grad_norm": 0.14702090620994568, "learning_rate": 0.0005, "loss": 2.1166, "step": 150890 }, { "epoch": 0.5743626439712857, "grad_norm": 0.13698521256446838, "learning_rate": 0.0005, "loss": 2.1204, "step": 150900 }, { "epoch": 0.5744007064394083, "grad_norm": 0.12514759600162506, "learning_rate": 0.0005, "loss": 2.1131, "step": 150910 }, { "epoch": 0.5744387689075311, "grad_norm": 0.1253451555967331, "learning_rate": 0.0005, "loss": 2.1115, "step": 150920 }, { "epoch": 0.5744768313756538, "grad_norm": 0.1406048834323883, "learning_rate": 0.0005, "loss": 2.1177, "step": 150930 }, { "epoch": 0.5745148938437764, "grad_norm": 0.11642050743103027, "learning_rate": 0.0005, "loss": 2.1018, "step": 150940 }, { "epoch": 0.5745529563118991, "grad_norm": 0.1270553022623062, "learning_rate": 0.0005, "loss": 2.1167, "step": 150950 }, { "epoch": 0.5745910187800217, "grad_norm": 0.11792125552892685, "learning_rate": 0.0005, "loss": 2.098, "step": 150960 }, { "epoch": 0.5746290812481445, "grad_norm": 0.10952485352754593, "learning_rate": 0.0005, "loss": 2.1073, "step": 150970 }, { "epoch": 0.5746671437162671, "grad_norm": 0.12044594436883926, "learning_rate": 0.0005, "loss": 2.1087, "step": 150980 }, { "epoch": 0.5747052061843898, "grad_norm": 0.13403725624084473, "learning_rate": 0.0005, "loss": 2.1066, "step": 150990 }, { "epoch": 0.5747432686525125, "grad_norm": 0.1265270859003067, "learning_rate": 0.0005, "loss": 2.1194, "step": 151000 }, { "epoch": 0.5747813311206352, "grad_norm": 0.12403400987386703, "learning_rate": 0.0005, "loss": 2.1275, "step": 151010 }, { "epoch": 0.5748193935887579, "grad_norm": 0.14083850383758545, "learning_rate": 0.0005, "loss": 2.1112, "step": 151020 }, { "epoch": 0.5748574560568805, "grad_norm": 0.12334656715393066, "learning_rate": 0.0005, "loss": 2.107, "step": 151030 }, { "epoch": 0.5748955185250032, "grad_norm": 0.1339842528104782, "learning_rate": 0.0005, "loss": 2.1109, "step": 151040 }, { "epoch": 0.574933580993126, "grad_norm": 0.12079505622386932, "learning_rate": 0.0005, "loss": 2.1207, "step": 151050 }, { "epoch": 0.5749716434612486, "grad_norm": 0.11383721977472305, "learning_rate": 0.0005, "loss": 2.1126, "step": 151060 }, { "epoch": 0.5750097059293713, "grad_norm": 0.142286479473114, "learning_rate": 0.0005, "loss": 2.109, "step": 151070 }, { "epoch": 0.5750477683974939, "grad_norm": 0.12102207541465759, "learning_rate": 0.0005, "loss": 2.1101, "step": 151080 }, { "epoch": 0.5750858308656166, "grad_norm": 0.12138934433460236, "learning_rate": 0.0005, "loss": 2.1036, "step": 151090 }, { "epoch": 0.5751238933337394, "grad_norm": 0.11563186347484589, "learning_rate": 0.0005, "loss": 2.1143, "step": 151100 }, { "epoch": 0.575161955801862, "grad_norm": 0.11683789640665054, "learning_rate": 0.0005, "loss": 2.1271, "step": 151110 }, { "epoch": 0.5752000182699847, "grad_norm": 0.1332668960094452, "learning_rate": 0.0005, "loss": 2.1179, "step": 151120 }, { "epoch": 0.5752380807381073, "grad_norm": 0.134083092212677, "learning_rate": 0.0005, "loss": 2.116, "step": 151130 }, { "epoch": 0.5752761432062301, "grad_norm": 0.12192686647176743, "learning_rate": 0.0005, "loss": 2.1071, "step": 151140 }, { "epoch": 0.5753142056743528, "grad_norm": 0.11395518481731415, "learning_rate": 0.0005, "loss": 2.1106, "step": 151150 }, { "epoch": 0.5753522681424754, "grad_norm": 0.126944437623024, "learning_rate": 0.0005, "loss": 2.1151, "step": 151160 }, { "epoch": 0.5753903306105981, "grad_norm": 0.132183238863945, "learning_rate": 0.0005, "loss": 2.0985, "step": 151170 }, { "epoch": 0.5754283930787208, "grad_norm": 0.12324900925159454, "learning_rate": 0.0005, "loss": 2.1128, "step": 151180 }, { "epoch": 0.5754664555468435, "grad_norm": 0.1156749501824379, "learning_rate": 0.0005, "loss": 2.1083, "step": 151190 }, { "epoch": 0.5755045180149662, "grad_norm": 0.12462375313043594, "learning_rate": 0.0005, "loss": 2.1104, "step": 151200 }, { "epoch": 0.5755425804830888, "grad_norm": 0.12586110830307007, "learning_rate": 0.0005, "loss": 2.105, "step": 151210 }, { "epoch": 0.5755806429512115, "grad_norm": 0.13233725726604462, "learning_rate": 0.0005, "loss": 2.133, "step": 151220 }, { "epoch": 0.5756187054193342, "grad_norm": 0.14042454957962036, "learning_rate": 0.0005, "loss": 2.1284, "step": 151230 }, { "epoch": 0.5756567678874569, "grad_norm": 0.11982891708612442, "learning_rate": 0.0005, "loss": 2.0985, "step": 151240 }, { "epoch": 0.5756948303555796, "grad_norm": 0.12568190693855286, "learning_rate": 0.0005, "loss": 2.1066, "step": 151250 }, { "epoch": 0.5757328928237022, "grad_norm": 0.11588189005851746, "learning_rate": 0.0005, "loss": 2.1107, "step": 151260 }, { "epoch": 0.575770955291825, "grad_norm": 0.1222311481833458, "learning_rate": 0.0005, "loss": 2.1098, "step": 151270 }, { "epoch": 0.5758090177599476, "grad_norm": 0.12930968403816223, "learning_rate": 0.0005, "loss": 2.1265, "step": 151280 }, { "epoch": 0.5758470802280703, "grad_norm": 0.12175658345222473, "learning_rate": 0.0005, "loss": 2.1104, "step": 151290 }, { "epoch": 0.575885142696193, "grad_norm": 0.11445169150829315, "learning_rate": 0.0005, "loss": 2.1139, "step": 151300 }, { "epoch": 0.5759232051643157, "grad_norm": 0.12847721576690674, "learning_rate": 0.0005, "loss": 2.0985, "step": 151310 }, { "epoch": 0.5759612676324384, "grad_norm": 0.12533120810985565, "learning_rate": 0.0005, "loss": 2.1193, "step": 151320 }, { "epoch": 0.575999330100561, "grad_norm": 0.12310317158699036, "learning_rate": 0.0005, "loss": 2.1069, "step": 151330 }, { "epoch": 0.5760373925686837, "grad_norm": 0.12116267532110214, "learning_rate": 0.0005, "loss": 2.1144, "step": 151340 }, { "epoch": 0.5760754550368065, "grad_norm": 0.12053951621055603, "learning_rate": 0.0005, "loss": 2.1014, "step": 151350 }, { "epoch": 0.5761135175049291, "grad_norm": 0.11923123896121979, "learning_rate": 0.0005, "loss": 2.1155, "step": 151360 }, { "epoch": 0.5761515799730518, "grad_norm": 0.13338451087474823, "learning_rate": 0.0005, "loss": 2.1001, "step": 151370 }, { "epoch": 0.5761896424411744, "grad_norm": 0.12539881467819214, "learning_rate": 0.0005, "loss": 2.0893, "step": 151380 }, { "epoch": 0.5762277049092971, "grad_norm": 0.15315894782543182, "learning_rate": 0.0005, "loss": 2.1144, "step": 151390 }, { "epoch": 0.5762657673774199, "grad_norm": 0.1272190511226654, "learning_rate": 0.0005, "loss": 2.1187, "step": 151400 }, { "epoch": 0.5763038298455425, "grad_norm": 0.12473262846469879, "learning_rate": 0.0005, "loss": 2.1152, "step": 151410 }, { "epoch": 0.5763418923136652, "grad_norm": 0.11976677924394608, "learning_rate": 0.0005, "loss": 2.1237, "step": 151420 }, { "epoch": 0.5763799547817878, "grad_norm": 0.12231112271547318, "learning_rate": 0.0005, "loss": 2.1274, "step": 151430 }, { "epoch": 0.5764180172499106, "grad_norm": 0.12565696239471436, "learning_rate": 0.0005, "loss": 2.1196, "step": 151440 }, { "epoch": 0.5764560797180333, "grad_norm": 0.11815255135297775, "learning_rate": 0.0005, "loss": 2.101, "step": 151450 }, { "epoch": 0.5764941421861559, "grad_norm": 0.14175409078598022, "learning_rate": 0.0005, "loss": 2.1169, "step": 151460 }, { "epoch": 0.5765322046542786, "grad_norm": 0.11824507266283035, "learning_rate": 0.0005, "loss": 2.1096, "step": 151470 }, { "epoch": 0.5765702671224013, "grad_norm": 0.11940937489271164, "learning_rate": 0.0005, "loss": 2.1061, "step": 151480 }, { "epoch": 0.576608329590524, "grad_norm": 0.1218341588973999, "learning_rate": 0.0005, "loss": 2.1281, "step": 151490 }, { "epoch": 0.5766463920586467, "grad_norm": 0.12804977595806122, "learning_rate": 0.0005, "loss": 2.1075, "step": 151500 }, { "epoch": 0.5766844545267693, "grad_norm": 0.1206967905163765, "learning_rate": 0.0005, "loss": 2.1165, "step": 151510 }, { "epoch": 0.576722516994892, "grad_norm": 0.13532279431819916, "learning_rate": 0.0005, "loss": 2.1059, "step": 151520 }, { "epoch": 0.5767605794630147, "grad_norm": 0.12815798819065094, "learning_rate": 0.0005, "loss": 2.1216, "step": 151530 }, { "epoch": 0.5767986419311374, "grad_norm": 0.12100932747125626, "learning_rate": 0.0005, "loss": 2.1076, "step": 151540 }, { "epoch": 0.57683670439926, "grad_norm": 0.14143440127372742, "learning_rate": 0.0005, "loss": 2.1184, "step": 151550 }, { "epoch": 0.5768747668673827, "grad_norm": 0.139065220952034, "learning_rate": 0.0005, "loss": 2.1189, "step": 151560 }, { "epoch": 0.5769128293355055, "grad_norm": 0.1327260434627533, "learning_rate": 0.0005, "loss": 2.1048, "step": 151570 }, { "epoch": 0.5769508918036281, "grad_norm": 0.1227652058005333, "learning_rate": 0.0005, "loss": 2.1079, "step": 151580 }, { "epoch": 0.5769889542717508, "grad_norm": 0.1407083421945572, "learning_rate": 0.0005, "loss": 2.1178, "step": 151590 }, { "epoch": 0.5770270167398734, "grad_norm": 0.11829644441604614, "learning_rate": 0.0005, "loss": 2.1133, "step": 151600 }, { "epoch": 0.5770650792079962, "grad_norm": 0.12382587790489197, "learning_rate": 0.0005, "loss": 2.1182, "step": 151610 }, { "epoch": 0.5771031416761189, "grad_norm": 0.13066115975379944, "learning_rate": 0.0005, "loss": 2.1063, "step": 151620 }, { "epoch": 0.5771412041442415, "grad_norm": 0.12449169158935547, "learning_rate": 0.0005, "loss": 2.1029, "step": 151630 }, { "epoch": 0.5771792666123642, "grad_norm": 0.12625348567962646, "learning_rate": 0.0005, "loss": 2.1174, "step": 151640 }, { "epoch": 0.5772173290804868, "grad_norm": 0.11762882024049759, "learning_rate": 0.0005, "loss": 2.1153, "step": 151650 }, { "epoch": 0.5772553915486096, "grad_norm": 0.13215084373950958, "learning_rate": 0.0005, "loss": 2.127, "step": 151660 }, { "epoch": 0.5772934540167323, "grad_norm": 0.11425194144248962, "learning_rate": 0.0005, "loss": 2.1195, "step": 151670 }, { "epoch": 0.5773315164848549, "grad_norm": 0.1309116780757904, "learning_rate": 0.0005, "loss": 2.1137, "step": 151680 }, { "epoch": 0.5773695789529776, "grad_norm": 0.12787970900535583, "learning_rate": 0.0005, "loss": 2.1331, "step": 151690 }, { "epoch": 0.5774076414211003, "grad_norm": 0.1286764293909073, "learning_rate": 0.0005, "loss": 2.1178, "step": 151700 }, { "epoch": 0.577445703889223, "grad_norm": 0.13170640170574188, "learning_rate": 0.0005, "loss": 2.1253, "step": 151710 }, { "epoch": 0.5774837663573457, "grad_norm": 0.13535332679748535, "learning_rate": 0.0005, "loss": 2.1284, "step": 151720 }, { "epoch": 0.5775218288254683, "grad_norm": 0.12930703163146973, "learning_rate": 0.0005, "loss": 2.108, "step": 151730 }, { "epoch": 0.5775598912935911, "grad_norm": 0.13405849039554596, "learning_rate": 0.0005, "loss": 2.1161, "step": 151740 }, { "epoch": 0.5775979537617137, "grad_norm": 0.11642023921012878, "learning_rate": 0.0005, "loss": 2.1248, "step": 151750 }, { "epoch": 0.5776360162298364, "grad_norm": 0.13674704730510712, "learning_rate": 0.0005, "loss": 2.1096, "step": 151760 }, { "epoch": 0.5776740786979591, "grad_norm": 0.12092318385839462, "learning_rate": 0.0005, "loss": 2.1212, "step": 151770 }, { "epoch": 0.5777121411660818, "grad_norm": 0.12388814240694046, "learning_rate": 0.0005, "loss": 2.1189, "step": 151780 }, { "epoch": 0.5777502036342045, "grad_norm": 0.12979772686958313, "learning_rate": 0.0005, "loss": 2.1154, "step": 151790 }, { "epoch": 0.5777882661023271, "grad_norm": 0.1481570303440094, "learning_rate": 0.0005, "loss": 2.1255, "step": 151800 }, { "epoch": 0.5778263285704498, "grad_norm": 0.12397785484790802, "learning_rate": 0.0005, "loss": 2.1033, "step": 151810 }, { "epoch": 0.5778643910385725, "grad_norm": 0.11837328225374222, "learning_rate": 0.0005, "loss": 2.1089, "step": 151820 }, { "epoch": 0.5779024535066952, "grad_norm": 0.13646014034748077, "learning_rate": 0.0005, "loss": 2.1133, "step": 151830 }, { "epoch": 0.5779405159748179, "grad_norm": 0.13520261645317078, "learning_rate": 0.0005, "loss": 2.1043, "step": 151840 }, { "epoch": 0.5779785784429405, "grad_norm": 0.1331622302532196, "learning_rate": 0.0005, "loss": 2.1228, "step": 151850 }, { "epoch": 0.5780166409110632, "grad_norm": 0.13102315366268158, "learning_rate": 0.0005, "loss": 2.1296, "step": 151860 }, { "epoch": 0.578054703379186, "grad_norm": 0.12880952656269073, "learning_rate": 0.0005, "loss": 2.127, "step": 151870 }, { "epoch": 0.5780927658473086, "grad_norm": 0.12405695021152496, "learning_rate": 0.0005, "loss": 2.1084, "step": 151880 }, { "epoch": 0.5781308283154313, "grad_norm": 0.12300620228052139, "learning_rate": 0.0005, "loss": 2.1094, "step": 151890 }, { "epoch": 0.5781688907835539, "grad_norm": 0.13016889989376068, "learning_rate": 0.0005, "loss": 2.1122, "step": 151900 }, { "epoch": 0.5782069532516767, "grad_norm": 0.11752153187990189, "learning_rate": 0.0005, "loss": 2.1075, "step": 151910 }, { "epoch": 0.5782450157197994, "grad_norm": 0.12173010408878326, "learning_rate": 0.0005, "loss": 2.1123, "step": 151920 }, { "epoch": 0.578283078187922, "grad_norm": 0.12354975193738937, "learning_rate": 0.0005, "loss": 2.1098, "step": 151930 }, { "epoch": 0.5783211406560447, "grad_norm": 0.11892983317375183, "learning_rate": 0.0005, "loss": 2.1305, "step": 151940 }, { "epoch": 0.5783592031241673, "grad_norm": 0.11993703246116638, "learning_rate": 0.0005, "loss": 2.1009, "step": 151950 }, { "epoch": 0.5783972655922901, "grad_norm": 0.14302973449230194, "learning_rate": 0.0005, "loss": 2.1145, "step": 151960 }, { "epoch": 0.5784353280604128, "grad_norm": 0.12831313908100128, "learning_rate": 0.0005, "loss": 2.1207, "step": 151970 }, { "epoch": 0.5784733905285354, "grad_norm": 0.13994410634040833, "learning_rate": 0.0005, "loss": 2.1251, "step": 151980 }, { "epoch": 0.5785114529966581, "grad_norm": 0.13094666600227356, "learning_rate": 0.0005, "loss": 2.1239, "step": 151990 }, { "epoch": 0.5785495154647808, "grad_norm": 0.13194623589515686, "learning_rate": 0.0005, "loss": 2.0952, "step": 152000 }, { "epoch": 0.5785875779329035, "grad_norm": 0.11673175543546677, "learning_rate": 0.0005, "loss": 2.1008, "step": 152010 }, { "epoch": 0.5786256404010262, "grad_norm": 0.11440134048461914, "learning_rate": 0.0005, "loss": 2.1116, "step": 152020 }, { "epoch": 0.5786637028691488, "grad_norm": 0.1264093518257141, "learning_rate": 0.0005, "loss": 2.1035, "step": 152030 }, { "epoch": 0.5787017653372716, "grad_norm": 0.11688932776451111, "learning_rate": 0.0005, "loss": 2.1095, "step": 152040 }, { "epoch": 0.5787398278053942, "grad_norm": 0.13633820414543152, "learning_rate": 0.0005, "loss": 2.1071, "step": 152050 }, { "epoch": 0.5787778902735169, "grad_norm": 0.13115862011909485, "learning_rate": 0.0005, "loss": 2.1151, "step": 152060 }, { "epoch": 0.5788159527416396, "grad_norm": 0.14492659270763397, "learning_rate": 0.0005, "loss": 2.1133, "step": 152070 }, { "epoch": 0.5788540152097622, "grad_norm": 0.12465827912092209, "learning_rate": 0.0005, "loss": 2.1044, "step": 152080 }, { "epoch": 0.578892077677885, "grad_norm": 0.11936473846435547, "learning_rate": 0.0005, "loss": 2.1051, "step": 152090 }, { "epoch": 0.5789301401460076, "grad_norm": 0.12346568703651428, "learning_rate": 0.0005, "loss": 2.1309, "step": 152100 }, { "epoch": 0.5789682026141303, "grad_norm": 0.12095118314027786, "learning_rate": 0.0005, "loss": 2.119, "step": 152110 }, { "epoch": 0.579006265082253, "grad_norm": 0.12287425994873047, "learning_rate": 0.0005, "loss": 2.1142, "step": 152120 }, { "epoch": 0.5790443275503757, "grad_norm": 0.154531329870224, "learning_rate": 0.0005, "loss": 2.1039, "step": 152130 }, { "epoch": 0.5790823900184984, "grad_norm": 0.1167474314570427, "learning_rate": 0.0005, "loss": 2.112, "step": 152140 }, { "epoch": 0.579120452486621, "grad_norm": 0.1318303495645523, "learning_rate": 0.0005, "loss": 2.1014, "step": 152150 }, { "epoch": 0.5791585149547437, "grad_norm": 0.13696379959583282, "learning_rate": 0.0005, "loss": 2.1066, "step": 152160 }, { "epoch": 0.5791965774228665, "grad_norm": 0.12334418296813965, "learning_rate": 0.0005, "loss": 2.1106, "step": 152170 }, { "epoch": 0.5792346398909891, "grad_norm": 0.11851691454648972, "learning_rate": 0.0005, "loss": 2.1008, "step": 152180 }, { "epoch": 0.5792727023591118, "grad_norm": 0.11992210894823074, "learning_rate": 0.0005, "loss": 2.1055, "step": 152190 }, { "epoch": 0.5793107648272344, "grad_norm": 0.11987617611885071, "learning_rate": 0.0005, "loss": 2.1159, "step": 152200 }, { "epoch": 0.5793488272953572, "grad_norm": 0.12984754145145416, "learning_rate": 0.0005, "loss": 2.0902, "step": 152210 }, { "epoch": 0.5793868897634799, "grad_norm": 0.12692391872406006, "learning_rate": 0.0005, "loss": 2.1152, "step": 152220 }, { "epoch": 0.5794249522316025, "grad_norm": 0.12092795968055725, "learning_rate": 0.0005, "loss": 2.1177, "step": 152230 }, { "epoch": 0.5794630146997252, "grad_norm": 0.13350768387317657, "learning_rate": 0.0005, "loss": 2.1096, "step": 152240 }, { "epoch": 0.5795010771678478, "grad_norm": 0.12127964943647385, "learning_rate": 0.0005, "loss": 2.1072, "step": 152250 }, { "epoch": 0.5795391396359706, "grad_norm": 0.13222403824329376, "learning_rate": 0.0005, "loss": 2.1006, "step": 152260 }, { "epoch": 0.5795772021040932, "grad_norm": 0.11993005126714706, "learning_rate": 0.0005, "loss": 2.1197, "step": 152270 }, { "epoch": 0.5796152645722159, "grad_norm": 0.12395907193422318, "learning_rate": 0.0005, "loss": 2.0988, "step": 152280 }, { "epoch": 0.5796533270403386, "grad_norm": 0.1194024533033371, "learning_rate": 0.0005, "loss": 2.1234, "step": 152290 }, { "epoch": 0.5796913895084613, "grad_norm": 0.12345626950263977, "learning_rate": 0.0005, "loss": 2.1202, "step": 152300 }, { "epoch": 0.579729451976584, "grad_norm": 0.123421810567379, "learning_rate": 0.0005, "loss": 2.1352, "step": 152310 }, { "epoch": 0.5797675144447066, "grad_norm": 0.11834047734737396, "learning_rate": 0.0005, "loss": 2.1136, "step": 152320 }, { "epoch": 0.5798055769128293, "grad_norm": 0.12674610316753387, "learning_rate": 0.0005, "loss": 2.1064, "step": 152330 }, { "epoch": 0.5798436393809521, "grad_norm": 0.1250268816947937, "learning_rate": 0.0005, "loss": 2.0967, "step": 152340 }, { "epoch": 0.5798817018490747, "grad_norm": 0.122011199593544, "learning_rate": 0.0005, "loss": 2.109, "step": 152350 }, { "epoch": 0.5799197643171974, "grad_norm": 0.11669352650642395, "learning_rate": 0.0005, "loss": 2.0964, "step": 152360 }, { "epoch": 0.57995782678532, "grad_norm": 0.13394352793693542, "learning_rate": 0.0005, "loss": 2.1066, "step": 152370 }, { "epoch": 0.5799958892534427, "grad_norm": 0.13123464584350586, "learning_rate": 0.0005, "loss": 2.1164, "step": 152380 }, { "epoch": 0.5800339517215655, "grad_norm": 0.12727457284927368, "learning_rate": 0.0005, "loss": 2.1094, "step": 152390 }, { "epoch": 0.5800720141896881, "grad_norm": 0.12583385407924652, "learning_rate": 0.0005, "loss": 2.0968, "step": 152400 }, { "epoch": 0.5801100766578108, "grad_norm": 0.12469319999217987, "learning_rate": 0.0005, "loss": 2.1049, "step": 152410 }, { "epoch": 0.5801481391259334, "grad_norm": 0.12815004587173462, "learning_rate": 0.0005, "loss": 2.108, "step": 152420 }, { "epoch": 0.5801862015940562, "grad_norm": 0.1281806230545044, "learning_rate": 0.0005, "loss": 2.1328, "step": 152430 }, { "epoch": 0.5802242640621789, "grad_norm": 0.11810749024152756, "learning_rate": 0.0005, "loss": 2.0892, "step": 152440 }, { "epoch": 0.5802623265303015, "grad_norm": 0.12065868079662323, "learning_rate": 0.0005, "loss": 2.1146, "step": 152450 }, { "epoch": 0.5803003889984242, "grad_norm": 0.1311550885438919, "learning_rate": 0.0005, "loss": 2.13, "step": 152460 }, { "epoch": 0.580338451466547, "grad_norm": 0.12211163341999054, "learning_rate": 0.0005, "loss": 2.114, "step": 152470 }, { "epoch": 0.5803765139346696, "grad_norm": 0.13015727698802948, "learning_rate": 0.0005, "loss": 2.1166, "step": 152480 }, { "epoch": 0.5804145764027923, "grad_norm": 0.12457870692014694, "learning_rate": 0.0005, "loss": 2.1134, "step": 152490 }, { "epoch": 0.5804526388709149, "grad_norm": 0.12478785961866379, "learning_rate": 0.0005, "loss": 2.1259, "step": 152500 }, { "epoch": 0.5804907013390376, "grad_norm": 0.12055505812168121, "learning_rate": 0.0005, "loss": 2.1091, "step": 152510 }, { "epoch": 0.5805287638071603, "grad_norm": 0.13347235321998596, "learning_rate": 0.0005, "loss": 2.1064, "step": 152520 }, { "epoch": 0.580566826275283, "grad_norm": 0.11684930324554443, "learning_rate": 0.0005, "loss": 2.104, "step": 152530 }, { "epoch": 0.5806048887434057, "grad_norm": 0.1279844045639038, "learning_rate": 0.0005, "loss": 2.1173, "step": 152540 }, { "epoch": 0.5806429512115283, "grad_norm": 0.12912394106388092, "learning_rate": 0.0005, "loss": 2.1119, "step": 152550 }, { "epoch": 0.5806810136796511, "grad_norm": 0.12052441388368607, "learning_rate": 0.0005, "loss": 2.1104, "step": 152560 }, { "epoch": 0.5807190761477737, "grad_norm": 0.12511587142944336, "learning_rate": 0.0005, "loss": 2.0996, "step": 152570 }, { "epoch": 0.5807571386158964, "grad_norm": 0.12099773436784744, "learning_rate": 0.0005, "loss": 2.1044, "step": 152580 }, { "epoch": 0.580795201084019, "grad_norm": 0.1495269387960434, "learning_rate": 0.0005, "loss": 2.1185, "step": 152590 }, { "epoch": 0.5808332635521418, "grad_norm": 0.1222032830119133, "learning_rate": 0.0005, "loss": 2.1113, "step": 152600 }, { "epoch": 0.5808713260202645, "grad_norm": 0.11979856342077255, "learning_rate": 0.0005, "loss": 2.1284, "step": 152610 }, { "epoch": 0.5809093884883871, "grad_norm": 0.13302768766880035, "learning_rate": 0.0005, "loss": 2.1036, "step": 152620 }, { "epoch": 0.5809474509565098, "grad_norm": 0.14392118155956268, "learning_rate": 0.0005, "loss": 2.1138, "step": 152630 }, { "epoch": 0.5809855134246326, "grad_norm": 0.11564590781927109, "learning_rate": 0.0005, "loss": 2.0939, "step": 152640 }, { "epoch": 0.5810235758927552, "grad_norm": 0.13273461163043976, "learning_rate": 0.0005, "loss": 2.1187, "step": 152650 }, { "epoch": 0.5810616383608779, "grad_norm": 0.1334458738565445, "learning_rate": 0.0005, "loss": 2.1202, "step": 152660 }, { "epoch": 0.5810997008290005, "grad_norm": 0.13047702610492706, "learning_rate": 0.0005, "loss": 2.128, "step": 152670 }, { "epoch": 0.5811377632971232, "grad_norm": 0.12065224349498749, "learning_rate": 0.0005, "loss": 2.105, "step": 152680 }, { "epoch": 0.581175825765246, "grad_norm": 0.12787656486034393, "learning_rate": 0.0005, "loss": 2.1214, "step": 152690 }, { "epoch": 0.5812138882333686, "grad_norm": 0.1257835030555725, "learning_rate": 0.0005, "loss": 2.1049, "step": 152700 }, { "epoch": 0.5812519507014913, "grad_norm": 0.12944728136062622, "learning_rate": 0.0005, "loss": 2.111, "step": 152710 }, { "epoch": 0.5812900131696139, "grad_norm": 0.12401887774467468, "learning_rate": 0.0005, "loss": 2.1114, "step": 152720 }, { "epoch": 0.5813280756377367, "grad_norm": 0.12678106129169464, "learning_rate": 0.0005, "loss": 2.1139, "step": 152730 }, { "epoch": 0.5813661381058594, "grad_norm": 0.12950730323791504, "learning_rate": 0.0005, "loss": 2.0996, "step": 152740 }, { "epoch": 0.581404200573982, "grad_norm": 0.1311284750699997, "learning_rate": 0.0005, "loss": 2.1012, "step": 152750 }, { "epoch": 0.5814422630421047, "grad_norm": 0.11955790221691132, "learning_rate": 0.0005, "loss": 2.1036, "step": 152760 }, { "epoch": 0.5814803255102274, "grad_norm": 0.13516071438789368, "learning_rate": 0.0005, "loss": 2.1051, "step": 152770 }, { "epoch": 0.5815183879783501, "grad_norm": 0.12173061817884445, "learning_rate": 0.0005, "loss": 2.1313, "step": 152780 }, { "epoch": 0.5815564504464728, "grad_norm": 0.11228878051042557, "learning_rate": 0.0005, "loss": 2.1149, "step": 152790 }, { "epoch": 0.5815945129145954, "grad_norm": 0.11856763064861298, "learning_rate": 0.0005, "loss": 2.1038, "step": 152800 }, { "epoch": 0.5816325753827181, "grad_norm": 0.1394842565059662, "learning_rate": 0.0005, "loss": 2.1141, "step": 152810 }, { "epoch": 0.5816706378508408, "grad_norm": 0.11592503637075424, "learning_rate": 0.0005, "loss": 2.1162, "step": 152820 }, { "epoch": 0.5817087003189635, "grad_norm": 0.1304943561553955, "learning_rate": 0.0005, "loss": 2.1072, "step": 152830 }, { "epoch": 0.5817467627870861, "grad_norm": 0.12011189758777618, "learning_rate": 0.0005, "loss": 2.1207, "step": 152840 }, { "epoch": 0.5817848252552088, "grad_norm": 0.12784723937511444, "learning_rate": 0.0005, "loss": 2.1236, "step": 152850 }, { "epoch": 0.5818228877233316, "grad_norm": 0.13046471774578094, "learning_rate": 0.0005, "loss": 2.1065, "step": 152860 }, { "epoch": 0.5818609501914542, "grad_norm": 0.13343758881092072, "learning_rate": 0.0005, "loss": 2.1, "step": 152870 }, { "epoch": 0.5818990126595769, "grad_norm": 0.11725534498691559, "learning_rate": 0.0005, "loss": 2.1002, "step": 152880 }, { "epoch": 0.5819370751276995, "grad_norm": 0.12435596436262131, "learning_rate": 0.0005, "loss": 2.1275, "step": 152890 }, { "epoch": 0.5819751375958223, "grad_norm": 0.12380015105009079, "learning_rate": 0.0005, "loss": 2.1103, "step": 152900 }, { "epoch": 0.582013200063945, "grad_norm": 0.124103844165802, "learning_rate": 0.0005, "loss": 2.1098, "step": 152910 }, { "epoch": 0.5820512625320676, "grad_norm": 0.137775719165802, "learning_rate": 0.0005, "loss": 2.0892, "step": 152920 }, { "epoch": 0.5820893250001903, "grad_norm": 0.12004926800727844, "learning_rate": 0.0005, "loss": 2.0993, "step": 152930 }, { "epoch": 0.5821273874683129, "grad_norm": 0.12947256863117218, "learning_rate": 0.0005, "loss": 2.1044, "step": 152940 }, { "epoch": 0.5821654499364357, "grad_norm": 0.12591451406478882, "learning_rate": 0.0005, "loss": 2.1059, "step": 152950 }, { "epoch": 0.5822035124045584, "grad_norm": 0.11985679715871811, "learning_rate": 0.0005, "loss": 2.1179, "step": 152960 }, { "epoch": 0.582241574872681, "grad_norm": 0.1333392709493637, "learning_rate": 0.0005, "loss": 2.1111, "step": 152970 }, { "epoch": 0.5822796373408037, "grad_norm": 0.125688835978508, "learning_rate": 0.0005, "loss": 2.1257, "step": 152980 }, { "epoch": 0.5823176998089264, "grad_norm": 0.12332892417907715, "learning_rate": 0.0005, "loss": 2.1096, "step": 152990 }, { "epoch": 0.5823557622770491, "grad_norm": 0.12826409935951233, "learning_rate": 0.0005, "loss": 2.1081, "step": 153000 }, { "epoch": 0.5823938247451718, "grad_norm": 0.11257761716842651, "learning_rate": 0.0005, "loss": 2.1037, "step": 153010 }, { "epoch": 0.5824318872132944, "grad_norm": 0.13841615617275238, "learning_rate": 0.0005, "loss": 2.1014, "step": 153020 }, { "epoch": 0.5824699496814172, "grad_norm": 0.12583406269550323, "learning_rate": 0.0005, "loss": 2.1181, "step": 153030 }, { "epoch": 0.5825080121495398, "grad_norm": 0.13003213703632355, "learning_rate": 0.0005, "loss": 2.1259, "step": 153040 }, { "epoch": 0.5825460746176625, "grad_norm": 0.11432237178087234, "learning_rate": 0.0005, "loss": 2.1031, "step": 153050 }, { "epoch": 0.5825841370857852, "grad_norm": 0.1389862596988678, "learning_rate": 0.0005, "loss": 2.1114, "step": 153060 }, { "epoch": 0.5826221995539079, "grad_norm": 0.12699520587921143, "learning_rate": 0.0005, "loss": 2.0884, "step": 153070 }, { "epoch": 0.5826602620220306, "grad_norm": 0.1173095852136612, "learning_rate": 0.0005, "loss": 2.1084, "step": 153080 }, { "epoch": 0.5826983244901532, "grad_norm": 0.1409156620502472, "learning_rate": 0.0005, "loss": 2.1077, "step": 153090 }, { "epoch": 0.5827363869582759, "grad_norm": 0.12233584374189377, "learning_rate": 0.0005, "loss": 2.1091, "step": 153100 }, { "epoch": 0.5827744494263986, "grad_norm": 0.13279376924037933, "learning_rate": 0.0005, "loss": 2.1156, "step": 153110 }, { "epoch": 0.5828125118945213, "grad_norm": 0.11767486482858658, "learning_rate": 0.0005, "loss": 2.1098, "step": 153120 }, { "epoch": 0.582850574362644, "grad_norm": 0.12577876448631287, "learning_rate": 0.0005, "loss": 2.1043, "step": 153130 }, { "epoch": 0.5828886368307666, "grad_norm": 0.12067107856273651, "learning_rate": 0.0005, "loss": 2.0991, "step": 153140 }, { "epoch": 0.5829266992988893, "grad_norm": 0.12723997235298157, "learning_rate": 0.0005, "loss": 2.1216, "step": 153150 }, { "epoch": 0.5829647617670121, "grad_norm": 0.1412838250398636, "learning_rate": 0.0005, "loss": 2.1249, "step": 153160 }, { "epoch": 0.5830028242351347, "grad_norm": 0.1244267076253891, "learning_rate": 0.0005, "loss": 2.1225, "step": 153170 }, { "epoch": 0.5830408867032574, "grad_norm": 0.1265404224395752, "learning_rate": 0.0005, "loss": 2.1064, "step": 153180 }, { "epoch": 0.58307894917138, "grad_norm": 0.12896214425563812, "learning_rate": 0.0005, "loss": 2.1225, "step": 153190 }, { "epoch": 0.5831170116395028, "grad_norm": 0.11987591534852982, "learning_rate": 0.0005, "loss": 2.11, "step": 153200 }, { "epoch": 0.5831550741076255, "grad_norm": 0.1190297082066536, "learning_rate": 0.0005, "loss": 2.1208, "step": 153210 }, { "epoch": 0.5831931365757481, "grad_norm": 0.12007693946361542, "learning_rate": 0.0005, "loss": 2.1051, "step": 153220 }, { "epoch": 0.5832311990438708, "grad_norm": 0.1179143488407135, "learning_rate": 0.0005, "loss": 2.1151, "step": 153230 }, { "epoch": 0.5832692615119934, "grad_norm": 0.11957690119743347, "learning_rate": 0.0005, "loss": 2.1107, "step": 153240 }, { "epoch": 0.5833073239801162, "grad_norm": 0.13799679279327393, "learning_rate": 0.0005, "loss": 2.1066, "step": 153250 }, { "epoch": 0.5833453864482389, "grad_norm": 0.1221759244799614, "learning_rate": 0.0005, "loss": 2.1132, "step": 153260 }, { "epoch": 0.5833834489163615, "grad_norm": 0.12267787009477615, "learning_rate": 0.0005, "loss": 2.1245, "step": 153270 }, { "epoch": 0.5834215113844842, "grad_norm": 0.1274324506521225, "learning_rate": 0.0005, "loss": 2.1164, "step": 153280 }, { "epoch": 0.5834595738526069, "grad_norm": 0.1125502660870552, "learning_rate": 0.0005, "loss": 2.1069, "step": 153290 }, { "epoch": 0.5834976363207296, "grad_norm": 0.13437913358211517, "learning_rate": 0.0005, "loss": 2.114, "step": 153300 }, { "epoch": 0.5835356987888523, "grad_norm": 0.11928600072860718, "learning_rate": 0.0005, "loss": 2.0965, "step": 153310 }, { "epoch": 0.5835737612569749, "grad_norm": 0.1146225705742836, "learning_rate": 0.0005, "loss": 2.1152, "step": 153320 }, { "epoch": 0.5836118237250977, "grad_norm": 0.11559824645519257, "learning_rate": 0.0005, "loss": 2.0829, "step": 153330 }, { "epoch": 0.5836498861932203, "grad_norm": 0.12081331014633179, "learning_rate": 0.0005, "loss": 2.1353, "step": 153340 }, { "epoch": 0.583687948661343, "grad_norm": 0.1321122795343399, "learning_rate": 0.0005, "loss": 2.1122, "step": 153350 }, { "epoch": 0.5837260111294656, "grad_norm": 0.12141875922679901, "learning_rate": 0.0005, "loss": 2.1079, "step": 153360 }, { "epoch": 0.5837640735975883, "grad_norm": 0.13479618728160858, "learning_rate": 0.0005, "loss": 2.1003, "step": 153370 }, { "epoch": 0.5838021360657111, "grad_norm": 0.11798585206270218, "learning_rate": 0.0005, "loss": 2.1327, "step": 153380 }, { "epoch": 0.5838401985338337, "grad_norm": 0.14240063726902008, "learning_rate": 0.0005, "loss": 2.1241, "step": 153390 }, { "epoch": 0.5838782610019564, "grad_norm": 0.12214536219835281, "learning_rate": 0.0005, "loss": 2.1155, "step": 153400 }, { "epoch": 0.583916323470079, "grad_norm": 0.12596198916435242, "learning_rate": 0.0005, "loss": 2.1112, "step": 153410 }, { "epoch": 0.5839543859382018, "grad_norm": 0.13761389255523682, "learning_rate": 0.0005, "loss": 2.0951, "step": 153420 }, { "epoch": 0.5839924484063245, "grad_norm": 0.1374100297689438, "learning_rate": 0.0005, "loss": 2.1225, "step": 153430 }, { "epoch": 0.5840305108744471, "grad_norm": 0.11680902540683746, "learning_rate": 0.0005, "loss": 2.1233, "step": 153440 }, { "epoch": 0.5840685733425698, "grad_norm": 0.12362001091241837, "learning_rate": 0.0005, "loss": 2.1106, "step": 153450 }, { "epoch": 0.5841066358106926, "grad_norm": 0.1341400444507599, "learning_rate": 0.0005, "loss": 2.0963, "step": 153460 }, { "epoch": 0.5841446982788152, "grad_norm": 0.12792478501796722, "learning_rate": 0.0005, "loss": 2.1069, "step": 153470 }, { "epoch": 0.5841827607469379, "grad_norm": 0.1239253506064415, "learning_rate": 0.0005, "loss": 2.1151, "step": 153480 }, { "epoch": 0.5842208232150605, "grad_norm": 0.12595131993293762, "learning_rate": 0.0005, "loss": 2.1049, "step": 153490 }, { "epoch": 0.5842588856831833, "grad_norm": 0.11936382949352264, "learning_rate": 0.0005, "loss": 2.1159, "step": 153500 }, { "epoch": 0.584296948151306, "grad_norm": 0.11475072056055069, "learning_rate": 0.0005, "loss": 2.1124, "step": 153510 }, { "epoch": 0.5843350106194286, "grad_norm": 0.12244194000959396, "learning_rate": 0.0005, "loss": 2.106, "step": 153520 }, { "epoch": 0.5843730730875513, "grad_norm": 0.13092495501041412, "learning_rate": 0.0005, "loss": 2.1131, "step": 153530 }, { "epoch": 0.5844111355556739, "grad_norm": 0.126319020986557, "learning_rate": 0.0005, "loss": 2.1235, "step": 153540 }, { "epoch": 0.5844491980237967, "grad_norm": 0.12426517903804779, "learning_rate": 0.0005, "loss": 2.1093, "step": 153550 }, { "epoch": 0.5844872604919193, "grad_norm": 0.12947340309619904, "learning_rate": 0.0005, "loss": 2.1235, "step": 153560 }, { "epoch": 0.584525322960042, "grad_norm": 0.11483649164438248, "learning_rate": 0.0005, "loss": 2.1135, "step": 153570 }, { "epoch": 0.5845633854281647, "grad_norm": 0.12365952879190445, "learning_rate": 0.0005, "loss": 2.1177, "step": 153580 }, { "epoch": 0.5846014478962874, "grad_norm": 0.1213454157114029, "learning_rate": 0.0005, "loss": 2.1053, "step": 153590 }, { "epoch": 0.5846395103644101, "grad_norm": 0.14581279456615448, "learning_rate": 0.0005, "loss": 2.1136, "step": 153600 }, { "epoch": 0.5846775728325327, "grad_norm": 0.13556072115898132, "learning_rate": 0.0005, "loss": 2.1009, "step": 153610 }, { "epoch": 0.5847156353006554, "grad_norm": 0.11281615495681763, "learning_rate": 0.0005, "loss": 2.1107, "step": 153620 }, { "epoch": 0.5847536977687782, "grad_norm": 0.13184192776679993, "learning_rate": 0.0005, "loss": 2.1224, "step": 153630 }, { "epoch": 0.5847917602369008, "grad_norm": 0.1341467797756195, "learning_rate": 0.0005, "loss": 2.1062, "step": 153640 }, { "epoch": 0.5848298227050235, "grad_norm": 0.12088645249605179, "learning_rate": 0.0005, "loss": 2.1228, "step": 153650 }, { "epoch": 0.5848678851731461, "grad_norm": 0.13409413397312164, "learning_rate": 0.0005, "loss": 2.1221, "step": 153660 }, { "epoch": 0.5849059476412688, "grad_norm": 0.1399022787809372, "learning_rate": 0.0005, "loss": 2.1025, "step": 153670 }, { "epoch": 0.5849440101093916, "grad_norm": 0.12620849907398224, "learning_rate": 0.0005, "loss": 2.1172, "step": 153680 }, { "epoch": 0.5849820725775142, "grad_norm": 0.12294651567935944, "learning_rate": 0.0005, "loss": 2.1108, "step": 153690 }, { "epoch": 0.5850201350456369, "grad_norm": 0.14084553718566895, "learning_rate": 0.0005, "loss": 2.1243, "step": 153700 }, { "epoch": 0.5850581975137595, "grad_norm": 0.11670193821191788, "learning_rate": 0.0005, "loss": 2.097, "step": 153710 }, { "epoch": 0.5850962599818823, "grad_norm": 0.14080242812633514, "learning_rate": 0.0005, "loss": 2.1161, "step": 153720 }, { "epoch": 0.585134322450005, "grad_norm": 0.1202685683965683, "learning_rate": 0.0005, "loss": 2.1051, "step": 153730 }, { "epoch": 0.5851723849181276, "grad_norm": 0.13900840282440186, "learning_rate": 0.0005, "loss": 2.1142, "step": 153740 }, { "epoch": 0.5852104473862503, "grad_norm": 0.12486742436885834, "learning_rate": 0.0005, "loss": 2.1115, "step": 153750 }, { "epoch": 0.585248509854373, "grad_norm": 0.11640941351652145, "learning_rate": 0.0005, "loss": 2.1149, "step": 153760 }, { "epoch": 0.5852865723224957, "grad_norm": 0.12414953112602234, "learning_rate": 0.0005, "loss": 2.101, "step": 153770 }, { "epoch": 0.5853246347906184, "grad_norm": 0.12600675225257874, "learning_rate": 0.0005, "loss": 2.1107, "step": 153780 }, { "epoch": 0.585362697258741, "grad_norm": 0.5305410623550415, "learning_rate": 0.0005, "loss": 2.1138, "step": 153790 }, { "epoch": 0.5854007597268638, "grad_norm": 0.1254570037126541, "learning_rate": 0.0005, "loss": 2.0942, "step": 153800 }, { "epoch": 0.5854388221949864, "grad_norm": 0.12492625415325165, "learning_rate": 0.0005, "loss": 2.1146, "step": 153810 }, { "epoch": 0.5854768846631091, "grad_norm": 0.13412101566791534, "learning_rate": 0.0005, "loss": 2.1069, "step": 153820 }, { "epoch": 0.5855149471312318, "grad_norm": 0.1295090913772583, "learning_rate": 0.0005, "loss": 2.1313, "step": 153830 }, { "epoch": 0.5855530095993544, "grad_norm": 0.12706872820854187, "learning_rate": 0.0005, "loss": 2.1186, "step": 153840 }, { "epoch": 0.5855910720674772, "grad_norm": 0.1238255500793457, "learning_rate": 0.0005, "loss": 2.1061, "step": 153850 }, { "epoch": 0.5856291345355998, "grad_norm": 0.11371153593063354, "learning_rate": 0.0005, "loss": 2.1093, "step": 153860 }, { "epoch": 0.5856671970037225, "grad_norm": 0.11320296674966812, "learning_rate": 0.0005, "loss": 2.1208, "step": 153870 }, { "epoch": 0.5857052594718452, "grad_norm": 0.12613484263420105, "learning_rate": 0.0005, "loss": 2.0967, "step": 153880 }, { "epoch": 0.5857433219399679, "grad_norm": 0.12188831716775894, "learning_rate": 0.0005, "loss": 2.1032, "step": 153890 }, { "epoch": 0.5857813844080906, "grad_norm": 0.1488693207502365, "learning_rate": 0.0005, "loss": 2.1045, "step": 153900 }, { "epoch": 0.5858194468762132, "grad_norm": 0.12705174088478088, "learning_rate": 0.0005, "loss": 2.108, "step": 153910 }, { "epoch": 0.5858575093443359, "grad_norm": 0.11664033681154251, "learning_rate": 0.0005, "loss": 2.1107, "step": 153920 }, { "epoch": 0.5858955718124587, "grad_norm": 0.12219222635030746, "learning_rate": 0.0005, "loss": 2.1176, "step": 153930 }, { "epoch": 0.5859336342805813, "grad_norm": 0.12206865847110748, "learning_rate": 0.0005, "loss": 2.1174, "step": 153940 }, { "epoch": 0.585971696748704, "grad_norm": 0.12492918223142624, "learning_rate": 0.0005, "loss": 2.1083, "step": 153950 }, { "epoch": 0.5860097592168266, "grad_norm": 0.12702591717243195, "learning_rate": 0.0005, "loss": 2.1143, "step": 153960 }, { "epoch": 0.5860478216849493, "grad_norm": 0.11352322995662689, "learning_rate": 0.0005, "loss": 2.1123, "step": 153970 }, { "epoch": 0.5860858841530721, "grad_norm": 0.14898304641246796, "learning_rate": 0.0005, "loss": 2.1263, "step": 153980 }, { "epoch": 0.5861239466211947, "grad_norm": 0.12120857834815979, "learning_rate": 0.0005, "loss": 2.1109, "step": 153990 }, { "epoch": 0.5861620090893174, "grad_norm": 0.12267789989709854, "learning_rate": 0.0005, "loss": 2.1071, "step": 154000 }, { "epoch": 0.58620007155744, "grad_norm": 0.1255556046962738, "learning_rate": 0.0005, "loss": 2.1289, "step": 154010 }, { "epoch": 0.5862381340255628, "grad_norm": 0.11012211441993713, "learning_rate": 0.0005, "loss": 2.1179, "step": 154020 }, { "epoch": 0.5862761964936855, "grad_norm": 0.11161710321903229, "learning_rate": 0.0005, "loss": 2.1077, "step": 154030 }, { "epoch": 0.5863142589618081, "grad_norm": 0.11873649060726166, "learning_rate": 0.0005, "loss": 2.1143, "step": 154040 }, { "epoch": 0.5863523214299308, "grad_norm": 0.12429730594158173, "learning_rate": 0.0005, "loss": 2.1268, "step": 154050 }, { "epoch": 0.5863903838980535, "grad_norm": 0.11891800910234451, "learning_rate": 0.0005, "loss": 2.1077, "step": 154060 }, { "epoch": 0.5864284463661762, "grad_norm": 0.13123571872711182, "learning_rate": 0.0005, "loss": 2.1162, "step": 154070 }, { "epoch": 0.5864665088342988, "grad_norm": 0.12377672642469406, "learning_rate": 0.0005, "loss": 2.0882, "step": 154080 }, { "epoch": 0.5865045713024215, "grad_norm": 0.1367446929216385, "learning_rate": 0.0005, "loss": 2.1152, "step": 154090 }, { "epoch": 0.5865426337705442, "grad_norm": 0.13042481243610382, "learning_rate": 0.0005, "loss": 2.1162, "step": 154100 }, { "epoch": 0.5865806962386669, "grad_norm": 0.1151302307844162, "learning_rate": 0.0005, "loss": 2.1194, "step": 154110 }, { "epoch": 0.5866187587067896, "grad_norm": 0.12772217392921448, "learning_rate": 0.0005, "loss": 2.1163, "step": 154120 }, { "epoch": 0.5866568211749122, "grad_norm": 0.12427042424678802, "learning_rate": 0.0005, "loss": 2.1041, "step": 154130 }, { "epoch": 0.5866948836430349, "grad_norm": 0.12787887454032898, "learning_rate": 0.0005, "loss": 2.1201, "step": 154140 }, { "epoch": 0.5867329461111577, "grad_norm": 0.12783733010292053, "learning_rate": 0.0005, "loss": 2.1277, "step": 154150 }, { "epoch": 0.5867710085792803, "grad_norm": 0.12757271528244019, "learning_rate": 0.0005, "loss": 2.1127, "step": 154160 }, { "epoch": 0.586809071047403, "grad_norm": 0.12278042733669281, "learning_rate": 0.0005, "loss": 2.1089, "step": 154170 }, { "epoch": 0.5868471335155256, "grad_norm": 0.12007766216993332, "learning_rate": 0.0005, "loss": 2.1131, "step": 154180 }, { "epoch": 0.5868851959836484, "grad_norm": 0.11267594248056412, "learning_rate": 0.0005, "loss": 2.1293, "step": 154190 }, { "epoch": 0.5869232584517711, "grad_norm": 0.13086989521980286, "learning_rate": 0.0005, "loss": 2.1064, "step": 154200 }, { "epoch": 0.5869613209198937, "grad_norm": 0.13795329630374908, "learning_rate": 0.0005, "loss": 2.1118, "step": 154210 }, { "epoch": 0.5869993833880164, "grad_norm": 0.1180015504360199, "learning_rate": 0.0005, "loss": 2.1065, "step": 154220 }, { "epoch": 0.5870374458561392, "grad_norm": 0.13229142129421234, "learning_rate": 0.0005, "loss": 2.0986, "step": 154230 }, { "epoch": 0.5870755083242618, "grad_norm": 0.11965411901473999, "learning_rate": 0.0005, "loss": 2.1127, "step": 154240 }, { "epoch": 0.5871135707923845, "grad_norm": 0.11663006246089935, "learning_rate": 0.0005, "loss": 2.1212, "step": 154250 }, { "epoch": 0.5871516332605071, "grad_norm": 0.12126167863607407, "learning_rate": 0.0005, "loss": 2.1273, "step": 154260 }, { "epoch": 0.5871896957286298, "grad_norm": 0.12216583639383316, "learning_rate": 0.0005, "loss": 2.1107, "step": 154270 }, { "epoch": 0.5872277581967525, "grad_norm": 0.11939745396375656, "learning_rate": 0.0005, "loss": 2.1207, "step": 154280 }, { "epoch": 0.5872658206648752, "grad_norm": 0.11422568559646606, "learning_rate": 0.0005, "loss": 2.117, "step": 154290 }, { "epoch": 0.5873038831329979, "grad_norm": 0.12077447026968002, "learning_rate": 0.0005, "loss": 2.101, "step": 154300 }, { "epoch": 0.5873419456011205, "grad_norm": 0.11959082633256912, "learning_rate": 0.0005, "loss": 2.1123, "step": 154310 }, { "epoch": 0.5873800080692433, "grad_norm": 0.12821198999881744, "learning_rate": 0.0005, "loss": 2.1082, "step": 154320 }, { "epoch": 0.5874180705373659, "grad_norm": 0.12846586108207703, "learning_rate": 0.0005, "loss": 2.1177, "step": 154330 }, { "epoch": 0.5874561330054886, "grad_norm": 0.12448491156101227, "learning_rate": 0.0005, "loss": 2.1123, "step": 154340 }, { "epoch": 0.5874941954736113, "grad_norm": 0.12642216682434082, "learning_rate": 0.0005, "loss": 2.107, "step": 154350 }, { "epoch": 0.587532257941734, "grad_norm": 0.11989720165729523, "learning_rate": 0.0005, "loss": 2.1008, "step": 154360 }, { "epoch": 0.5875703204098567, "grad_norm": 0.14378298819065094, "learning_rate": 0.0005, "loss": 2.107, "step": 154370 }, { "epoch": 0.5876083828779793, "grad_norm": 0.20334845781326294, "learning_rate": 0.0005, "loss": 2.123, "step": 154380 }, { "epoch": 0.587646445346102, "grad_norm": 0.12918692827224731, "learning_rate": 0.0005, "loss": 2.1201, "step": 154390 }, { "epoch": 0.5876845078142247, "grad_norm": 0.12609420716762543, "learning_rate": 0.0005, "loss": 2.1105, "step": 154400 }, { "epoch": 0.5877225702823474, "grad_norm": 0.13024812936782837, "learning_rate": 0.0005, "loss": 2.1208, "step": 154410 }, { "epoch": 0.5877606327504701, "grad_norm": 0.13436540961265564, "learning_rate": 0.0005, "loss": 2.1129, "step": 154420 }, { "epoch": 0.5877986952185927, "grad_norm": 0.1369498074054718, "learning_rate": 0.0005, "loss": 2.1124, "step": 154430 }, { "epoch": 0.5878367576867154, "grad_norm": 0.12045515328645706, "learning_rate": 0.0005, "loss": 2.1114, "step": 154440 }, { "epoch": 0.5878748201548382, "grad_norm": 0.12609994411468506, "learning_rate": 0.0005, "loss": 2.1333, "step": 154450 }, { "epoch": 0.5879128826229608, "grad_norm": 0.1296570599079132, "learning_rate": 0.0005, "loss": 2.1141, "step": 154460 }, { "epoch": 0.5879509450910835, "grad_norm": 0.13209514319896698, "learning_rate": 0.0005, "loss": 2.1176, "step": 154470 }, { "epoch": 0.5879890075592061, "grad_norm": 0.12957362830638885, "learning_rate": 0.0005, "loss": 2.1187, "step": 154480 }, { "epoch": 0.5880270700273289, "grad_norm": 0.888014554977417, "learning_rate": 0.0005, "loss": 2.1034, "step": 154490 }, { "epoch": 0.5880651324954516, "grad_norm": 0.1320250779390335, "learning_rate": 0.0005, "loss": 2.1117, "step": 154500 }, { "epoch": 0.5881031949635742, "grad_norm": 0.12680445611476898, "learning_rate": 0.0005, "loss": 2.1044, "step": 154510 }, { "epoch": 0.5881412574316969, "grad_norm": 0.12884357571601868, "learning_rate": 0.0005, "loss": 2.118, "step": 154520 }, { "epoch": 0.5881793198998195, "grad_norm": 0.11659844219684601, "learning_rate": 0.0005, "loss": 2.123, "step": 154530 }, { "epoch": 0.5882173823679423, "grad_norm": 0.1260477602481842, "learning_rate": 0.0005, "loss": 2.1066, "step": 154540 }, { "epoch": 0.588255444836065, "grad_norm": 0.1149340271949768, "learning_rate": 0.0005, "loss": 2.1, "step": 154550 }, { "epoch": 0.5882935073041876, "grad_norm": 0.12525539100170135, "learning_rate": 0.0005, "loss": 2.1122, "step": 154560 }, { "epoch": 0.5883315697723103, "grad_norm": 0.12114496529102325, "learning_rate": 0.0005, "loss": 2.1036, "step": 154570 }, { "epoch": 0.588369632240433, "grad_norm": 0.13838450610637665, "learning_rate": 0.0005, "loss": 2.1027, "step": 154580 }, { "epoch": 0.5884076947085557, "grad_norm": 0.12621724605560303, "learning_rate": 0.0005, "loss": 2.1307, "step": 154590 }, { "epoch": 0.5884457571766784, "grad_norm": 0.12438689172267914, "learning_rate": 0.0005, "loss": 2.1115, "step": 154600 }, { "epoch": 0.588483819644801, "grad_norm": 0.15652117133140564, "learning_rate": 0.0005, "loss": 2.1027, "step": 154610 }, { "epoch": 0.5885218821129238, "grad_norm": 0.12901882827281952, "learning_rate": 0.0005, "loss": 2.1275, "step": 154620 }, { "epoch": 0.5885599445810464, "grad_norm": 0.12268058955669403, "learning_rate": 0.0005, "loss": 2.1175, "step": 154630 }, { "epoch": 0.5885980070491691, "grad_norm": 0.13506019115447998, "learning_rate": 0.0005, "loss": 2.1092, "step": 154640 }, { "epoch": 0.5886360695172917, "grad_norm": 0.12235263735055923, "learning_rate": 0.0005, "loss": 2.1015, "step": 154650 }, { "epoch": 0.5886741319854145, "grad_norm": 0.11945250630378723, "learning_rate": 0.0005, "loss": 2.096, "step": 154660 }, { "epoch": 0.5887121944535372, "grad_norm": 0.12435851246118546, "learning_rate": 0.0005, "loss": 2.1146, "step": 154670 }, { "epoch": 0.5887502569216598, "grad_norm": 0.12978994846343994, "learning_rate": 0.0005, "loss": 2.1082, "step": 154680 }, { "epoch": 0.5887883193897825, "grad_norm": 0.12437500059604645, "learning_rate": 0.0005, "loss": 2.1119, "step": 154690 }, { "epoch": 0.5888263818579051, "grad_norm": 0.1315934807062149, "learning_rate": 0.0005, "loss": 2.1084, "step": 154700 }, { "epoch": 0.5888644443260279, "grad_norm": 0.1189320832490921, "learning_rate": 0.0005, "loss": 2.1101, "step": 154710 }, { "epoch": 0.5889025067941506, "grad_norm": 0.13439911603927612, "learning_rate": 0.0005, "loss": 2.1275, "step": 154720 }, { "epoch": 0.5889405692622732, "grad_norm": 0.12734562158584595, "learning_rate": 0.0005, "loss": 2.1084, "step": 154730 }, { "epoch": 0.5889786317303959, "grad_norm": 0.11846303939819336, "learning_rate": 0.0005, "loss": 2.1295, "step": 154740 }, { "epoch": 0.5890166941985187, "grad_norm": 0.11798400431871414, "learning_rate": 0.0005, "loss": 2.1039, "step": 154750 }, { "epoch": 0.5890547566666413, "grad_norm": 0.13610303401947021, "learning_rate": 0.0005, "loss": 2.1204, "step": 154760 }, { "epoch": 0.589092819134764, "grad_norm": 0.12588945031166077, "learning_rate": 0.0005, "loss": 2.1144, "step": 154770 }, { "epoch": 0.5891308816028866, "grad_norm": 0.12449779361486435, "learning_rate": 0.0005, "loss": 2.1291, "step": 154780 }, { "epoch": 0.5891689440710094, "grad_norm": 0.17011813819408417, "learning_rate": 0.0005, "loss": 2.1155, "step": 154790 }, { "epoch": 0.589207006539132, "grad_norm": 0.12432485073804855, "learning_rate": 0.0005, "loss": 2.11, "step": 154800 }, { "epoch": 0.5892450690072547, "grad_norm": 0.10910047590732574, "learning_rate": 0.0005, "loss": 2.1212, "step": 154810 }, { "epoch": 0.5892831314753774, "grad_norm": 0.11259827762842178, "learning_rate": 0.0005, "loss": 2.1212, "step": 154820 }, { "epoch": 0.5893211939435, "grad_norm": 0.11866344511508942, "learning_rate": 0.0005, "loss": 2.1165, "step": 154830 }, { "epoch": 0.5893592564116228, "grad_norm": 0.12497690320014954, "learning_rate": 0.0005, "loss": 2.1063, "step": 154840 }, { "epoch": 0.5893973188797454, "grad_norm": 0.12016210705041885, "learning_rate": 0.0005, "loss": 2.1251, "step": 154850 }, { "epoch": 0.5894353813478681, "grad_norm": 0.12100519984960556, "learning_rate": 0.0005, "loss": 2.097, "step": 154860 }, { "epoch": 0.5894734438159908, "grad_norm": 0.12728513777256012, "learning_rate": 0.0005, "loss": 2.1138, "step": 154870 }, { "epoch": 0.5895115062841135, "grad_norm": 0.12269476801156998, "learning_rate": 0.0005, "loss": 2.1049, "step": 154880 }, { "epoch": 0.5895495687522362, "grad_norm": 0.1447187215089798, "learning_rate": 0.0005, "loss": 2.1178, "step": 154890 }, { "epoch": 0.5895876312203588, "grad_norm": 0.1288946568965912, "learning_rate": 0.0005, "loss": 2.1176, "step": 154900 }, { "epoch": 0.5896256936884815, "grad_norm": 0.1268695890903473, "learning_rate": 0.0005, "loss": 2.1122, "step": 154910 }, { "epoch": 0.5896637561566043, "grad_norm": 0.13210426270961761, "learning_rate": 0.0005, "loss": 2.0881, "step": 154920 }, { "epoch": 0.5897018186247269, "grad_norm": 0.1955825239419937, "learning_rate": 0.0005, "loss": 2.1074, "step": 154930 }, { "epoch": 0.5897398810928496, "grad_norm": 0.13660775125026703, "learning_rate": 0.0005, "loss": 2.1031, "step": 154940 }, { "epoch": 0.5897779435609722, "grad_norm": 0.12983176112174988, "learning_rate": 0.0005, "loss": 2.1078, "step": 154950 }, { "epoch": 0.5898160060290949, "grad_norm": 0.12133912742137909, "learning_rate": 0.0005, "loss": 2.1068, "step": 154960 }, { "epoch": 0.5898540684972177, "grad_norm": 0.12246081233024597, "learning_rate": 0.0005, "loss": 2.1131, "step": 154970 }, { "epoch": 0.5898921309653403, "grad_norm": 0.12103147059679031, "learning_rate": 0.0005, "loss": 2.0996, "step": 154980 }, { "epoch": 0.589930193433463, "grad_norm": 0.12457051128149033, "learning_rate": 0.0005, "loss": 2.1171, "step": 154990 }, { "epoch": 0.5899682559015856, "grad_norm": 0.1302313357591629, "learning_rate": 0.0005, "loss": 2.1118, "step": 155000 }, { "epoch": 0.5900063183697084, "grad_norm": 0.1370365023612976, "learning_rate": 0.0005, "loss": 2.1206, "step": 155010 }, { "epoch": 0.5900443808378311, "grad_norm": 0.11973226815462112, "learning_rate": 0.0005, "loss": 2.1078, "step": 155020 }, { "epoch": 0.5900824433059537, "grad_norm": 0.1383923441171646, "learning_rate": 0.0005, "loss": 2.1175, "step": 155030 }, { "epoch": 0.5901205057740764, "grad_norm": 0.11702127754688263, "learning_rate": 0.0005, "loss": 2.0953, "step": 155040 }, { "epoch": 0.5901585682421991, "grad_norm": 0.12547564506530762, "learning_rate": 0.0005, "loss": 2.1216, "step": 155050 }, { "epoch": 0.5901966307103218, "grad_norm": 0.11866992712020874, "learning_rate": 0.0005, "loss": 2.1205, "step": 155060 }, { "epoch": 0.5902346931784445, "grad_norm": 0.1276715099811554, "learning_rate": 0.0005, "loss": 2.1162, "step": 155070 }, { "epoch": 0.5902727556465671, "grad_norm": 0.12169170379638672, "learning_rate": 0.0005, "loss": 2.1066, "step": 155080 }, { "epoch": 0.5903108181146899, "grad_norm": 0.13266567885875702, "learning_rate": 0.0005, "loss": 2.1317, "step": 155090 }, { "epoch": 0.5903488805828125, "grad_norm": 0.12670375406742096, "learning_rate": 0.0005, "loss": 2.1284, "step": 155100 }, { "epoch": 0.5903869430509352, "grad_norm": 0.1242232397198677, "learning_rate": 0.0005, "loss": 2.1292, "step": 155110 }, { "epoch": 0.5904250055190579, "grad_norm": 0.13165102899074554, "learning_rate": 0.0005, "loss": 2.1153, "step": 155120 }, { "epoch": 0.5904630679871805, "grad_norm": 0.11474651843309402, "learning_rate": 0.0005, "loss": 2.1185, "step": 155130 }, { "epoch": 0.5905011304553033, "grad_norm": 0.1368701159954071, "learning_rate": 0.0005, "loss": 2.1062, "step": 155140 }, { "epoch": 0.5905391929234259, "grad_norm": 0.1212804764509201, "learning_rate": 0.0005, "loss": 2.1038, "step": 155150 }, { "epoch": 0.5905772553915486, "grad_norm": 0.11533062160015106, "learning_rate": 0.0005, "loss": 2.1068, "step": 155160 }, { "epoch": 0.5906153178596713, "grad_norm": 0.12023679167032242, "learning_rate": 0.0005, "loss": 2.1153, "step": 155170 }, { "epoch": 0.590653380327794, "grad_norm": 0.11425898969173431, "learning_rate": 0.0005, "loss": 2.1048, "step": 155180 }, { "epoch": 0.5906914427959167, "grad_norm": 0.13516448438167572, "learning_rate": 0.0005, "loss": 2.1106, "step": 155190 }, { "epoch": 0.5907295052640393, "grad_norm": 0.11822634190320969, "learning_rate": 0.0005, "loss": 2.117, "step": 155200 }, { "epoch": 0.590767567732162, "grad_norm": 0.12545858323574066, "learning_rate": 0.0005, "loss": 2.0899, "step": 155210 }, { "epoch": 0.5908056302002848, "grad_norm": 0.1313958764076233, "learning_rate": 0.0005, "loss": 2.1003, "step": 155220 }, { "epoch": 0.5908436926684074, "grad_norm": 0.13253335654735565, "learning_rate": 0.0005, "loss": 2.1294, "step": 155230 }, { "epoch": 0.5908817551365301, "grad_norm": 0.11414457857608795, "learning_rate": 0.0005, "loss": 2.1143, "step": 155240 }, { "epoch": 0.5909198176046527, "grad_norm": 0.12125179171562195, "learning_rate": 0.0005, "loss": 2.1082, "step": 155250 }, { "epoch": 0.5909578800727754, "grad_norm": 0.1280829906463623, "learning_rate": 0.0005, "loss": 2.1286, "step": 155260 }, { "epoch": 0.5909959425408982, "grad_norm": 0.12440607696771622, "learning_rate": 0.0005, "loss": 2.1085, "step": 155270 }, { "epoch": 0.5910340050090208, "grad_norm": 0.13153356313705444, "learning_rate": 0.0005, "loss": 2.1168, "step": 155280 }, { "epoch": 0.5910720674771435, "grad_norm": 0.1424436718225479, "learning_rate": 0.0005, "loss": 2.114, "step": 155290 }, { "epoch": 0.5911101299452661, "grad_norm": 0.12767179310321808, "learning_rate": 0.0005, "loss": 2.1074, "step": 155300 }, { "epoch": 0.5911481924133889, "grad_norm": 0.1162237673997879, "learning_rate": 0.0005, "loss": 2.1068, "step": 155310 }, { "epoch": 0.5911862548815116, "grad_norm": 0.13033299148082733, "learning_rate": 0.0005, "loss": 2.1136, "step": 155320 }, { "epoch": 0.5912243173496342, "grad_norm": 0.11099372804164886, "learning_rate": 0.0005, "loss": 2.1024, "step": 155330 }, { "epoch": 0.5912623798177569, "grad_norm": 0.12529684603214264, "learning_rate": 0.0005, "loss": 2.1054, "step": 155340 }, { "epoch": 0.5913004422858796, "grad_norm": 0.12372294068336487, "learning_rate": 0.0005, "loss": 2.1093, "step": 155350 }, { "epoch": 0.5913385047540023, "grad_norm": 0.13699477910995483, "learning_rate": 0.0005, "loss": 2.1225, "step": 155360 }, { "epoch": 0.591376567222125, "grad_norm": 0.12353967875242233, "learning_rate": 0.0005, "loss": 2.1191, "step": 155370 }, { "epoch": 0.5914146296902476, "grad_norm": 0.1406717151403427, "learning_rate": 0.0005, "loss": 2.1205, "step": 155380 }, { "epoch": 0.5914526921583703, "grad_norm": 0.1308826506137848, "learning_rate": 0.0005, "loss": 2.0959, "step": 155390 }, { "epoch": 0.591490754626493, "grad_norm": 0.12167200446128845, "learning_rate": 0.0005, "loss": 2.1293, "step": 155400 }, { "epoch": 0.5915288170946157, "grad_norm": 0.12970460951328278, "learning_rate": 0.0005, "loss": 2.1161, "step": 155410 }, { "epoch": 0.5915668795627383, "grad_norm": 0.13693182170391083, "learning_rate": 0.0005, "loss": 2.0963, "step": 155420 }, { "epoch": 0.591604942030861, "grad_norm": 0.13417673110961914, "learning_rate": 0.0005, "loss": 2.108, "step": 155430 }, { "epoch": 0.5916430044989838, "grad_norm": 0.12016027420759201, "learning_rate": 0.0005, "loss": 2.1114, "step": 155440 }, { "epoch": 0.5916810669671064, "grad_norm": 0.11692225933074951, "learning_rate": 0.0005, "loss": 2.1074, "step": 155450 }, { "epoch": 0.5917191294352291, "grad_norm": 0.126420795917511, "learning_rate": 0.0005, "loss": 2.0982, "step": 155460 }, { "epoch": 0.5917571919033517, "grad_norm": 0.11544563621282578, "learning_rate": 0.0005, "loss": 2.1036, "step": 155470 }, { "epoch": 0.5917952543714745, "grad_norm": 0.11593781411647797, "learning_rate": 0.0005, "loss": 2.1135, "step": 155480 }, { "epoch": 0.5918333168395972, "grad_norm": 0.11717119067907333, "learning_rate": 0.0005, "loss": 2.1174, "step": 155490 }, { "epoch": 0.5918713793077198, "grad_norm": 0.1266764998435974, "learning_rate": 0.0005, "loss": 2.1153, "step": 155500 }, { "epoch": 0.5919094417758425, "grad_norm": 0.12252969294786453, "learning_rate": 0.0005, "loss": 2.1169, "step": 155510 }, { "epoch": 0.5919475042439653, "grad_norm": 0.12028811126947403, "learning_rate": 0.0005, "loss": 2.1008, "step": 155520 }, { "epoch": 0.5919855667120879, "grad_norm": 0.1271073818206787, "learning_rate": 0.0005, "loss": 2.111, "step": 155530 }, { "epoch": 0.5920236291802106, "grad_norm": 0.12291359901428223, "learning_rate": 0.0005, "loss": 2.1014, "step": 155540 }, { "epoch": 0.5920616916483332, "grad_norm": 0.12409337610006332, "learning_rate": 0.0005, "loss": 2.1189, "step": 155550 }, { "epoch": 0.5920997541164559, "grad_norm": 0.11245585978031158, "learning_rate": 0.0005, "loss": 2.0998, "step": 155560 }, { "epoch": 0.5921378165845786, "grad_norm": 0.115641288459301, "learning_rate": 0.0005, "loss": 2.1028, "step": 155570 }, { "epoch": 0.5921758790527013, "grad_norm": 0.12326592206954956, "learning_rate": 0.0005, "loss": 2.1159, "step": 155580 }, { "epoch": 0.592213941520824, "grad_norm": 0.11967624723911285, "learning_rate": 0.0005, "loss": 2.1146, "step": 155590 }, { "epoch": 0.5922520039889466, "grad_norm": 0.1309567093849182, "learning_rate": 0.0005, "loss": 2.1115, "step": 155600 }, { "epoch": 0.5922900664570694, "grad_norm": 0.12665249407291412, "learning_rate": 0.0005, "loss": 2.1149, "step": 155610 }, { "epoch": 0.592328128925192, "grad_norm": 0.12019842863082886, "learning_rate": 0.0005, "loss": 2.1064, "step": 155620 }, { "epoch": 0.5923661913933147, "grad_norm": 0.12676399946212769, "learning_rate": 0.0005, "loss": 2.1205, "step": 155630 }, { "epoch": 0.5924042538614374, "grad_norm": 0.13679906725883484, "learning_rate": 0.0005, "loss": 2.1163, "step": 155640 }, { "epoch": 0.5924423163295601, "grad_norm": 0.12272468209266663, "learning_rate": 0.0005, "loss": 2.1032, "step": 155650 }, { "epoch": 0.5924803787976828, "grad_norm": 0.11839216947555542, "learning_rate": 0.0005, "loss": 2.0965, "step": 155660 }, { "epoch": 0.5925184412658054, "grad_norm": 0.1262093186378479, "learning_rate": 0.0005, "loss": 2.1084, "step": 155670 }, { "epoch": 0.5925565037339281, "grad_norm": 0.1315896213054657, "learning_rate": 0.0005, "loss": 2.1209, "step": 155680 }, { "epoch": 0.5925945662020508, "grad_norm": 0.12056088447570801, "learning_rate": 0.0005, "loss": 2.1158, "step": 155690 }, { "epoch": 0.5926326286701735, "grad_norm": 0.13733994960784912, "learning_rate": 0.0005, "loss": 2.1021, "step": 155700 }, { "epoch": 0.5926706911382962, "grad_norm": 0.11890329420566559, "learning_rate": 0.0005, "loss": 2.1081, "step": 155710 }, { "epoch": 0.5927087536064188, "grad_norm": 0.13701745867729187, "learning_rate": 0.0005, "loss": 2.1263, "step": 155720 }, { "epoch": 0.5927468160745415, "grad_norm": 0.130995512008667, "learning_rate": 0.0005, "loss": 2.1071, "step": 155730 }, { "epoch": 0.5927848785426643, "grad_norm": 0.12714290618896484, "learning_rate": 0.0005, "loss": 2.1201, "step": 155740 }, { "epoch": 0.5928229410107869, "grad_norm": 0.12613579630851746, "learning_rate": 0.0005, "loss": 2.1062, "step": 155750 }, { "epoch": 0.5928610034789096, "grad_norm": 0.12419883906841278, "learning_rate": 0.0005, "loss": 2.0857, "step": 155760 }, { "epoch": 0.5928990659470322, "grad_norm": 0.13133420050144196, "learning_rate": 0.0005, "loss": 2.1139, "step": 155770 }, { "epoch": 0.592937128415155, "grad_norm": 0.1103355661034584, "learning_rate": 0.0005, "loss": 2.1191, "step": 155780 }, { "epoch": 0.5929751908832777, "grad_norm": 0.12126592546701431, "learning_rate": 0.0005, "loss": 2.1136, "step": 155790 }, { "epoch": 0.5930132533514003, "grad_norm": 0.12847162783145905, "learning_rate": 0.0005, "loss": 2.0935, "step": 155800 }, { "epoch": 0.593051315819523, "grad_norm": 0.12895318865776062, "learning_rate": 0.0005, "loss": 2.1104, "step": 155810 }, { "epoch": 0.5930893782876456, "grad_norm": 0.12282812595367432, "learning_rate": 0.0005, "loss": 2.1092, "step": 155820 }, { "epoch": 0.5931274407557684, "grad_norm": 0.11810677498579025, "learning_rate": 0.0005, "loss": 2.1077, "step": 155830 }, { "epoch": 0.593165503223891, "grad_norm": 0.12165694683790207, "learning_rate": 0.0005, "loss": 2.1037, "step": 155840 }, { "epoch": 0.5932035656920137, "grad_norm": 0.1188536211848259, "learning_rate": 0.0005, "loss": 2.1127, "step": 155850 }, { "epoch": 0.5932416281601364, "grad_norm": 0.11744207888841629, "learning_rate": 0.0005, "loss": 2.1216, "step": 155860 }, { "epoch": 0.5932796906282591, "grad_norm": 0.11881184577941895, "learning_rate": 0.0005, "loss": 2.1005, "step": 155870 }, { "epoch": 0.5933177530963818, "grad_norm": 0.1358705759048462, "learning_rate": 0.0005, "loss": 2.1143, "step": 155880 }, { "epoch": 0.5933558155645045, "grad_norm": 0.12554964423179626, "learning_rate": 0.0005, "loss": 2.089, "step": 155890 }, { "epoch": 0.5933938780326271, "grad_norm": 0.13144125044345856, "learning_rate": 0.0005, "loss": 2.1043, "step": 155900 }, { "epoch": 0.5934319405007499, "grad_norm": 0.12426116317510605, "learning_rate": 0.0005, "loss": 2.1121, "step": 155910 }, { "epoch": 0.5934700029688725, "grad_norm": 0.12069100141525269, "learning_rate": 0.0005, "loss": 2.094, "step": 155920 }, { "epoch": 0.5935080654369952, "grad_norm": 0.12368258088827133, "learning_rate": 0.0005, "loss": 2.1172, "step": 155930 }, { "epoch": 0.5935461279051178, "grad_norm": 0.1293889284133911, "learning_rate": 0.0005, "loss": 2.1104, "step": 155940 }, { "epoch": 0.5935841903732406, "grad_norm": 0.12078117579221725, "learning_rate": 0.0005, "loss": 2.1166, "step": 155950 }, { "epoch": 0.5936222528413633, "grad_norm": 0.12822595238685608, "learning_rate": 0.0005, "loss": 2.1016, "step": 155960 }, { "epoch": 0.5936603153094859, "grad_norm": 0.12021885067224503, "learning_rate": 0.0005, "loss": 2.0914, "step": 155970 }, { "epoch": 0.5936983777776086, "grad_norm": 0.12757346034049988, "learning_rate": 0.0005, "loss": 2.1049, "step": 155980 }, { "epoch": 0.5937364402457312, "grad_norm": 0.12365161627531052, "learning_rate": 0.0005, "loss": 2.1264, "step": 155990 }, { "epoch": 0.593774502713854, "grad_norm": 0.13567115366458893, "learning_rate": 0.0005, "loss": 2.1169, "step": 156000 }, { "epoch": 0.5938125651819767, "grad_norm": 0.13355757296085358, "learning_rate": 0.0005, "loss": 2.12, "step": 156010 }, { "epoch": 0.5938506276500993, "grad_norm": 0.1318034529685974, "learning_rate": 0.0005, "loss": 2.1073, "step": 156020 }, { "epoch": 0.593888690118222, "grad_norm": 0.11863218992948532, "learning_rate": 0.0005, "loss": 2.1074, "step": 156030 }, { "epoch": 0.5939267525863448, "grad_norm": 0.13734151422977448, "learning_rate": 0.0005, "loss": 2.1067, "step": 156040 }, { "epoch": 0.5939648150544674, "grad_norm": 0.12175113707780838, "learning_rate": 0.0005, "loss": 2.1148, "step": 156050 }, { "epoch": 0.5940028775225901, "grad_norm": 0.12426832318305969, "learning_rate": 0.0005, "loss": 2.1151, "step": 156060 }, { "epoch": 0.5940409399907127, "grad_norm": 0.1340668648481369, "learning_rate": 0.0005, "loss": 2.1094, "step": 156070 }, { "epoch": 0.5940790024588355, "grad_norm": 0.12402141094207764, "learning_rate": 0.0005, "loss": 2.107, "step": 156080 }, { "epoch": 0.5941170649269581, "grad_norm": 0.12372589111328125, "learning_rate": 0.0005, "loss": 2.0952, "step": 156090 }, { "epoch": 0.5941551273950808, "grad_norm": 0.11944245547056198, "learning_rate": 0.0005, "loss": 2.1106, "step": 156100 }, { "epoch": 0.5941931898632035, "grad_norm": 0.12028376013040543, "learning_rate": 0.0005, "loss": 2.1242, "step": 156110 }, { "epoch": 0.5942312523313261, "grad_norm": 0.12873254716396332, "learning_rate": 0.0005, "loss": 2.1107, "step": 156120 }, { "epoch": 0.5942693147994489, "grad_norm": 0.1547442525625229, "learning_rate": 0.0005, "loss": 2.1226, "step": 156130 }, { "epoch": 0.5943073772675715, "grad_norm": 0.1363360434770584, "learning_rate": 0.0005, "loss": 2.116, "step": 156140 }, { "epoch": 0.5943454397356942, "grad_norm": 0.11733988672494888, "learning_rate": 0.0005, "loss": 2.0964, "step": 156150 }, { "epoch": 0.5943835022038169, "grad_norm": 0.11870251595973969, "learning_rate": 0.0005, "loss": 2.1161, "step": 156160 }, { "epoch": 0.5944215646719396, "grad_norm": 0.12539437413215637, "learning_rate": 0.0005, "loss": 2.109, "step": 156170 }, { "epoch": 0.5944596271400623, "grad_norm": 0.11921115219593048, "learning_rate": 0.0005, "loss": 2.1033, "step": 156180 }, { "epoch": 0.5944976896081849, "grad_norm": 0.12341469526290894, "learning_rate": 0.0005, "loss": 2.1276, "step": 156190 }, { "epoch": 0.5945357520763076, "grad_norm": 0.12102333456277847, "learning_rate": 0.0005, "loss": 2.1088, "step": 156200 }, { "epoch": 0.5945738145444304, "grad_norm": 0.1152803897857666, "learning_rate": 0.0005, "loss": 2.1114, "step": 156210 }, { "epoch": 0.594611877012553, "grad_norm": 0.13594534993171692, "learning_rate": 0.0005, "loss": 2.1023, "step": 156220 }, { "epoch": 0.5946499394806757, "grad_norm": 0.11999261379241943, "learning_rate": 0.0005, "loss": 2.1124, "step": 156230 }, { "epoch": 0.5946880019487983, "grad_norm": 0.1253129243850708, "learning_rate": 0.0005, "loss": 2.1087, "step": 156240 }, { "epoch": 0.594726064416921, "grad_norm": 0.11831702291965485, "learning_rate": 0.0005, "loss": 2.1111, "step": 156250 }, { "epoch": 0.5947641268850438, "grad_norm": 0.13602091372013092, "learning_rate": 0.0005, "loss": 2.1018, "step": 156260 }, { "epoch": 0.5948021893531664, "grad_norm": 0.12433513253927231, "learning_rate": 0.0005, "loss": 2.103, "step": 156270 }, { "epoch": 0.5948402518212891, "grad_norm": 0.11340433359146118, "learning_rate": 0.0005, "loss": 2.1079, "step": 156280 }, { "epoch": 0.5948783142894117, "grad_norm": 0.11828036606311798, "learning_rate": 0.0005, "loss": 2.1196, "step": 156290 }, { "epoch": 0.5949163767575345, "grad_norm": 0.11860781162977219, "learning_rate": 0.0005, "loss": 2.1034, "step": 156300 }, { "epoch": 0.5949544392256572, "grad_norm": 0.12515273690223694, "learning_rate": 0.0005, "loss": 2.11, "step": 156310 }, { "epoch": 0.5949925016937798, "grad_norm": 0.12391990423202515, "learning_rate": 0.0005, "loss": 2.109, "step": 156320 }, { "epoch": 0.5950305641619025, "grad_norm": 0.13401736319065094, "learning_rate": 0.0005, "loss": 2.1083, "step": 156330 }, { "epoch": 0.5950686266300252, "grad_norm": 0.12803804874420166, "learning_rate": 0.0005, "loss": 2.1164, "step": 156340 }, { "epoch": 0.5951066890981479, "grad_norm": 0.12634626030921936, "learning_rate": 0.0005, "loss": 2.1035, "step": 156350 }, { "epoch": 0.5951447515662706, "grad_norm": 0.1226266622543335, "learning_rate": 0.0005, "loss": 2.1122, "step": 156360 }, { "epoch": 0.5951828140343932, "grad_norm": 0.1310350000858307, "learning_rate": 0.0005, "loss": 2.1058, "step": 156370 }, { "epoch": 0.595220876502516, "grad_norm": 0.11200859397649765, "learning_rate": 0.0005, "loss": 2.1191, "step": 156380 }, { "epoch": 0.5952589389706386, "grad_norm": 0.1256367266178131, "learning_rate": 0.0005, "loss": 2.1308, "step": 156390 }, { "epoch": 0.5952970014387613, "grad_norm": 0.11647086590528488, "learning_rate": 0.0005, "loss": 2.1095, "step": 156400 }, { "epoch": 0.595335063906884, "grad_norm": 0.11876901984214783, "learning_rate": 0.0005, "loss": 2.1055, "step": 156410 }, { "epoch": 0.5953731263750066, "grad_norm": 0.12916727364063263, "learning_rate": 0.0005, "loss": 2.1178, "step": 156420 }, { "epoch": 0.5954111888431294, "grad_norm": 0.12823139131069183, "learning_rate": 0.0005, "loss": 2.1032, "step": 156430 }, { "epoch": 0.595449251311252, "grad_norm": 0.13512112200260162, "learning_rate": 0.0005, "loss": 2.1107, "step": 156440 }, { "epoch": 0.5954873137793747, "grad_norm": 0.12843908369541168, "learning_rate": 0.0005, "loss": 2.0912, "step": 156450 }, { "epoch": 0.5955253762474974, "grad_norm": 0.12319228798151016, "learning_rate": 0.0005, "loss": 2.1146, "step": 156460 }, { "epoch": 0.5955634387156201, "grad_norm": 0.13872624933719635, "learning_rate": 0.0005, "loss": 2.1192, "step": 156470 }, { "epoch": 0.5956015011837428, "grad_norm": 0.12136664241552353, "learning_rate": 0.0005, "loss": 2.1098, "step": 156480 }, { "epoch": 0.5956395636518654, "grad_norm": 0.12200096249580383, "learning_rate": 0.0005, "loss": 2.1157, "step": 156490 }, { "epoch": 0.5956776261199881, "grad_norm": 0.12467196583747864, "learning_rate": 0.0005, "loss": 2.1296, "step": 156500 }, { "epoch": 0.5957156885881109, "grad_norm": 0.11812784522771835, "learning_rate": 0.0005, "loss": 2.1169, "step": 156510 }, { "epoch": 0.5957537510562335, "grad_norm": 0.12433940917253494, "learning_rate": 0.0005, "loss": 2.124, "step": 156520 }, { "epoch": 0.5957918135243562, "grad_norm": 0.11930729448795319, "learning_rate": 0.0005, "loss": 2.1166, "step": 156530 }, { "epoch": 0.5958298759924788, "grad_norm": 0.12910746037960052, "learning_rate": 0.0005, "loss": 2.109, "step": 156540 }, { "epoch": 0.5958679384606015, "grad_norm": 0.12703043222427368, "learning_rate": 0.0005, "loss": 2.1013, "step": 156550 }, { "epoch": 0.5959060009287243, "grad_norm": 0.12971876561641693, "learning_rate": 0.0005, "loss": 2.12, "step": 156560 }, { "epoch": 0.5959440633968469, "grad_norm": 0.12978804111480713, "learning_rate": 0.0005, "loss": 2.1142, "step": 156570 }, { "epoch": 0.5959821258649696, "grad_norm": 0.1250307708978653, "learning_rate": 0.0005, "loss": 2.1128, "step": 156580 }, { "epoch": 0.5960201883330922, "grad_norm": 0.12306489050388336, "learning_rate": 0.0005, "loss": 2.0998, "step": 156590 }, { "epoch": 0.596058250801215, "grad_norm": 0.12678273022174835, "learning_rate": 0.0005, "loss": 2.1096, "step": 156600 }, { "epoch": 0.5960963132693377, "grad_norm": 0.1361202746629715, "learning_rate": 0.0005, "loss": 2.1128, "step": 156610 }, { "epoch": 0.5961343757374603, "grad_norm": 0.1354992687702179, "learning_rate": 0.0005, "loss": 2.1059, "step": 156620 }, { "epoch": 0.596172438205583, "grad_norm": 0.11919796466827393, "learning_rate": 0.0005, "loss": 2.116, "step": 156630 }, { "epoch": 0.5962105006737057, "grad_norm": 0.12351883947849274, "learning_rate": 0.0005, "loss": 2.1155, "step": 156640 }, { "epoch": 0.5962485631418284, "grad_norm": 0.1216733455657959, "learning_rate": 0.0005, "loss": 2.1044, "step": 156650 }, { "epoch": 0.596286625609951, "grad_norm": 0.11714150756597519, "learning_rate": 0.0005, "loss": 2.1072, "step": 156660 }, { "epoch": 0.5963246880780737, "grad_norm": 0.11788475513458252, "learning_rate": 0.0005, "loss": 2.1117, "step": 156670 }, { "epoch": 0.5963627505461964, "grad_norm": 0.1324845850467682, "learning_rate": 0.0005, "loss": 2.1071, "step": 156680 }, { "epoch": 0.5964008130143191, "grad_norm": 0.16658127307891846, "learning_rate": 0.0005, "loss": 2.115, "step": 156690 }, { "epoch": 0.5964388754824418, "grad_norm": 0.1240057572722435, "learning_rate": 0.0005, "loss": 2.1113, "step": 156700 }, { "epoch": 0.5964769379505644, "grad_norm": 0.12588045001029968, "learning_rate": 0.0005, "loss": 2.1047, "step": 156710 }, { "epoch": 0.5965150004186871, "grad_norm": 0.12813587486743927, "learning_rate": 0.0005, "loss": 2.1136, "step": 156720 }, { "epoch": 0.5965530628868099, "grad_norm": 0.12198911607265472, "learning_rate": 0.0005, "loss": 2.1182, "step": 156730 }, { "epoch": 0.5965911253549325, "grad_norm": 0.13085797429084778, "learning_rate": 0.0005, "loss": 2.1214, "step": 156740 }, { "epoch": 0.5966291878230552, "grad_norm": 0.12170829623937607, "learning_rate": 0.0005, "loss": 2.1023, "step": 156750 }, { "epoch": 0.5966672502911778, "grad_norm": 0.1213245689868927, "learning_rate": 0.0005, "loss": 2.1117, "step": 156760 }, { "epoch": 0.5967053127593006, "grad_norm": 0.11355091631412506, "learning_rate": 0.0005, "loss": 2.1216, "step": 156770 }, { "epoch": 0.5967433752274233, "grad_norm": 0.12887951731681824, "learning_rate": 0.0005, "loss": 2.1116, "step": 156780 }, { "epoch": 0.5967814376955459, "grad_norm": 0.12898492813110352, "learning_rate": 0.0005, "loss": 2.1114, "step": 156790 }, { "epoch": 0.5968195001636686, "grad_norm": 0.1291269212961197, "learning_rate": 0.0005, "loss": 2.1092, "step": 156800 }, { "epoch": 0.5968575626317913, "grad_norm": 0.11913888901472092, "learning_rate": 0.0005, "loss": 2.117, "step": 156810 }, { "epoch": 0.596895625099914, "grad_norm": 0.11804598569869995, "learning_rate": 0.0005, "loss": 2.1128, "step": 156820 }, { "epoch": 0.5969336875680367, "grad_norm": 0.11910227686166763, "learning_rate": 0.0005, "loss": 2.1121, "step": 156830 }, { "epoch": 0.5969717500361593, "grad_norm": 0.12902553379535675, "learning_rate": 0.0005, "loss": 2.1144, "step": 156840 }, { "epoch": 0.597009812504282, "grad_norm": 0.13280050456523895, "learning_rate": 0.0005, "loss": 2.1066, "step": 156850 }, { "epoch": 0.5970478749724047, "grad_norm": 0.14002548158168793, "learning_rate": 0.0005, "loss": 2.1086, "step": 156860 }, { "epoch": 0.5970859374405274, "grad_norm": 0.11478378623723984, "learning_rate": 0.0005, "loss": 2.1098, "step": 156870 }, { "epoch": 0.5971239999086501, "grad_norm": 0.129014253616333, "learning_rate": 0.0005, "loss": 2.1396, "step": 156880 }, { "epoch": 0.5971620623767727, "grad_norm": 0.12656255066394806, "learning_rate": 0.0005, "loss": 2.1327, "step": 156890 }, { "epoch": 0.5972001248448955, "grad_norm": 0.14026038348674774, "learning_rate": 0.0005, "loss": 2.0998, "step": 156900 }, { "epoch": 0.5972381873130181, "grad_norm": 0.11731848120689392, "learning_rate": 0.0005, "loss": 2.1113, "step": 156910 }, { "epoch": 0.5972762497811408, "grad_norm": 0.12975703179836273, "learning_rate": 0.0005, "loss": 2.1127, "step": 156920 }, { "epoch": 0.5973143122492635, "grad_norm": 0.1149299144744873, "learning_rate": 0.0005, "loss": 2.114, "step": 156930 }, { "epoch": 0.5973523747173862, "grad_norm": 0.12866534292697906, "learning_rate": 0.0005, "loss": 2.1184, "step": 156940 }, { "epoch": 0.5973904371855089, "grad_norm": 0.12251448631286621, "learning_rate": 0.0005, "loss": 2.1049, "step": 156950 }, { "epoch": 0.5974284996536315, "grad_norm": 0.13120411336421967, "learning_rate": 0.0005, "loss": 2.1066, "step": 156960 }, { "epoch": 0.5974665621217542, "grad_norm": 0.1309138685464859, "learning_rate": 0.0005, "loss": 2.1113, "step": 156970 }, { "epoch": 0.5975046245898769, "grad_norm": 0.12745153903961182, "learning_rate": 0.0005, "loss": 2.1249, "step": 156980 }, { "epoch": 0.5975426870579996, "grad_norm": 0.1209530383348465, "learning_rate": 0.0005, "loss": 2.1114, "step": 156990 }, { "epoch": 0.5975807495261223, "grad_norm": 0.13832753896713257, "learning_rate": 0.0005, "loss": 2.1178, "step": 157000 }, { "epoch": 0.5976188119942449, "grad_norm": 0.12662719190120697, "learning_rate": 0.0005, "loss": 2.1153, "step": 157010 }, { "epoch": 0.5976568744623676, "grad_norm": 0.1310693472623825, "learning_rate": 0.0005, "loss": 2.1132, "step": 157020 }, { "epoch": 0.5976949369304904, "grad_norm": 0.12905949354171753, "learning_rate": 0.0005, "loss": 2.1137, "step": 157030 }, { "epoch": 0.597732999398613, "grad_norm": 0.12887629866600037, "learning_rate": 0.0005, "loss": 2.1096, "step": 157040 }, { "epoch": 0.5977710618667357, "grad_norm": 0.14822103083133698, "learning_rate": 0.0005, "loss": 2.1141, "step": 157050 }, { "epoch": 0.5978091243348583, "grad_norm": 0.142548605799675, "learning_rate": 0.0005, "loss": 2.0982, "step": 157060 }, { "epoch": 0.5978471868029811, "grad_norm": 0.13723008334636688, "learning_rate": 0.0005, "loss": 2.1115, "step": 157070 }, { "epoch": 0.5978852492711038, "grad_norm": 0.11937706917524338, "learning_rate": 0.0005, "loss": 2.1075, "step": 157080 }, { "epoch": 0.5979233117392264, "grad_norm": 0.12686049938201904, "learning_rate": 0.0005, "loss": 2.0989, "step": 157090 }, { "epoch": 0.5979613742073491, "grad_norm": 0.12511029839515686, "learning_rate": 0.0005, "loss": 2.1155, "step": 157100 }, { "epoch": 0.5979994366754717, "grad_norm": 0.13167688250541687, "learning_rate": 0.0005, "loss": 2.1132, "step": 157110 }, { "epoch": 0.5980374991435945, "grad_norm": 0.11056148260831833, "learning_rate": 0.0005, "loss": 2.1077, "step": 157120 }, { "epoch": 0.5980755616117172, "grad_norm": 0.13581658899784088, "learning_rate": 0.0005, "loss": 2.1147, "step": 157130 }, { "epoch": 0.5981136240798398, "grad_norm": 0.11735519021749496, "learning_rate": 0.0005, "loss": 2.1103, "step": 157140 }, { "epoch": 0.5981516865479625, "grad_norm": 0.11120060831308365, "learning_rate": 0.0005, "loss": 2.1234, "step": 157150 }, { "epoch": 0.5981897490160852, "grad_norm": 0.122090183198452, "learning_rate": 0.0005, "loss": 2.1093, "step": 157160 }, { "epoch": 0.5982278114842079, "grad_norm": 0.1253940910100937, "learning_rate": 0.0005, "loss": 2.1131, "step": 157170 }, { "epoch": 0.5982658739523306, "grad_norm": 0.13477633893489838, "learning_rate": 0.0005, "loss": 2.1209, "step": 157180 }, { "epoch": 0.5983039364204532, "grad_norm": 0.1272205114364624, "learning_rate": 0.0005, "loss": 2.111, "step": 157190 }, { "epoch": 0.598341998888576, "grad_norm": 0.12778523564338684, "learning_rate": 0.0005, "loss": 2.1172, "step": 157200 }, { "epoch": 0.5983800613566986, "grad_norm": 0.1212928295135498, "learning_rate": 0.0005, "loss": 2.1172, "step": 157210 }, { "epoch": 0.5984181238248213, "grad_norm": 0.12366236001253128, "learning_rate": 0.0005, "loss": 2.105, "step": 157220 }, { "epoch": 0.598456186292944, "grad_norm": 0.12961052358150482, "learning_rate": 0.0005, "loss": 2.0999, "step": 157230 }, { "epoch": 0.5984942487610667, "grad_norm": 0.12433735281229019, "learning_rate": 0.0005, "loss": 2.1227, "step": 157240 }, { "epoch": 0.5985323112291894, "grad_norm": 0.12253043800592422, "learning_rate": 0.0005, "loss": 2.1117, "step": 157250 }, { "epoch": 0.598570373697312, "grad_norm": 0.11693271994590759, "learning_rate": 0.0005, "loss": 2.114, "step": 157260 }, { "epoch": 0.5986084361654347, "grad_norm": 0.11834903806447983, "learning_rate": 0.0005, "loss": 2.1158, "step": 157270 }, { "epoch": 0.5986464986335573, "grad_norm": 0.12358506768941879, "learning_rate": 0.0005, "loss": 2.1113, "step": 157280 }, { "epoch": 0.5986845611016801, "grad_norm": 0.12689338624477386, "learning_rate": 0.0005, "loss": 2.0958, "step": 157290 }, { "epoch": 0.5987226235698028, "grad_norm": 0.12762139737606049, "learning_rate": 0.0005, "loss": 2.1149, "step": 157300 }, { "epoch": 0.5987606860379254, "grad_norm": 0.12027955055236816, "learning_rate": 0.0005, "loss": 2.1128, "step": 157310 }, { "epoch": 0.5987987485060481, "grad_norm": 0.13357017934322357, "learning_rate": 0.0005, "loss": 2.1037, "step": 157320 }, { "epoch": 0.5988368109741709, "grad_norm": 0.13205654919147491, "learning_rate": 0.0005, "loss": 2.0932, "step": 157330 }, { "epoch": 0.5988748734422935, "grad_norm": 0.12938706576824188, "learning_rate": 0.0005, "loss": 2.0985, "step": 157340 }, { "epoch": 0.5989129359104162, "grad_norm": 0.11992862075567245, "learning_rate": 0.0005, "loss": 2.1179, "step": 157350 }, { "epoch": 0.5989509983785388, "grad_norm": 0.12154851108789444, "learning_rate": 0.0005, "loss": 2.1085, "step": 157360 }, { "epoch": 0.5989890608466616, "grad_norm": 0.1233595684170723, "learning_rate": 0.0005, "loss": 2.1057, "step": 157370 }, { "epoch": 0.5990271233147842, "grad_norm": 0.12630879878997803, "learning_rate": 0.0005, "loss": 2.112, "step": 157380 }, { "epoch": 0.5990651857829069, "grad_norm": 0.12620669603347778, "learning_rate": 0.0005, "loss": 2.1059, "step": 157390 }, { "epoch": 0.5991032482510296, "grad_norm": 0.1237010583281517, "learning_rate": 0.0005, "loss": 2.1081, "step": 157400 }, { "epoch": 0.5991413107191522, "grad_norm": 0.12899191677570343, "learning_rate": 0.0005, "loss": 2.1171, "step": 157410 }, { "epoch": 0.599179373187275, "grad_norm": 0.13100019097328186, "learning_rate": 0.0005, "loss": 2.1062, "step": 157420 }, { "epoch": 0.5992174356553976, "grad_norm": 0.12148187309503555, "learning_rate": 0.0005, "loss": 2.118, "step": 157430 }, { "epoch": 0.5992554981235203, "grad_norm": 0.12283346056938171, "learning_rate": 0.0005, "loss": 2.1136, "step": 157440 }, { "epoch": 0.599293560591643, "grad_norm": 0.1465480476617813, "learning_rate": 0.0005, "loss": 2.1074, "step": 157450 }, { "epoch": 0.5993316230597657, "grad_norm": 0.11719803512096405, "learning_rate": 0.0005, "loss": 2.1042, "step": 157460 }, { "epoch": 0.5993696855278884, "grad_norm": 0.12916360795497894, "learning_rate": 0.0005, "loss": 2.1206, "step": 157470 }, { "epoch": 0.599407747996011, "grad_norm": 0.1229061484336853, "learning_rate": 0.0005, "loss": 2.114, "step": 157480 }, { "epoch": 0.5994458104641337, "grad_norm": 0.1390194296836853, "learning_rate": 0.0005, "loss": 2.1084, "step": 157490 }, { "epoch": 0.5994838729322565, "grad_norm": 0.12508492171764374, "learning_rate": 0.0005, "loss": 2.1187, "step": 157500 }, { "epoch": 0.5995219354003791, "grad_norm": 0.11759859323501587, "learning_rate": 0.0005, "loss": 2.1138, "step": 157510 }, { "epoch": 0.5995599978685018, "grad_norm": 0.12426968663930893, "learning_rate": 0.0005, "loss": 2.1131, "step": 157520 }, { "epoch": 0.5995980603366244, "grad_norm": 0.11593806743621826, "learning_rate": 0.0005, "loss": 2.1148, "step": 157530 }, { "epoch": 0.5996361228047471, "grad_norm": 0.12494784593582153, "learning_rate": 0.0005, "loss": 2.118, "step": 157540 }, { "epoch": 0.5996741852728699, "grad_norm": 0.12596601247787476, "learning_rate": 0.0005, "loss": 2.1149, "step": 157550 }, { "epoch": 0.5997122477409925, "grad_norm": 0.12842027842998505, "learning_rate": 0.0005, "loss": 2.1131, "step": 157560 }, { "epoch": 0.5997503102091152, "grad_norm": 0.13903789222240448, "learning_rate": 0.0005, "loss": 2.1352, "step": 157570 }, { "epoch": 0.5997883726772378, "grad_norm": 0.11448253691196442, "learning_rate": 0.0005, "loss": 2.118, "step": 157580 }, { "epoch": 0.5998264351453606, "grad_norm": 0.12419616430997849, "learning_rate": 0.0005, "loss": 2.1171, "step": 157590 }, { "epoch": 0.5998644976134833, "grad_norm": 0.12554436922073364, "learning_rate": 0.0005, "loss": 2.1099, "step": 157600 }, { "epoch": 0.5999025600816059, "grad_norm": 0.12357697635889053, "learning_rate": 0.0005, "loss": 2.1089, "step": 157610 }, { "epoch": 0.5999406225497286, "grad_norm": 0.12808255851268768, "learning_rate": 0.0005, "loss": 2.0929, "step": 157620 }, { "epoch": 0.5999786850178513, "grad_norm": 0.13314425945281982, "learning_rate": 0.0005, "loss": 2.1226, "step": 157630 }, { "epoch": 0.600016747485974, "grad_norm": 0.13815081119537354, "learning_rate": 0.0005, "loss": 2.1224, "step": 157640 }, { "epoch": 0.6000548099540967, "grad_norm": 0.14097647368907928, "learning_rate": 0.0005, "loss": 2.1285, "step": 157650 }, { "epoch": 0.6000928724222193, "grad_norm": 0.12406017631292343, "learning_rate": 0.0005, "loss": 2.1065, "step": 157660 }, { "epoch": 0.6001309348903421, "grad_norm": 0.1178424134850502, "learning_rate": 0.0005, "loss": 2.1119, "step": 157670 }, { "epoch": 0.6001689973584647, "grad_norm": 0.12351218611001968, "learning_rate": 0.0005, "loss": 2.1148, "step": 157680 }, { "epoch": 0.6002070598265874, "grad_norm": 0.12011931836605072, "learning_rate": 0.0005, "loss": 2.1061, "step": 157690 }, { "epoch": 0.60024512229471, "grad_norm": 0.1278388351202011, "learning_rate": 0.0005, "loss": 2.1, "step": 157700 }, { "epoch": 0.6002831847628327, "grad_norm": 0.12576743960380554, "learning_rate": 0.0005, "loss": 2.1084, "step": 157710 }, { "epoch": 0.6003212472309555, "grad_norm": 0.14748530089855194, "learning_rate": 0.0005, "loss": 2.1193, "step": 157720 }, { "epoch": 0.6003593096990781, "grad_norm": 0.12966130673885345, "learning_rate": 0.0005, "loss": 2.1184, "step": 157730 }, { "epoch": 0.6003973721672008, "grad_norm": 0.1358342468738556, "learning_rate": 0.0005, "loss": 2.1064, "step": 157740 }, { "epoch": 0.6004354346353235, "grad_norm": 0.11560594290494919, "learning_rate": 0.0005, "loss": 2.0952, "step": 157750 }, { "epoch": 0.6004734971034462, "grad_norm": 0.1258758008480072, "learning_rate": 0.0005, "loss": 2.1087, "step": 157760 }, { "epoch": 0.6005115595715689, "grad_norm": 0.12026939541101456, "learning_rate": 0.0005, "loss": 2.1319, "step": 157770 }, { "epoch": 0.6005496220396915, "grad_norm": 0.12306264787912369, "learning_rate": 0.0005, "loss": 2.1139, "step": 157780 }, { "epoch": 0.6005876845078142, "grad_norm": 0.13696016371250153, "learning_rate": 0.0005, "loss": 2.1197, "step": 157790 }, { "epoch": 0.600625746975937, "grad_norm": 0.12249837815761566, "learning_rate": 0.0005, "loss": 2.1112, "step": 157800 }, { "epoch": 0.6006638094440596, "grad_norm": 0.13254183530807495, "learning_rate": 0.0005, "loss": 2.1137, "step": 157810 }, { "epoch": 0.6007018719121823, "grad_norm": 0.1298949271440506, "learning_rate": 0.0005, "loss": 2.1164, "step": 157820 }, { "epoch": 0.6007399343803049, "grad_norm": 0.1328222155570984, "learning_rate": 0.0005, "loss": 2.1081, "step": 157830 }, { "epoch": 0.6007779968484276, "grad_norm": 0.12573933601379395, "learning_rate": 0.0005, "loss": 2.1005, "step": 157840 }, { "epoch": 0.6008160593165504, "grad_norm": 0.14559721946716309, "learning_rate": 0.0005, "loss": 2.124, "step": 157850 }, { "epoch": 0.600854121784673, "grad_norm": 0.13562941551208496, "learning_rate": 0.0005, "loss": 2.0963, "step": 157860 }, { "epoch": 0.6008921842527957, "grad_norm": 0.12265808880329132, "learning_rate": 0.0005, "loss": 2.1058, "step": 157870 }, { "epoch": 0.6009302467209183, "grad_norm": 0.1416488140821457, "learning_rate": 0.0005, "loss": 2.1189, "step": 157880 }, { "epoch": 0.6009683091890411, "grad_norm": 0.126577690243721, "learning_rate": 0.0005, "loss": 2.1098, "step": 157890 }, { "epoch": 0.6010063716571638, "grad_norm": 0.13580593466758728, "learning_rate": 0.0005, "loss": 2.095, "step": 157900 }, { "epoch": 0.6010444341252864, "grad_norm": 0.1211516261100769, "learning_rate": 0.0005, "loss": 2.1072, "step": 157910 }, { "epoch": 0.6010824965934091, "grad_norm": 0.13828988373279572, "learning_rate": 0.0005, "loss": 2.1101, "step": 157920 }, { "epoch": 0.6011205590615318, "grad_norm": 0.11835107952356339, "learning_rate": 0.0005, "loss": 2.1156, "step": 157930 }, { "epoch": 0.6011586215296545, "grad_norm": 0.11946625262498856, "learning_rate": 0.0005, "loss": 2.105, "step": 157940 }, { "epoch": 0.6011966839977771, "grad_norm": 0.12866944074630737, "learning_rate": 0.0005, "loss": 2.1167, "step": 157950 }, { "epoch": 0.6012347464658998, "grad_norm": 0.12162616103887558, "learning_rate": 0.0005, "loss": 2.1188, "step": 157960 }, { "epoch": 0.6012728089340225, "grad_norm": 0.1159362643957138, "learning_rate": 0.0005, "loss": 2.1358, "step": 157970 }, { "epoch": 0.6013108714021452, "grad_norm": 0.11833919584751129, "learning_rate": 0.0005, "loss": 2.106, "step": 157980 }, { "epoch": 0.6013489338702679, "grad_norm": 0.18633560836315155, "learning_rate": 0.0005, "loss": 2.1184, "step": 157990 }, { "epoch": 0.6013869963383905, "grad_norm": 0.12414807081222534, "learning_rate": 0.0005, "loss": 2.1068, "step": 158000 }, { "epoch": 0.6014250588065132, "grad_norm": 0.12252053618431091, "learning_rate": 0.0005, "loss": 2.1004, "step": 158010 }, { "epoch": 0.601463121274636, "grad_norm": 0.11328023672103882, "learning_rate": 0.0005, "loss": 2.114, "step": 158020 }, { "epoch": 0.6015011837427586, "grad_norm": 0.12160588800907135, "learning_rate": 0.0005, "loss": 2.1228, "step": 158030 }, { "epoch": 0.6015392462108813, "grad_norm": 0.15667229890823364, "learning_rate": 0.0005, "loss": 2.1059, "step": 158040 }, { "epoch": 0.6015773086790039, "grad_norm": 0.12987153232097626, "learning_rate": 0.0005, "loss": 2.1159, "step": 158050 }, { "epoch": 0.6016153711471267, "grad_norm": 0.13119001686573029, "learning_rate": 0.0005, "loss": 2.1181, "step": 158060 }, { "epoch": 0.6016534336152494, "grad_norm": 0.12040984630584717, "learning_rate": 0.0005, "loss": 2.1181, "step": 158070 }, { "epoch": 0.601691496083372, "grad_norm": 0.11469858884811401, "learning_rate": 0.0005, "loss": 2.1266, "step": 158080 }, { "epoch": 0.6017295585514947, "grad_norm": 0.1116182953119278, "learning_rate": 0.0005, "loss": 2.1147, "step": 158090 }, { "epoch": 0.6017676210196174, "grad_norm": 0.1325918287038803, "learning_rate": 0.0005, "loss": 2.1309, "step": 158100 }, { "epoch": 0.6018056834877401, "grad_norm": 0.11638123542070389, "learning_rate": 0.0005, "loss": 2.1151, "step": 158110 }, { "epoch": 0.6018437459558628, "grad_norm": 0.12299565225839615, "learning_rate": 0.0005, "loss": 2.1024, "step": 158120 }, { "epoch": 0.6018818084239854, "grad_norm": 0.1290263831615448, "learning_rate": 0.0005, "loss": 2.1144, "step": 158130 }, { "epoch": 0.6019198708921081, "grad_norm": 0.1211891621351242, "learning_rate": 0.0005, "loss": 2.1138, "step": 158140 }, { "epoch": 0.6019579333602308, "grad_norm": 0.11648281663656235, "learning_rate": 0.0005, "loss": 2.1105, "step": 158150 }, { "epoch": 0.6019959958283535, "grad_norm": 0.10998690128326416, "learning_rate": 0.0005, "loss": 2.1201, "step": 158160 }, { "epoch": 0.6020340582964762, "grad_norm": 0.12193288654088974, "learning_rate": 0.0005, "loss": 2.1247, "step": 158170 }, { "epoch": 0.6020721207645988, "grad_norm": 0.11943355947732925, "learning_rate": 0.0005, "loss": 2.123, "step": 158180 }, { "epoch": 0.6021101832327216, "grad_norm": 0.11397210508584976, "learning_rate": 0.0005, "loss": 2.1253, "step": 158190 }, { "epoch": 0.6021482457008442, "grad_norm": 0.5890202522277832, "learning_rate": 0.0005, "loss": 2.1139, "step": 158200 }, { "epoch": 0.6021863081689669, "grad_norm": 0.12778812646865845, "learning_rate": 0.0005, "loss": 2.1034, "step": 158210 }, { "epoch": 0.6022243706370896, "grad_norm": 0.1296212375164032, "learning_rate": 0.0005, "loss": 2.1091, "step": 158220 }, { "epoch": 0.6022624331052123, "grad_norm": 0.12705478072166443, "learning_rate": 0.0005, "loss": 2.1177, "step": 158230 }, { "epoch": 0.602300495573335, "grad_norm": 0.12249784171581268, "learning_rate": 0.0005, "loss": 2.109, "step": 158240 }, { "epoch": 0.6023385580414576, "grad_norm": 0.13054530322551727, "learning_rate": 0.0005, "loss": 2.1173, "step": 158250 }, { "epoch": 0.6023766205095803, "grad_norm": 0.12559649348258972, "learning_rate": 0.0005, "loss": 2.0865, "step": 158260 }, { "epoch": 0.602414682977703, "grad_norm": 0.1225946918129921, "learning_rate": 0.0005, "loss": 2.0942, "step": 158270 }, { "epoch": 0.6024527454458257, "grad_norm": 0.132630854845047, "learning_rate": 0.0005, "loss": 2.1198, "step": 158280 }, { "epoch": 0.6024908079139484, "grad_norm": 0.12257161736488342, "learning_rate": 0.0005, "loss": 2.0972, "step": 158290 }, { "epoch": 0.602528870382071, "grad_norm": 0.11760008335113525, "learning_rate": 0.0005, "loss": 2.1096, "step": 158300 }, { "epoch": 0.6025669328501937, "grad_norm": 0.12999369204044342, "learning_rate": 0.0005, "loss": 2.1221, "step": 158310 }, { "epoch": 0.6026049953183165, "grad_norm": 0.15574951469898224, "learning_rate": 0.0005, "loss": 2.109, "step": 158320 }, { "epoch": 0.6026430577864391, "grad_norm": 0.10984206199645996, "learning_rate": 0.0005, "loss": 2.1144, "step": 158330 }, { "epoch": 0.6026811202545618, "grad_norm": 0.1276576966047287, "learning_rate": 0.0005, "loss": 2.1178, "step": 158340 }, { "epoch": 0.6027191827226844, "grad_norm": 0.12136907875537872, "learning_rate": 0.0005, "loss": 2.1046, "step": 158350 }, { "epoch": 0.6027572451908072, "grad_norm": 0.1264997273683548, "learning_rate": 0.0005, "loss": 2.0908, "step": 158360 }, { "epoch": 0.6027953076589299, "grad_norm": 0.1296354979276657, "learning_rate": 0.0005, "loss": 2.0985, "step": 158370 }, { "epoch": 0.6028333701270525, "grad_norm": 0.14940467476844788, "learning_rate": 0.0005, "loss": 2.1189, "step": 158380 }, { "epoch": 0.6028714325951752, "grad_norm": 0.12434712797403336, "learning_rate": 0.0005, "loss": 2.1199, "step": 158390 }, { "epoch": 0.6029094950632979, "grad_norm": 0.12737199664115906, "learning_rate": 0.0005, "loss": 2.0976, "step": 158400 }, { "epoch": 0.6029475575314206, "grad_norm": 0.1796177625656128, "learning_rate": 0.0005, "loss": 2.1092, "step": 158410 }, { "epoch": 0.6029856199995433, "grad_norm": 0.11623994261026382, "learning_rate": 0.0005, "loss": 2.1072, "step": 158420 }, { "epoch": 0.6030236824676659, "grad_norm": 0.11934838443994522, "learning_rate": 0.0005, "loss": 2.1221, "step": 158430 }, { "epoch": 0.6030617449357886, "grad_norm": 0.11687199771404266, "learning_rate": 0.0005, "loss": 2.1101, "step": 158440 }, { "epoch": 0.6030998074039113, "grad_norm": 0.11579285562038422, "learning_rate": 0.0005, "loss": 2.1023, "step": 158450 }, { "epoch": 0.603137869872034, "grad_norm": 0.12085693329572678, "learning_rate": 0.0005, "loss": 2.1106, "step": 158460 }, { "epoch": 0.6031759323401567, "grad_norm": 0.11823724955320358, "learning_rate": 0.0005, "loss": 2.1179, "step": 158470 }, { "epoch": 0.6032139948082793, "grad_norm": 0.11455874890089035, "learning_rate": 0.0005, "loss": 2.1009, "step": 158480 }, { "epoch": 0.6032520572764021, "grad_norm": 0.11831489950418472, "learning_rate": 0.0005, "loss": 2.1241, "step": 158490 }, { "epoch": 0.6032901197445247, "grad_norm": 0.1299872249364853, "learning_rate": 0.0005, "loss": 2.1142, "step": 158500 }, { "epoch": 0.6033281822126474, "grad_norm": 0.13752396404743195, "learning_rate": 0.0005, "loss": 2.108, "step": 158510 }, { "epoch": 0.60336624468077, "grad_norm": 0.135402113199234, "learning_rate": 0.0005, "loss": 2.1107, "step": 158520 }, { "epoch": 0.6034043071488928, "grad_norm": 0.12532898783683777, "learning_rate": 0.0005, "loss": 2.117, "step": 158530 }, { "epoch": 0.6034423696170155, "grad_norm": 0.13254982233047485, "learning_rate": 0.0005, "loss": 2.1119, "step": 158540 }, { "epoch": 0.6034804320851381, "grad_norm": 0.12623955309391022, "learning_rate": 0.0005, "loss": 2.1098, "step": 158550 }, { "epoch": 0.6035184945532608, "grad_norm": 0.1269414871931076, "learning_rate": 0.0005, "loss": 2.123, "step": 158560 }, { "epoch": 0.6035565570213834, "grad_norm": 0.12244527786970139, "learning_rate": 0.0005, "loss": 2.1179, "step": 158570 }, { "epoch": 0.6035946194895062, "grad_norm": 0.11707145720720291, "learning_rate": 0.0005, "loss": 2.1102, "step": 158580 }, { "epoch": 0.6036326819576289, "grad_norm": 0.124131940305233, "learning_rate": 0.0005, "loss": 2.1093, "step": 158590 }, { "epoch": 0.6036707444257515, "grad_norm": 0.11714545637369156, "learning_rate": 0.0005, "loss": 2.1071, "step": 158600 }, { "epoch": 0.6037088068938742, "grad_norm": 0.11764192581176758, "learning_rate": 0.0005, "loss": 2.1007, "step": 158610 }, { "epoch": 0.603746869361997, "grad_norm": 0.12883175909519196, "learning_rate": 0.0005, "loss": 2.1055, "step": 158620 }, { "epoch": 0.6037849318301196, "grad_norm": 0.12830160558223724, "learning_rate": 0.0005, "loss": 2.1045, "step": 158630 }, { "epoch": 0.6038229942982423, "grad_norm": 0.13523967564105988, "learning_rate": 0.0005, "loss": 2.0989, "step": 158640 }, { "epoch": 0.6038610567663649, "grad_norm": 0.29038044810295105, "learning_rate": 0.0005, "loss": 2.1167, "step": 158650 }, { "epoch": 0.6038991192344877, "grad_norm": 0.1264004111289978, "learning_rate": 0.0005, "loss": 2.1043, "step": 158660 }, { "epoch": 0.6039371817026103, "grad_norm": 0.12499309331178665, "learning_rate": 0.0005, "loss": 2.1067, "step": 158670 }, { "epoch": 0.603975244170733, "grad_norm": 0.12598103284835815, "learning_rate": 0.0005, "loss": 2.1066, "step": 158680 }, { "epoch": 0.6040133066388557, "grad_norm": 0.13107284903526306, "learning_rate": 0.0005, "loss": 2.1214, "step": 158690 }, { "epoch": 0.6040513691069783, "grad_norm": 0.11517848819494247, "learning_rate": 0.0005, "loss": 2.1021, "step": 158700 }, { "epoch": 0.6040894315751011, "grad_norm": 0.12531235814094543, "learning_rate": 0.0005, "loss": 2.1295, "step": 158710 }, { "epoch": 0.6041274940432237, "grad_norm": 0.12719525396823883, "learning_rate": 0.0005, "loss": 2.1223, "step": 158720 }, { "epoch": 0.6041655565113464, "grad_norm": 0.12175626307725906, "learning_rate": 0.0005, "loss": 2.1197, "step": 158730 }, { "epoch": 0.6042036189794691, "grad_norm": 0.11426258087158203, "learning_rate": 0.0005, "loss": 2.1004, "step": 158740 }, { "epoch": 0.6042416814475918, "grad_norm": 0.12400322407484055, "learning_rate": 0.0005, "loss": 2.1254, "step": 158750 }, { "epoch": 0.6042797439157145, "grad_norm": 0.13072001934051514, "learning_rate": 0.0005, "loss": 2.1304, "step": 158760 }, { "epoch": 0.6043178063838371, "grad_norm": 0.12369487434625626, "learning_rate": 0.0005, "loss": 2.1188, "step": 158770 }, { "epoch": 0.6043558688519598, "grad_norm": 0.12297425419092178, "learning_rate": 0.0005, "loss": 2.0934, "step": 158780 }, { "epoch": 0.6043939313200826, "grad_norm": 0.11754658073186874, "learning_rate": 0.0005, "loss": 2.1083, "step": 158790 }, { "epoch": 0.6044319937882052, "grad_norm": 0.12254299223423004, "learning_rate": 0.0005, "loss": 2.1253, "step": 158800 }, { "epoch": 0.6044700562563279, "grad_norm": 0.12980803847312927, "learning_rate": 0.0005, "loss": 2.1072, "step": 158810 }, { "epoch": 0.6045081187244505, "grad_norm": 0.13752233982086182, "learning_rate": 0.0005, "loss": 2.114, "step": 158820 }, { "epoch": 0.6045461811925733, "grad_norm": 0.12054204940795898, "learning_rate": 0.0005, "loss": 2.0929, "step": 158830 }, { "epoch": 0.604584243660696, "grad_norm": 0.13067400455474854, "learning_rate": 0.0005, "loss": 2.1153, "step": 158840 }, { "epoch": 0.6046223061288186, "grad_norm": 0.1433134377002716, "learning_rate": 0.0005, "loss": 2.1043, "step": 158850 }, { "epoch": 0.6046603685969413, "grad_norm": 0.14245948195457458, "learning_rate": 0.0005, "loss": 2.122, "step": 158860 }, { "epoch": 0.6046984310650639, "grad_norm": 0.1247478574514389, "learning_rate": 0.0005, "loss": 2.1115, "step": 158870 }, { "epoch": 0.6047364935331867, "grad_norm": 0.12523062527179718, "learning_rate": 0.0005, "loss": 2.099, "step": 158880 }, { "epoch": 0.6047745560013094, "grad_norm": 0.11393284797668457, "learning_rate": 0.0005, "loss": 2.1057, "step": 158890 }, { "epoch": 0.604812618469432, "grad_norm": 0.1180889904499054, "learning_rate": 0.0005, "loss": 2.1177, "step": 158900 }, { "epoch": 0.6048506809375547, "grad_norm": 0.13296665251255035, "learning_rate": 0.0005, "loss": 2.1199, "step": 158910 }, { "epoch": 0.6048887434056774, "grad_norm": 0.13823893666267395, "learning_rate": 0.0005, "loss": 2.1005, "step": 158920 }, { "epoch": 0.6049268058738001, "grad_norm": 0.13230063021183014, "learning_rate": 0.0005, "loss": 2.1109, "step": 158930 }, { "epoch": 0.6049648683419228, "grad_norm": 0.11703740805387497, "learning_rate": 0.0005, "loss": 2.1243, "step": 158940 }, { "epoch": 0.6050029308100454, "grad_norm": 0.1351865530014038, "learning_rate": 0.0005, "loss": 2.0944, "step": 158950 }, { "epoch": 0.6050409932781682, "grad_norm": 0.12466300278902054, "learning_rate": 0.0005, "loss": 2.111, "step": 158960 }, { "epoch": 0.6050790557462908, "grad_norm": 0.11597589403390884, "learning_rate": 0.0005, "loss": 2.1127, "step": 158970 }, { "epoch": 0.6051171182144135, "grad_norm": 0.12419840693473816, "learning_rate": 0.0005, "loss": 2.1098, "step": 158980 }, { "epoch": 0.6051551806825362, "grad_norm": 0.12489227950572968, "learning_rate": 0.0005, "loss": 2.1108, "step": 158990 }, { "epoch": 0.6051932431506588, "grad_norm": 0.1234791949391365, "learning_rate": 0.0005, "loss": 2.1158, "step": 159000 }, { "epoch": 0.6052313056187816, "grad_norm": 0.11526373773813248, "learning_rate": 0.0005, "loss": 2.1072, "step": 159010 }, { "epoch": 0.6052693680869042, "grad_norm": 0.11705972254276276, "learning_rate": 0.0005, "loss": 2.099, "step": 159020 }, { "epoch": 0.6053074305550269, "grad_norm": 0.1232641190290451, "learning_rate": 0.0005, "loss": 2.0949, "step": 159030 }, { "epoch": 0.6053454930231496, "grad_norm": 0.13102813065052032, "learning_rate": 0.0005, "loss": 2.1044, "step": 159040 }, { "epoch": 0.6053835554912723, "grad_norm": 0.11702539771795273, "learning_rate": 0.0005, "loss": 2.0988, "step": 159050 }, { "epoch": 0.605421617959395, "grad_norm": 0.12806493043899536, "learning_rate": 0.0005, "loss": 2.1075, "step": 159060 }, { "epoch": 0.6054596804275176, "grad_norm": 0.12140469253063202, "learning_rate": 0.0005, "loss": 2.1101, "step": 159070 }, { "epoch": 0.6054977428956403, "grad_norm": 0.13960334658622742, "learning_rate": 0.0005, "loss": 2.1192, "step": 159080 }, { "epoch": 0.6055358053637631, "grad_norm": 0.11300751566886902, "learning_rate": 0.0005, "loss": 2.1214, "step": 159090 }, { "epoch": 0.6055738678318857, "grad_norm": 0.1150921955704689, "learning_rate": 0.0005, "loss": 2.0974, "step": 159100 }, { "epoch": 0.6056119303000084, "grad_norm": 0.12943331897258759, "learning_rate": 0.0005, "loss": 2.1318, "step": 159110 }, { "epoch": 0.605649992768131, "grad_norm": 0.12138635665178299, "learning_rate": 0.0005, "loss": 2.1014, "step": 159120 }, { "epoch": 0.6056880552362537, "grad_norm": 0.12762968242168427, "learning_rate": 0.0005, "loss": 2.1082, "step": 159130 }, { "epoch": 0.6057261177043765, "grad_norm": 0.11474503576755524, "learning_rate": 0.0005, "loss": 2.114, "step": 159140 }, { "epoch": 0.6057641801724991, "grad_norm": 0.12041950970888138, "learning_rate": 0.0005, "loss": 2.0943, "step": 159150 }, { "epoch": 0.6058022426406218, "grad_norm": 0.11588443070650101, "learning_rate": 0.0005, "loss": 2.0871, "step": 159160 }, { "epoch": 0.6058403051087444, "grad_norm": 0.12442999333143234, "learning_rate": 0.0005, "loss": 2.1393, "step": 159170 }, { "epoch": 0.6058783675768672, "grad_norm": 0.1322493851184845, "learning_rate": 0.0005, "loss": 2.122, "step": 159180 }, { "epoch": 0.6059164300449899, "grad_norm": 0.12426673620939255, "learning_rate": 0.0005, "loss": 2.1166, "step": 159190 }, { "epoch": 0.6059544925131125, "grad_norm": 0.11464572697877884, "learning_rate": 0.0005, "loss": 2.1083, "step": 159200 }, { "epoch": 0.6059925549812352, "grad_norm": 0.11597344279289246, "learning_rate": 0.0005, "loss": 2.1079, "step": 159210 }, { "epoch": 0.6060306174493579, "grad_norm": 0.12046016752719879, "learning_rate": 0.0005, "loss": 2.1199, "step": 159220 }, { "epoch": 0.6060686799174806, "grad_norm": 0.12151051312685013, "learning_rate": 0.0005, "loss": 2.1004, "step": 159230 }, { "epoch": 0.6061067423856032, "grad_norm": 0.11330360174179077, "learning_rate": 0.0005, "loss": 2.1156, "step": 159240 }, { "epoch": 0.6061448048537259, "grad_norm": 0.12379861623048782, "learning_rate": 0.0005, "loss": 2.1281, "step": 159250 }, { "epoch": 0.6061828673218487, "grad_norm": 0.11828950047492981, "learning_rate": 0.0005, "loss": 2.0971, "step": 159260 }, { "epoch": 0.6062209297899713, "grad_norm": 0.12113313376903534, "learning_rate": 0.0005, "loss": 2.1111, "step": 159270 }, { "epoch": 0.606258992258094, "grad_norm": 0.12315396964550018, "learning_rate": 0.0005, "loss": 2.1194, "step": 159280 }, { "epoch": 0.6062970547262166, "grad_norm": 0.12765733897686005, "learning_rate": 0.0005, "loss": 2.118, "step": 159290 }, { "epoch": 0.6063351171943393, "grad_norm": 0.12009833753108978, "learning_rate": 0.0005, "loss": 2.1122, "step": 159300 }, { "epoch": 0.6063731796624621, "grad_norm": 0.13260215520858765, "learning_rate": 0.0005, "loss": 2.1118, "step": 159310 }, { "epoch": 0.6064112421305847, "grad_norm": 0.12929363548755646, "learning_rate": 0.0005, "loss": 2.1143, "step": 159320 }, { "epoch": 0.6064493045987074, "grad_norm": 0.13376052677631378, "learning_rate": 0.0005, "loss": 2.1003, "step": 159330 }, { "epoch": 0.60648736706683, "grad_norm": 0.14937913417816162, "learning_rate": 0.0005, "loss": 2.0955, "step": 159340 }, { "epoch": 0.6065254295349528, "grad_norm": 0.13000261783599854, "learning_rate": 0.0005, "loss": 2.1181, "step": 159350 }, { "epoch": 0.6065634920030755, "grad_norm": 0.12199776619672775, "learning_rate": 0.0005, "loss": 2.1186, "step": 159360 }, { "epoch": 0.6066015544711981, "grad_norm": 0.13417278230190277, "learning_rate": 0.0005, "loss": 2.1135, "step": 159370 }, { "epoch": 0.6066396169393208, "grad_norm": 0.11662577837705612, "learning_rate": 0.0005, "loss": 2.1058, "step": 159380 }, { "epoch": 0.6066776794074435, "grad_norm": 0.12676531076431274, "learning_rate": 0.0005, "loss": 2.1113, "step": 159390 }, { "epoch": 0.6067157418755662, "grad_norm": 0.11627112329006195, "learning_rate": 0.0005, "loss": 2.1075, "step": 159400 }, { "epoch": 0.6067538043436889, "grad_norm": 0.1277991533279419, "learning_rate": 0.0005, "loss": 2.1138, "step": 159410 }, { "epoch": 0.6067918668118115, "grad_norm": 0.12240882217884064, "learning_rate": 0.0005, "loss": 2.1042, "step": 159420 }, { "epoch": 0.6068299292799342, "grad_norm": 0.1373898833990097, "learning_rate": 0.0005, "loss": 2.1022, "step": 159430 }, { "epoch": 0.606867991748057, "grad_norm": 0.1356273591518402, "learning_rate": 0.0005, "loss": 2.1124, "step": 159440 }, { "epoch": 0.6069060542161796, "grad_norm": 0.1183418333530426, "learning_rate": 0.0005, "loss": 2.1079, "step": 159450 }, { "epoch": 0.6069441166843023, "grad_norm": 0.12304482609033585, "learning_rate": 0.0005, "loss": 2.0957, "step": 159460 }, { "epoch": 0.6069821791524249, "grad_norm": 0.1182306781411171, "learning_rate": 0.0005, "loss": 2.1196, "step": 159470 }, { "epoch": 0.6070202416205477, "grad_norm": 0.11697513610124588, "learning_rate": 0.0005, "loss": 2.1153, "step": 159480 }, { "epoch": 0.6070583040886703, "grad_norm": 0.11313638091087341, "learning_rate": 0.0005, "loss": 2.111, "step": 159490 }, { "epoch": 0.607096366556793, "grad_norm": 0.11949624121189117, "learning_rate": 0.0005, "loss": 2.1168, "step": 159500 }, { "epoch": 0.6071344290249157, "grad_norm": 0.12727008759975433, "learning_rate": 0.0005, "loss": 2.1105, "step": 159510 }, { "epoch": 0.6071724914930384, "grad_norm": 0.13288576900959015, "learning_rate": 0.0005, "loss": 2.1151, "step": 159520 }, { "epoch": 0.6072105539611611, "grad_norm": 0.13605280220508575, "learning_rate": 0.0005, "loss": 2.1098, "step": 159530 }, { "epoch": 0.6072486164292837, "grad_norm": 0.11556291580200195, "learning_rate": 0.0005, "loss": 2.1108, "step": 159540 }, { "epoch": 0.6072866788974064, "grad_norm": 0.13261030614376068, "learning_rate": 0.0005, "loss": 2.1309, "step": 159550 }, { "epoch": 0.607324741365529, "grad_norm": 0.11572163552045822, "learning_rate": 0.0005, "loss": 2.1092, "step": 159560 }, { "epoch": 0.6073628038336518, "grad_norm": 0.1369166076183319, "learning_rate": 0.0005, "loss": 2.1122, "step": 159570 }, { "epoch": 0.6074008663017745, "grad_norm": 0.11626111716032028, "learning_rate": 0.0005, "loss": 2.116, "step": 159580 }, { "epoch": 0.6074389287698971, "grad_norm": 0.12466323375701904, "learning_rate": 0.0005, "loss": 2.104, "step": 159590 }, { "epoch": 0.6074769912380198, "grad_norm": 0.12724941968917847, "learning_rate": 0.0005, "loss": 2.1022, "step": 159600 }, { "epoch": 0.6075150537061426, "grad_norm": 0.11998318135738373, "learning_rate": 0.0005, "loss": 2.1131, "step": 159610 }, { "epoch": 0.6075531161742652, "grad_norm": 0.11803542822599411, "learning_rate": 0.0005, "loss": 2.1046, "step": 159620 }, { "epoch": 0.6075911786423879, "grad_norm": 0.24420522153377533, "learning_rate": 0.0005, "loss": 2.1182, "step": 159630 }, { "epoch": 0.6076292411105105, "grad_norm": 0.12755897641181946, "learning_rate": 0.0005, "loss": 2.1129, "step": 159640 }, { "epoch": 0.6076673035786333, "grad_norm": 0.12958073616027832, "learning_rate": 0.0005, "loss": 2.1068, "step": 159650 }, { "epoch": 0.607705366046756, "grad_norm": 0.12851276993751526, "learning_rate": 0.0005, "loss": 2.1278, "step": 159660 }, { "epoch": 0.6077434285148786, "grad_norm": 0.13393059372901917, "learning_rate": 0.0005, "loss": 2.0993, "step": 159670 }, { "epoch": 0.6077814909830013, "grad_norm": 0.11613164842128754, "learning_rate": 0.0005, "loss": 2.1032, "step": 159680 }, { "epoch": 0.607819553451124, "grad_norm": 0.12504012882709503, "learning_rate": 0.0005, "loss": 2.0983, "step": 159690 }, { "epoch": 0.6078576159192467, "grad_norm": 0.12086059153079987, "learning_rate": 0.0005, "loss": 2.1118, "step": 159700 }, { "epoch": 0.6078956783873694, "grad_norm": 0.12351615726947784, "learning_rate": 0.0005, "loss": 2.1193, "step": 159710 }, { "epoch": 0.607933740855492, "grad_norm": 0.13252276182174683, "learning_rate": 0.0005, "loss": 2.1181, "step": 159720 }, { "epoch": 0.6079718033236147, "grad_norm": 0.12680232524871826, "learning_rate": 0.0005, "loss": 2.1134, "step": 159730 }, { "epoch": 0.6080098657917374, "grad_norm": 0.44076067209243774, "learning_rate": 0.0005, "loss": 2.1089, "step": 159740 }, { "epoch": 0.6080479282598601, "grad_norm": 0.3102264404296875, "learning_rate": 0.0005, "loss": 2.1049, "step": 159750 }, { "epoch": 0.6080859907279828, "grad_norm": 0.11716543883085251, "learning_rate": 0.0005, "loss": 2.1137, "step": 159760 }, { "epoch": 0.6081240531961054, "grad_norm": 0.12028875946998596, "learning_rate": 0.0005, "loss": 2.1207, "step": 159770 }, { "epoch": 0.6081621156642282, "grad_norm": 0.12898167967796326, "learning_rate": 0.0005, "loss": 2.1092, "step": 159780 }, { "epoch": 0.6082001781323508, "grad_norm": 0.12819376587867737, "learning_rate": 0.0005, "loss": 2.0988, "step": 159790 }, { "epoch": 0.6082382406004735, "grad_norm": 0.10979683697223663, "learning_rate": 0.0005, "loss": 2.1149, "step": 159800 }, { "epoch": 0.6082763030685961, "grad_norm": 0.11153441667556763, "learning_rate": 0.0005, "loss": 2.0982, "step": 159810 }, { "epoch": 0.6083143655367189, "grad_norm": 0.14920252561569214, "learning_rate": 0.0005, "loss": 2.105, "step": 159820 }, { "epoch": 0.6083524280048416, "grad_norm": 0.1329171061515808, "learning_rate": 0.0005, "loss": 2.1208, "step": 159830 }, { "epoch": 0.6083904904729642, "grad_norm": 0.12048627436161041, "learning_rate": 0.0005, "loss": 2.1077, "step": 159840 }, { "epoch": 0.6084285529410869, "grad_norm": 0.1240534782409668, "learning_rate": 0.0005, "loss": 2.1126, "step": 159850 }, { "epoch": 0.6084666154092095, "grad_norm": 0.12022659927606583, "learning_rate": 0.0005, "loss": 2.1131, "step": 159860 }, { "epoch": 0.6085046778773323, "grad_norm": 0.12578216195106506, "learning_rate": 0.0005, "loss": 2.1068, "step": 159870 }, { "epoch": 0.608542740345455, "grad_norm": 0.12415976822376251, "learning_rate": 0.0005, "loss": 2.1101, "step": 159880 }, { "epoch": 0.6085808028135776, "grad_norm": 0.14356285333633423, "learning_rate": 0.0005, "loss": 2.1133, "step": 159890 }, { "epoch": 0.6086188652817003, "grad_norm": 0.1228196918964386, "learning_rate": 0.0005, "loss": 2.1025, "step": 159900 }, { "epoch": 0.608656927749823, "grad_norm": 0.12486620992422104, "learning_rate": 0.0005, "loss": 2.1115, "step": 159910 }, { "epoch": 0.6086949902179457, "grad_norm": 0.13456842303276062, "learning_rate": 0.0005, "loss": 2.1007, "step": 159920 }, { "epoch": 0.6087330526860684, "grad_norm": 0.13543975353240967, "learning_rate": 0.0005, "loss": 2.1116, "step": 159930 }, { "epoch": 0.608771115154191, "grad_norm": 0.13487686216831207, "learning_rate": 0.0005, "loss": 2.1092, "step": 159940 }, { "epoch": 0.6088091776223138, "grad_norm": 0.13343895971775055, "learning_rate": 0.0005, "loss": 2.1188, "step": 159950 }, { "epoch": 0.6088472400904364, "grad_norm": 0.12179489433765411, "learning_rate": 0.0005, "loss": 2.0941, "step": 159960 }, { "epoch": 0.6088853025585591, "grad_norm": 0.13544785976409912, "learning_rate": 0.0005, "loss": 2.1212, "step": 159970 }, { "epoch": 0.6089233650266818, "grad_norm": 0.11898493021726608, "learning_rate": 0.0005, "loss": 2.1048, "step": 159980 }, { "epoch": 0.6089614274948044, "grad_norm": 0.1387069821357727, "learning_rate": 0.0005, "loss": 2.1015, "step": 159990 }, { "epoch": 0.6089994899629272, "grad_norm": 0.1274258941411972, "learning_rate": 0.0005, "loss": 2.095, "step": 160000 }, { "epoch": 0.6090375524310498, "grad_norm": 0.14660042524337769, "learning_rate": 0.0005, "loss": 2.1091, "step": 160010 }, { "epoch": 0.6090756148991725, "grad_norm": 0.12244995683431625, "learning_rate": 0.0005, "loss": 2.1165, "step": 160020 }, { "epoch": 0.6091136773672952, "grad_norm": 0.1266985833644867, "learning_rate": 0.0005, "loss": 2.0891, "step": 160030 }, { "epoch": 0.6091517398354179, "grad_norm": 0.1258295476436615, "learning_rate": 0.0005, "loss": 2.0889, "step": 160040 }, { "epoch": 0.6091898023035406, "grad_norm": 0.11685820668935776, "learning_rate": 0.0005, "loss": 2.1076, "step": 160050 }, { "epoch": 0.6092278647716632, "grad_norm": 0.125748410820961, "learning_rate": 0.0005, "loss": 2.1183, "step": 160060 }, { "epoch": 0.6092659272397859, "grad_norm": 0.120501309633255, "learning_rate": 0.0005, "loss": 2.1172, "step": 160070 }, { "epoch": 0.6093039897079087, "grad_norm": 0.1285618394613266, "learning_rate": 0.0005, "loss": 2.1092, "step": 160080 }, { "epoch": 0.6093420521760313, "grad_norm": 0.12627874314785004, "learning_rate": 0.0005, "loss": 2.11, "step": 160090 }, { "epoch": 0.609380114644154, "grad_norm": 0.11696766316890717, "learning_rate": 0.0005, "loss": 2.1051, "step": 160100 }, { "epoch": 0.6094181771122766, "grad_norm": 0.13108812272548676, "learning_rate": 0.0005, "loss": 2.1192, "step": 160110 }, { "epoch": 0.6094562395803994, "grad_norm": 0.11752744764089584, "learning_rate": 0.0005, "loss": 2.101, "step": 160120 }, { "epoch": 0.6094943020485221, "grad_norm": 0.12401732057332993, "learning_rate": 0.0005, "loss": 2.1013, "step": 160130 }, { "epoch": 0.6095323645166447, "grad_norm": 0.11538513004779816, "learning_rate": 0.0005, "loss": 2.1094, "step": 160140 }, { "epoch": 0.6095704269847674, "grad_norm": 0.12397050857543945, "learning_rate": 0.0005, "loss": 2.1084, "step": 160150 }, { "epoch": 0.60960848945289, "grad_norm": 0.12734395265579224, "learning_rate": 0.0005, "loss": 2.1067, "step": 160160 }, { "epoch": 0.6096465519210128, "grad_norm": 0.11543060094118118, "learning_rate": 0.0005, "loss": 2.1086, "step": 160170 }, { "epoch": 0.6096846143891355, "grad_norm": 0.13912728428840637, "learning_rate": 0.0005, "loss": 2.1107, "step": 160180 }, { "epoch": 0.6097226768572581, "grad_norm": 0.12097612023353577, "learning_rate": 0.0005, "loss": 2.1134, "step": 160190 }, { "epoch": 0.6097607393253808, "grad_norm": 0.11677367985248566, "learning_rate": 0.0005, "loss": 2.105, "step": 160200 }, { "epoch": 0.6097988017935035, "grad_norm": 0.11732392013072968, "learning_rate": 0.0005, "loss": 2.1185, "step": 160210 }, { "epoch": 0.6098368642616262, "grad_norm": 0.11748608201742172, "learning_rate": 0.0005, "loss": 2.1, "step": 160220 }, { "epoch": 0.6098749267297489, "grad_norm": 0.12275837361812592, "learning_rate": 0.0005, "loss": 2.1141, "step": 160230 }, { "epoch": 0.6099129891978715, "grad_norm": 0.11576947569847107, "learning_rate": 0.0005, "loss": 2.1205, "step": 160240 }, { "epoch": 0.6099510516659943, "grad_norm": 0.12737642228603363, "learning_rate": 0.0005, "loss": 2.1173, "step": 160250 }, { "epoch": 0.6099891141341169, "grad_norm": 0.12359835207462311, "learning_rate": 0.0005, "loss": 2.1022, "step": 160260 }, { "epoch": 0.6100271766022396, "grad_norm": 0.12053797394037247, "learning_rate": 0.0005, "loss": 2.1288, "step": 160270 }, { "epoch": 0.6100652390703623, "grad_norm": 0.11976463347673416, "learning_rate": 0.0005, "loss": 2.1096, "step": 160280 }, { "epoch": 0.6101033015384849, "grad_norm": 0.13453908264636993, "learning_rate": 0.0005, "loss": 2.1026, "step": 160290 }, { "epoch": 0.6101413640066077, "grad_norm": 0.14670896530151367, "learning_rate": 0.0005, "loss": 2.1178, "step": 160300 }, { "epoch": 0.6101794264747303, "grad_norm": 0.12755566835403442, "learning_rate": 0.0005, "loss": 2.0956, "step": 160310 }, { "epoch": 0.610217488942853, "grad_norm": 0.13645780086517334, "learning_rate": 0.0005, "loss": 2.11, "step": 160320 }, { "epoch": 0.6102555514109756, "grad_norm": 0.13180497288703918, "learning_rate": 0.0005, "loss": 2.1181, "step": 160330 }, { "epoch": 0.6102936138790984, "grad_norm": 0.12781399488449097, "learning_rate": 0.0005, "loss": 2.1057, "step": 160340 }, { "epoch": 0.6103316763472211, "grad_norm": 0.13411003351211548, "learning_rate": 0.0005, "loss": 2.1144, "step": 160350 }, { "epoch": 0.6103697388153437, "grad_norm": 0.11541711539030075, "learning_rate": 0.0005, "loss": 2.1205, "step": 160360 }, { "epoch": 0.6104078012834664, "grad_norm": 0.13555757701396942, "learning_rate": 0.0005, "loss": 2.1067, "step": 160370 }, { "epoch": 0.6104458637515892, "grad_norm": 0.12073865532875061, "learning_rate": 0.0005, "loss": 2.1049, "step": 160380 }, { "epoch": 0.6104839262197118, "grad_norm": 0.13339164853096008, "learning_rate": 0.0005, "loss": 2.1154, "step": 160390 }, { "epoch": 0.6105219886878345, "grad_norm": 0.13692334294319153, "learning_rate": 0.0005, "loss": 2.1045, "step": 160400 }, { "epoch": 0.6105600511559571, "grad_norm": 0.12026344984769821, "learning_rate": 0.0005, "loss": 2.0925, "step": 160410 }, { "epoch": 0.6105981136240798, "grad_norm": 0.1249057799577713, "learning_rate": 0.0005, "loss": 2.1166, "step": 160420 }, { "epoch": 0.6106361760922026, "grad_norm": 0.12343106418848038, "learning_rate": 0.0005, "loss": 2.1048, "step": 160430 }, { "epoch": 0.6106742385603252, "grad_norm": 0.12635646760463715, "learning_rate": 0.0005, "loss": 2.1112, "step": 160440 }, { "epoch": 0.6107123010284479, "grad_norm": 0.12143664807081223, "learning_rate": 0.0005, "loss": 2.1236, "step": 160450 }, { "epoch": 0.6107503634965705, "grad_norm": 0.1384011059999466, "learning_rate": 0.0005, "loss": 2.1039, "step": 160460 }, { "epoch": 0.6107884259646933, "grad_norm": 0.11583954840898514, "learning_rate": 0.0005, "loss": 2.1017, "step": 160470 }, { "epoch": 0.610826488432816, "grad_norm": 0.13183361291885376, "learning_rate": 0.0005, "loss": 2.1093, "step": 160480 }, { "epoch": 0.6108645509009386, "grad_norm": 0.12459275871515274, "learning_rate": 0.0005, "loss": 2.1195, "step": 160490 }, { "epoch": 0.6109026133690613, "grad_norm": 0.1303837150335312, "learning_rate": 0.0005, "loss": 2.1213, "step": 160500 }, { "epoch": 0.610940675837184, "grad_norm": 0.12999789416790009, "learning_rate": 0.0005, "loss": 2.0995, "step": 160510 }, { "epoch": 0.6109787383053067, "grad_norm": 0.11867684870958328, "learning_rate": 0.0005, "loss": 2.1164, "step": 160520 }, { "epoch": 0.6110168007734293, "grad_norm": 0.133700430393219, "learning_rate": 0.0005, "loss": 2.115, "step": 160530 }, { "epoch": 0.611054863241552, "grad_norm": 0.1394004225730896, "learning_rate": 0.0005, "loss": 2.1054, "step": 160540 }, { "epoch": 0.6110929257096748, "grad_norm": 0.12464521080255508, "learning_rate": 0.0005, "loss": 2.1172, "step": 160550 }, { "epoch": 0.6111309881777974, "grad_norm": 0.129132479429245, "learning_rate": 0.0005, "loss": 2.1082, "step": 160560 }, { "epoch": 0.6111690506459201, "grad_norm": 0.13225175440311432, "learning_rate": 0.0005, "loss": 2.0994, "step": 160570 }, { "epoch": 0.6112071131140427, "grad_norm": 0.11520901322364807, "learning_rate": 0.0005, "loss": 2.102, "step": 160580 }, { "epoch": 0.6112451755821654, "grad_norm": 0.11503398418426514, "learning_rate": 0.0005, "loss": 2.106, "step": 160590 }, { "epoch": 0.6112832380502882, "grad_norm": 0.13115368783473969, "learning_rate": 0.0005, "loss": 2.1056, "step": 160600 }, { "epoch": 0.6113213005184108, "grad_norm": 0.14434094727039337, "learning_rate": 0.0005, "loss": 2.1048, "step": 160610 }, { "epoch": 0.6113593629865335, "grad_norm": 0.11493008583784103, "learning_rate": 0.0005, "loss": 2.1141, "step": 160620 }, { "epoch": 0.6113974254546561, "grad_norm": 0.1224646270275116, "learning_rate": 0.0005, "loss": 2.1137, "step": 160630 }, { "epoch": 0.6114354879227789, "grad_norm": 0.12974074482917786, "learning_rate": 0.0005, "loss": 2.1169, "step": 160640 }, { "epoch": 0.6114735503909016, "grad_norm": 0.12477756291627884, "learning_rate": 0.0005, "loss": 2.1057, "step": 160650 }, { "epoch": 0.6115116128590242, "grad_norm": 0.11827721446752548, "learning_rate": 0.0005, "loss": 2.1279, "step": 160660 }, { "epoch": 0.6115496753271469, "grad_norm": 0.12852802872657776, "learning_rate": 0.0005, "loss": 2.1032, "step": 160670 }, { "epoch": 0.6115877377952696, "grad_norm": 0.11004035919904709, "learning_rate": 0.0005, "loss": 2.108, "step": 160680 }, { "epoch": 0.6116258002633923, "grad_norm": 0.12859857082366943, "learning_rate": 0.0005, "loss": 2.1218, "step": 160690 }, { "epoch": 0.611663862731515, "grad_norm": 0.1396910548210144, "learning_rate": 0.0005, "loss": 2.0984, "step": 160700 }, { "epoch": 0.6117019251996376, "grad_norm": 0.13013729453086853, "learning_rate": 0.0005, "loss": 2.1059, "step": 160710 }, { "epoch": 0.6117399876677603, "grad_norm": 0.11966916918754578, "learning_rate": 0.0005, "loss": 2.1198, "step": 160720 }, { "epoch": 0.611778050135883, "grad_norm": 0.1219642385840416, "learning_rate": 0.0005, "loss": 2.1125, "step": 160730 }, { "epoch": 0.6118161126040057, "grad_norm": 0.11183346807956696, "learning_rate": 0.0005, "loss": 2.1146, "step": 160740 }, { "epoch": 0.6118541750721284, "grad_norm": 0.12620967626571655, "learning_rate": 0.0005, "loss": 2.1045, "step": 160750 }, { "epoch": 0.611892237540251, "grad_norm": 0.12379848212003708, "learning_rate": 0.0005, "loss": 2.1103, "step": 160760 }, { "epoch": 0.6119303000083738, "grad_norm": 0.1288774013519287, "learning_rate": 0.0005, "loss": 2.12, "step": 160770 }, { "epoch": 0.6119683624764964, "grad_norm": 0.12595853209495544, "learning_rate": 0.0005, "loss": 2.1205, "step": 160780 }, { "epoch": 0.6120064249446191, "grad_norm": 0.1244102343916893, "learning_rate": 0.0005, "loss": 2.1091, "step": 160790 }, { "epoch": 0.6120444874127418, "grad_norm": 0.12475734204053879, "learning_rate": 0.0005, "loss": 2.1306, "step": 160800 }, { "epoch": 0.6120825498808645, "grad_norm": 0.142828568816185, "learning_rate": 0.0005, "loss": 2.099, "step": 160810 }, { "epoch": 0.6121206123489872, "grad_norm": 0.1296069175004959, "learning_rate": 0.0005, "loss": 2.1061, "step": 160820 }, { "epoch": 0.6121586748171098, "grad_norm": 0.130901500582695, "learning_rate": 0.0005, "loss": 2.1315, "step": 160830 }, { "epoch": 0.6121967372852325, "grad_norm": 0.12161926180124283, "learning_rate": 0.0005, "loss": 2.1133, "step": 160840 }, { "epoch": 0.6122347997533552, "grad_norm": 0.1284041553735733, "learning_rate": 0.0005, "loss": 2.1163, "step": 160850 }, { "epoch": 0.6122728622214779, "grad_norm": 0.1331159770488739, "learning_rate": 0.0005, "loss": 2.0981, "step": 160860 }, { "epoch": 0.6123109246896006, "grad_norm": 0.12174385040998459, "learning_rate": 0.0005, "loss": 2.1039, "step": 160870 }, { "epoch": 0.6123489871577232, "grad_norm": 0.1177845224738121, "learning_rate": 0.0005, "loss": 2.0947, "step": 160880 }, { "epoch": 0.6123870496258459, "grad_norm": 0.11481917649507523, "learning_rate": 0.0005, "loss": 2.0904, "step": 160890 }, { "epoch": 0.6124251120939687, "grad_norm": 0.11830814182758331, "learning_rate": 0.0005, "loss": 2.1125, "step": 160900 }, { "epoch": 0.6124631745620913, "grad_norm": 0.12196170538663864, "learning_rate": 0.0005, "loss": 2.1017, "step": 160910 }, { "epoch": 0.612501237030214, "grad_norm": 0.1265617161989212, "learning_rate": 0.0005, "loss": 2.1094, "step": 160920 }, { "epoch": 0.6125392994983366, "grad_norm": 0.13557623326778412, "learning_rate": 0.0005, "loss": 2.1089, "step": 160930 }, { "epoch": 0.6125773619664594, "grad_norm": 0.13241899013519287, "learning_rate": 0.0005, "loss": 2.1174, "step": 160940 }, { "epoch": 0.6126154244345821, "grad_norm": 0.11941409111022949, "learning_rate": 0.0005, "loss": 2.1128, "step": 160950 }, { "epoch": 0.6126534869027047, "grad_norm": 0.11615356057882309, "learning_rate": 0.0005, "loss": 2.1187, "step": 160960 }, { "epoch": 0.6126915493708274, "grad_norm": 0.11697541177272797, "learning_rate": 0.0005, "loss": 2.0934, "step": 160970 }, { "epoch": 0.6127296118389501, "grad_norm": 0.11659201234579086, "learning_rate": 0.0005, "loss": 2.107, "step": 160980 }, { "epoch": 0.6127676743070728, "grad_norm": 0.13227131962776184, "learning_rate": 0.0005, "loss": 2.1149, "step": 160990 }, { "epoch": 0.6128057367751955, "grad_norm": 0.12896333634853363, "learning_rate": 0.0005, "loss": 2.0977, "step": 161000 }, { "epoch": 0.6128437992433181, "grad_norm": 0.13977555930614471, "learning_rate": 0.0005, "loss": 2.1136, "step": 161010 }, { "epoch": 0.6128818617114408, "grad_norm": 0.13483978807926178, "learning_rate": 0.0005, "loss": 2.1102, "step": 161020 }, { "epoch": 0.6129199241795635, "grad_norm": 0.145457461476326, "learning_rate": 0.0005, "loss": 2.1201, "step": 161030 }, { "epoch": 0.6129579866476862, "grad_norm": 0.13186997175216675, "learning_rate": 0.0005, "loss": 2.1105, "step": 161040 }, { "epoch": 0.6129960491158088, "grad_norm": 0.1312837451696396, "learning_rate": 0.0005, "loss": 2.1191, "step": 161050 }, { "epoch": 0.6130341115839315, "grad_norm": 0.10970824211835861, "learning_rate": 0.0005, "loss": 2.1115, "step": 161060 }, { "epoch": 0.6130721740520543, "grad_norm": 0.12316413968801498, "learning_rate": 0.0005, "loss": 2.1098, "step": 161070 }, { "epoch": 0.6131102365201769, "grad_norm": 0.12216629087924957, "learning_rate": 0.0005, "loss": 2.1253, "step": 161080 }, { "epoch": 0.6131482989882996, "grad_norm": 0.12921833992004395, "learning_rate": 0.0005, "loss": 2.119, "step": 161090 }, { "epoch": 0.6131863614564222, "grad_norm": 0.1225934848189354, "learning_rate": 0.0005, "loss": 2.1085, "step": 161100 }, { "epoch": 0.613224423924545, "grad_norm": 0.13255460560321808, "learning_rate": 0.0005, "loss": 2.129, "step": 161110 }, { "epoch": 0.6132624863926677, "grad_norm": 0.11727086454629898, "learning_rate": 0.0005, "loss": 2.0972, "step": 161120 }, { "epoch": 0.6133005488607903, "grad_norm": 0.13540498912334442, "learning_rate": 0.0005, "loss": 2.1161, "step": 161130 }, { "epoch": 0.613338611328913, "grad_norm": 0.11803429573774338, "learning_rate": 0.0005, "loss": 2.1202, "step": 161140 }, { "epoch": 0.6133766737970356, "grad_norm": 0.12163504213094711, "learning_rate": 0.0005, "loss": 2.1111, "step": 161150 }, { "epoch": 0.6134147362651584, "grad_norm": 0.10948128253221512, "learning_rate": 0.0005, "loss": 2.119, "step": 161160 }, { "epoch": 0.6134527987332811, "grad_norm": 0.1163942962884903, "learning_rate": 0.0005, "loss": 2.1129, "step": 161170 }, { "epoch": 0.6134908612014037, "grad_norm": 0.12510649859905243, "learning_rate": 0.0005, "loss": 2.1007, "step": 161180 }, { "epoch": 0.6135289236695264, "grad_norm": 0.11732012778520584, "learning_rate": 0.0005, "loss": 2.1127, "step": 161190 }, { "epoch": 0.6135669861376492, "grad_norm": 0.12112889438867569, "learning_rate": 0.0005, "loss": 2.1203, "step": 161200 }, { "epoch": 0.6136050486057718, "grad_norm": 0.131588414311409, "learning_rate": 0.0005, "loss": 2.1155, "step": 161210 }, { "epoch": 0.6136431110738945, "grad_norm": 0.12222830951213837, "learning_rate": 0.0005, "loss": 2.1057, "step": 161220 }, { "epoch": 0.6136811735420171, "grad_norm": 0.12943825125694275, "learning_rate": 0.0005, "loss": 2.1184, "step": 161230 }, { "epoch": 0.6137192360101399, "grad_norm": 0.11008955538272858, "learning_rate": 0.0005, "loss": 2.1102, "step": 161240 }, { "epoch": 0.6137572984782625, "grad_norm": 0.11975482106208801, "learning_rate": 0.0005, "loss": 2.1146, "step": 161250 }, { "epoch": 0.6137953609463852, "grad_norm": 0.11721187084913254, "learning_rate": 0.0005, "loss": 2.1059, "step": 161260 }, { "epoch": 0.6138334234145079, "grad_norm": 0.13013498485088348, "learning_rate": 0.0005, "loss": 2.106, "step": 161270 }, { "epoch": 0.6138714858826305, "grad_norm": 0.1277289241552353, "learning_rate": 0.0005, "loss": 2.1072, "step": 161280 }, { "epoch": 0.6139095483507533, "grad_norm": 0.12991102039813995, "learning_rate": 0.0005, "loss": 2.1242, "step": 161290 }, { "epoch": 0.6139476108188759, "grad_norm": 0.11442521959543228, "learning_rate": 0.0005, "loss": 2.1313, "step": 161300 }, { "epoch": 0.6139856732869986, "grad_norm": 0.11590270698070526, "learning_rate": 0.0005, "loss": 2.0906, "step": 161310 }, { "epoch": 0.6140237357551213, "grad_norm": 0.12811385095119476, "learning_rate": 0.0005, "loss": 2.1287, "step": 161320 }, { "epoch": 0.614061798223244, "grad_norm": 0.1309431940317154, "learning_rate": 0.0005, "loss": 2.1158, "step": 161330 }, { "epoch": 0.6140998606913667, "grad_norm": 0.11549009382724762, "learning_rate": 0.0005, "loss": 2.1129, "step": 161340 }, { "epoch": 0.6141379231594893, "grad_norm": 0.1286330223083496, "learning_rate": 0.0005, "loss": 2.1294, "step": 161350 }, { "epoch": 0.614175985627612, "grad_norm": 0.12446651607751846, "learning_rate": 0.0005, "loss": 2.1094, "step": 161360 }, { "epoch": 0.6142140480957348, "grad_norm": 0.12710335850715637, "learning_rate": 0.0005, "loss": 2.1293, "step": 161370 }, { "epoch": 0.6142521105638574, "grad_norm": 0.13139621913433075, "learning_rate": 0.0005, "loss": 2.1015, "step": 161380 }, { "epoch": 0.6142901730319801, "grad_norm": 0.1189979761838913, "learning_rate": 0.0005, "loss": 2.1107, "step": 161390 }, { "epoch": 0.6143282355001027, "grad_norm": 0.12037677317857742, "learning_rate": 0.0005, "loss": 2.0992, "step": 161400 }, { "epoch": 0.6143662979682255, "grad_norm": 0.11824595183134079, "learning_rate": 0.0005, "loss": 2.1136, "step": 161410 }, { "epoch": 0.6144043604363482, "grad_norm": 0.13079175353050232, "learning_rate": 0.0005, "loss": 2.1289, "step": 161420 }, { "epoch": 0.6144424229044708, "grad_norm": 0.12407071888446808, "learning_rate": 0.0005, "loss": 2.0972, "step": 161430 }, { "epoch": 0.6144804853725935, "grad_norm": 0.13047856092453003, "learning_rate": 0.0005, "loss": 2.1289, "step": 161440 }, { "epoch": 0.6145185478407161, "grad_norm": 0.11712726205587387, "learning_rate": 0.0005, "loss": 2.1, "step": 161450 }, { "epoch": 0.6145566103088389, "grad_norm": 0.12459481507539749, "learning_rate": 0.0005, "loss": 2.1236, "step": 161460 }, { "epoch": 0.6145946727769616, "grad_norm": 0.11205767095088959, "learning_rate": 0.0005, "loss": 2.1142, "step": 161470 }, { "epoch": 0.6146327352450842, "grad_norm": 0.13772039115428925, "learning_rate": 0.0005, "loss": 2.1139, "step": 161480 }, { "epoch": 0.6146707977132069, "grad_norm": 0.13500213623046875, "learning_rate": 0.0005, "loss": 2.0978, "step": 161490 }, { "epoch": 0.6147088601813296, "grad_norm": 0.12265978753566742, "learning_rate": 0.0005, "loss": 2.1076, "step": 161500 }, { "epoch": 0.6147469226494523, "grad_norm": 0.11898373067378998, "learning_rate": 0.0005, "loss": 2.1165, "step": 161510 }, { "epoch": 0.614784985117575, "grad_norm": 0.12593039870262146, "learning_rate": 0.0005, "loss": 2.0989, "step": 161520 }, { "epoch": 0.6148230475856976, "grad_norm": 0.12925824522972107, "learning_rate": 0.0005, "loss": 2.0987, "step": 161530 }, { "epoch": 0.6148611100538204, "grad_norm": 0.1286146491765976, "learning_rate": 0.0005, "loss": 2.1105, "step": 161540 }, { "epoch": 0.614899172521943, "grad_norm": 0.13130497932434082, "learning_rate": 0.0005, "loss": 2.1072, "step": 161550 }, { "epoch": 0.6149372349900657, "grad_norm": 0.13628366589546204, "learning_rate": 0.0005, "loss": 2.1175, "step": 161560 }, { "epoch": 0.6149752974581884, "grad_norm": 0.12789924442768097, "learning_rate": 0.0005, "loss": 2.0948, "step": 161570 }, { "epoch": 0.615013359926311, "grad_norm": 0.126933291554451, "learning_rate": 0.0005, "loss": 2.1082, "step": 161580 }, { "epoch": 0.6150514223944338, "grad_norm": 0.12284158915281296, "learning_rate": 0.0005, "loss": 2.1169, "step": 161590 }, { "epoch": 0.6150894848625564, "grad_norm": 0.13748809695243835, "learning_rate": 0.0005, "loss": 2.1245, "step": 161600 }, { "epoch": 0.6151275473306791, "grad_norm": 0.12846975028514862, "learning_rate": 0.0005, "loss": 2.1188, "step": 161610 }, { "epoch": 0.6151656097988017, "grad_norm": 0.1149773970246315, "learning_rate": 0.0005, "loss": 2.1102, "step": 161620 }, { "epoch": 0.6152036722669245, "grad_norm": 0.1303306370973587, "learning_rate": 0.0005, "loss": 2.11, "step": 161630 }, { "epoch": 0.6152417347350472, "grad_norm": 0.1264232099056244, "learning_rate": 0.0005, "loss": 2.1226, "step": 161640 }, { "epoch": 0.6152797972031698, "grad_norm": 0.11736659705638885, "learning_rate": 0.0005, "loss": 2.0973, "step": 161650 }, { "epoch": 0.6153178596712925, "grad_norm": 0.13512319326400757, "learning_rate": 0.0005, "loss": 2.116, "step": 161660 }, { "epoch": 0.6153559221394153, "grad_norm": 0.10682762414216995, "learning_rate": 0.0005, "loss": 2.1212, "step": 161670 }, { "epoch": 0.6153939846075379, "grad_norm": 0.10843072831630707, "learning_rate": 0.0005, "loss": 2.1191, "step": 161680 }, { "epoch": 0.6154320470756606, "grad_norm": 0.1175176352262497, "learning_rate": 0.0005, "loss": 2.1074, "step": 161690 }, { "epoch": 0.6154701095437832, "grad_norm": 0.11914224177598953, "learning_rate": 0.0005, "loss": 2.1108, "step": 161700 }, { "epoch": 0.6155081720119059, "grad_norm": 0.11765717715024948, "learning_rate": 0.0005, "loss": 2.1327, "step": 161710 }, { "epoch": 0.6155462344800287, "grad_norm": 0.11849711090326309, "learning_rate": 0.0005, "loss": 2.0947, "step": 161720 }, { "epoch": 0.6155842969481513, "grad_norm": 0.13567321002483368, "learning_rate": 0.0005, "loss": 2.0982, "step": 161730 }, { "epoch": 0.615622359416274, "grad_norm": 0.13653963804244995, "learning_rate": 0.0005, "loss": 2.1137, "step": 161740 }, { "epoch": 0.6156604218843966, "grad_norm": 0.11373693495988846, "learning_rate": 0.0005, "loss": 2.1263, "step": 161750 }, { "epoch": 0.6156984843525194, "grad_norm": 0.1299883872270584, "learning_rate": 0.0005, "loss": 2.1122, "step": 161760 }, { "epoch": 0.615736546820642, "grad_norm": 0.11805561184883118, "learning_rate": 0.0005, "loss": 2.1048, "step": 161770 }, { "epoch": 0.6157746092887647, "grad_norm": 0.12167860567569733, "learning_rate": 0.0005, "loss": 2.1036, "step": 161780 }, { "epoch": 0.6158126717568874, "grad_norm": 0.12831658124923706, "learning_rate": 0.0005, "loss": 2.1183, "step": 161790 }, { "epoch": 0.6158507342250101, "grad_norm": 0.12048164010047913, "learning_rate": 0.0005, "loss": 2.1081, "step": 161800 }, { "epoch": 0.6158887966931328, "grad_norm": 0.12541131675243378, "learning_rate": 0.0005, "loss": 2.114, "step": 161810 }, { "epoch": 0.6159268591612554, "grad_norm": 0.12292610853910446, "learning_rate": 0.0005, "loss": 2.1274, "step": 161820 }, { "epoch": 0.6159649216293781, "grad_norm": 0.11723213642835617, "learning_rate": 0.0005, "loss": 2.103, "step": 161830 }, { "epoch": 0.6160029840975009, "grad_norm": 0.1285085827112198, "learning_rate": 0.0005, "loss": 2.1084, "step": 161840 }, { "epoch": 0.6160410465656235, "grad_norm": 0.12634149193763733, "learning_rate": 0.0005, "loss": 2.1111, "step": 161850 }, { "epoch": 0.6160791090337462, "grad_norm": 0.12730668485164642, "learning_rate": 0.0005, "loss": 2.1117, "step": 161860 }, { "epoch": 0.6161171715018688, "grad_norm": 0.12409865856170654, "learning_rate": 0.0005, "loss": 2.0979, "step": 161870 }, { "epoch": 0.6161552339699915, "grad_norm": 0.4979267716407776, "learning_rate": 0.0005, "loss": 2.0942, "step": 161880 }, { "epoch": 0.6161932964381143, "grad_norm": 0.1151154488325119, "learning_rate": 0.0005, "loss": 2.1098, "step": 161890 }, { "epoch": 0.6162313589062369, "grad_norm": 0.12512674927711487, "learning_rate": 0.0005, "loss": 2.1035, "step": 161900 }, { "epoch": 0.6162694213743596, "grad_norm": 0.12398797273635864, "learning_rate": 0.0005, "loss": 2.1059, "step": 161910 }, { "epoch": 0.6163074838424822, "grad_norm": 0.13985444605350494, "learning_rate": 0.0005, "loss": 2.1108, "step": 161920 }, { "epoch": 0.616345546310605, "grad_norm": 0.1274701952934265, "learning_rate": 0.0005, "loss": 2.1019, "step": 161930 }, { "epoch": 0.6163836087787277, "grad_norm": 0.11744947731494904, "learning_rate": 0.0005, "loss": 2.1242, "step": 161940 }, { "epoch": 0.6164216712468503, "grad_norm": 0.11642337590456009, "learning_rate": 0.0005, "loss": 2.1123, "step": 161950 }, { "epoch": 0.616459733714973, "grad_norm": 0.12301789969205856, "learning_rate": 0.0005, "loss": 2.1089, "step": 161960 }, { "epoch": 0.6164977961830957, "grad_norm": 0.11755318194627762, "learning_rate": 0.0005, "loss": 2.0998, "step": 161970 }, { "epoch": 0.6165358586512184, "grad_norm": 0.12033776938915253, "learning_rate": 0.0005, "loss": 2.1068, "step": 161980 }, { "epoch": 0.6165739211193411, "grad_norm": 0.1159583255648613, "learning_rate": 0.0005, "loss": 2.1265, "step": 161990 }, { "epoch": 0.6166119835874637, "grad_norm": 0.1193026751279831, "learning_rate": 0.0005, "loss": 2.1127, "step": 162000 }, { "epoch": 0.6166500460555864, "grad_norm": 0.13349869847297668, "learning_rate": 0.0005, "loss": 2.0997, "step": 162010 }, { "epoch": 0.6166881085237091, "grad_norm": 0.12275967001914978, "learning_rate": 0.0005, "loss": 2.113, "step": 162020 }, { "epoch": 0.6167261709918318, "grad_norm": 0.11640046536922455, "learning_rate": 0.0005, "loss": 2.1176, "step": 162030 }, { "epoch": 0.6167642334599545, "grad_norm": 0.12108620256185532, "learning_rate": 0.0005, "loss": 2.1107, "step": 162040 }, { "epoch": 0.6168022959280771, "grad_norm": 0.11861438304185867, "learning_rate": 0.0005, "loss": 2.1093, "step": 162050 }, { "epoch": 0.6168403583961999, "grad_norm": 0.12845297157764435, "learning_rate": 0.0005, "loss": 2.1163, "step": 162060 }, { "epoch": 0.6168784208643225, "grad_norm": 0.12152732163667679, "learning_rate": 0.0005, "loss": 2.1085, "step": 162070 }, { "epoch": 0.6169164833324452, "grad_norm": 0.11804987490177155, "learning_rate": 0.0005, "loss": 2.1206, "step": 162080 }, { "epoch": 0.6169545458005679, "grad_norm": 0.12480165809392929, "learning_rate": 0.0005, "loss": 2.1141, "step": 162090 }, { "epoch": 0.6169926082686906, "grad_norm": 0.12539172172546387, "learning_rate": 0.0005, "loss": 2.1075, "step": 162100 }, { "epoch": 0.6170306707368133, "grad_norm": 0.11880171298980713, "learning_rate": 0.0005, "loss": 2.1106, "step": 162110 }, { "epoch": 0.6170687332049359, "grad_norm": 0.11587626487016678, "learning_rate": 0.0005, "loss": 2.1192, "step": 162120 }, { "epoch": 0.6171067956730586, "grad_norm": 0.6760389804840088, "learning_rate": 0.0005, "loss": 2.1056, "step": 162130 }, { "epoch": 0.6171448581411813, "grad_norm": 0.10932061821222305, "learning_rate": 0.0005, "loss": 2.1241, "step": 162140 }, { "epoch": 0.617182920609304, "grad_norm": 0.11558043211698532, "learning_rate": 0.0005, "loss": 2.1021, "step": 162150 }, { "epoch": 0.6172209830774267, "grad_norm": 0.12326429784297943, "learning_rate": 0.0005, "loss": 2.112, "step": 162160 }, { "epoch": 0.6172590455455493, "grad_norm": 0.11882788687944412, "learning_rate": 0.0005, "loss": 2.1035, "step": 162170 }, { "epoch": 0.617297108013672, "grad_norm": 0.11889998614788055, "learning_rate": 0.0005, "loss": 2.1046, "step": 162180 }, { "epoch": 0.6173351704817948, "grad_norm": 0.11966200917959213, "learning_rate": 0.0005, "loss": 2.1133, "step": 162190 }, { "epoch": 0.6173732329499174, "grad_norm": 0.12400029599666595, "learning_rate": 0.0005, "loss": 2.1088, "step": 162200 }, { "epoch": 0.6174112954180401, "grad_norm": 0.13062159717082977, "learning_rate": 0.0005, "loss": 2.1037, "step": 162210 }, { "epoch": 0.6174493578861627, "grad_norm": 0.12579452991485596, "learning_rate": 0.0005, "loss": 2.1147, "step": 162220 }, { "epoch": 0.6174874203542855, "grad_norm": 0.12085752189159393, "learning_rate": 0.0005, "loss": 2.1117, "step": 162230 }, { "epoch": 0.6175254828224082, "grad_norm": 0.1321951448917389, "learning_rate": 0.0005, "loss": 2.1116, "step": 162240 }, { "epoch": 0.6175635452905308, "grad_norm": 0.11965358257293701, "learning_rate": 0.0005, "loss": 2.1075, "step": 162250 }, { "epoch": 0.6176016077586535, "grad_norm": 0.13514268398284912, "learning_rate": 0.0005, "loss": 2.1165, "step": 162260 }, { "epoch": 0.6176396702267762, "grad_norm": 0.12487059831619263, "learning_rate": 0.0005, "loss": 2.1132, "step": 162270 }, { "epoch": 0.6176777326948989, "grad_norm": 0.13903431594371796, "learning_rate": 0.0005, "loss": 2.108, "step": 162280 }, { "epoch": 0.6177157951630216, "grad_norm": 0.11895901709794998, "learning_rate": 0.0005, "loss": 2.1341, "step": 162290 }, { "epoch": 0.6177538576311442, "grad_norm": 0.1182001382112503, "learning_rate": 0.0005, "loss": 2.1133, "step": 162300 }, { "epoch": 0.6177919200992669, "grad_norm": 0.13222895562648773, "learning_rate": 0.0005, "loss": 2.1086, "step": 162310 }, { "epoch": 0.6178299825673896, "grad_norm": 0.13359028100967407, "learning_rate": 0.0005, "loss": 2.0987, "step": 162320 }, { "epoch": 0.6178680450355123, "grad_norm": 0.12804371118545532, "learning_rate": 0.0005, "loss": 2.1114, "step": 162330 }, { "epoch": 0.617906107503635, "grad_norm": 0.11614733189344406, "learning_rate": 0.0005, "loss": 2.1095, "step": 162340 }, { "epoch": 0.6179441699717576, "grad_norm": 0.13432751595973969, "learning_rate": 0.0005, "loss": 2.1175, "step": 162350 }, { "epoch": 0.6179822324398804, "grad_norm": 0.1297280639410019, "learning_rate": 0.0005, "loss": 2.1155, "step": 162360 }, { "epoch": 0.618020294908003, "grad_norm": 0.13260680437088013, "learning_rate": 0.0005, "loss": 2.1112, "step": 162370 }, { "epoch": 0.6180583573761257, "grad_norm": 0.12126235663890839, "learning_rate": 0.0005, "loss": 2.1108, "step": 162380 }, { "epoch": 0.6180964198442483, "grad_norm": 0.11476351320743561, "learning_rate": 0.0005, "loss": 2.1136, "step": 162390 }, { "epoch": 0.6181344823123711, "grad_norm": 0.11820220202207565, "learning_rate": 0.0005, "loss": 2.1116, "step": 162400 }, { "epoch": 0.6181725447804938, "grad_norm": 0.11576348543167114, "learning_rate": 0.0005, "loss": 2.108, "step": 162410 }, { "epoch": 0.6182106072486164, "grad_norm": 0.12855997681617737, "learning_rate": 0.0005, "loss": 2.1121, "step": 162420 }, { "epoch": 0.6182486697167391, "grad_norm": 0.1176319420337677, "learning_rate": 0.0005, "loss": 2.1154, "step": 162430 }, { "epoch": 0.6182867321848617, "grad_norm": 0.12622740864753723, "learning_rate": 0.0005, "loss": 2.1072, "step": 162440 }, { "epoch": 0.6183247946529845, "grad_norm": 0.11863669008016586, "learning_rate": 0.0005, "loss": 2.106, "step": 162450 }, { "epoch": 0.6183628571211072, "grad_norm": 0.1218191608786583, "learning_rate": 0.0005, "loss": 2.1022, "step": 162460 }, { "epoch": 0.6184009195892298, "grad_norm": 0.13541346788406372, "learning_rate": 0.0005, "loss": 2.1145, "step": 162470 }, { "epoch": 0.6184389820573525, "grad_norm": 0.11973481625318527, "learning_rate": 0.0005, "loss": 2.1064, "step": 162480 }, { "epoch": 0.6184770445254753, "grad_norm": 0.12786909937858582, "learning_rate": 0.0005, "loss": 2.1145, "step": 162490 }, { "epoch": 0.6185151069935979, "grad_norm": 0.12716612219810486, "learning_rate": 0.0005, "loss": 2.1027, "step": 162500 }, { "epoch": 0.6185531694617206, "grad_norm": 0.12912815809249878, "learning_rate": 0.0005, "loss": 2.1109, "step": 162510 }, { "epoch": 0.6185912319298432, "grad_norm": 0.14242874085903168, "learning_rate": 0.0005, "loss": 2.128, "step": 162520 }, { "epoch": 0.618629294397966, "grad_norm": 0.12945939600467682, "learning_rate": 0.0005, "loss": 2.1147, "step": 162530 }, { "epoch": 0.6186673568660886, "grad_norm": 0.1209016814827919, "learning_rate": 0.0005, "loss": 2.109, "step": 162540 }, { "epoch": 0.6187054193342113, "grad_norm": 0.1292838752269745, "learning_rate": 0.0005, "loss": 2.1037, "step": 162550 }, { "epoch": 0.618743481802334, "grad_norm": 0.12523207068443298, "learning_rate": 0.0005, "loss": 2.1037, "step": 162560 }, { "epoch": 0.6187815442704566, "grad_norm": 0.12495558708906174, "learning_rate": 0.0005, "loss": 2.1083, "step": 162570 }, { "epoch": 0.6188196067385794, "grad_norm": 0.11377590894699097, "learning_rate": 0.0005, "loss": 2.0985, "step": 162580 }, { "epoch": 0.618857669206702, "grad_norm": 0.12689173221588135, "learning_rate": 0.0005, "loss": 2.1236, "step": 162590 }, { "epoch": 0.6188957316748247, "grad_norm": 0.12863075733184814, "learning_rate": 0.0005, "loss": 2.1083, "step": 162600 }, { "epoch": 0.6189337941429474, "grad_norm": 0.129258930683136, "learning_rate": 0.0005, "loss": 2.1202, "step": 162610 }, { "epoch": 0.6189718566110701, "grad_norm": 0.12370667606592178, "learning_rate": 0.0005, "loss": 2.1004, "step": 162620 }, { "epoch": 0.6190099190791928, "grad_norm": 0.13053497672080994, "learning_rate": 0.0005, "loss": 2.1197, "step": 162630 }, { "epoch": 0.6190479815473154, "grad_norm": 0.11634115129709244, "learning_rate": 0.0005, "loss": 2.1022, "step": 162640 }, { "epoch": 0.6190860440154381, "grad_norm": 0.11391100287437439, "learning_rate": 0.0005, "loss": 2.1101, "step": 162650 }, { "epoch": 0.6191241064835609, "grad_norm": 0.11741019040346146, "learning_rate": 0.0005, "loss": 2.1109, "step": 162660 }, { "epoch": 0.6191621689516835, "grad_norm": 0.11574865132570267, "learning_rate": 0.0005, "loss": 2.1207, "step": 162670 }, { "epoch": 0.6192002314198062, "grad_norm": 0.12010656297206879, "learning_rate": 0.0005, "loss": 2.1068, "step": 162680 }, { "epoch": 0.6192382938879288, "grad_norm": 0.1286909133195877, "learning_rate": 0.0005, "loss": 2.1173, "step": 162690 }, { "epoch": 0.6192763563560516, "grad_norm": 0.11293233931064606, "learning_rate": 0.0005, "loss": 2.1255, "step": 162700 }, { "epoch": 0.6193144188241743, "grad_norm": 0.12448955327272415, "learning_rate": 0.0005, "loss": 2.1126, "step": 162710 }, { "epoch": 0.6193524812922969, "grad_norm": 0.12170399725437164, "learning_rate": 0.0005, "loss": 2.1128, "step": 162720 }, { "epoch": 0.6193905437604196, "grad_norm": 0.11960456520318985, "learning_rate": 0.0005, "loss": 2.1109, "step": 162730 }, { "epoch": 0.6194286062285422, "grad_norm": 0.12298374623060226, "learning_rate": 0.0005, "loss": 2.1111, "step": 162740 }, { "epoch": 0.619466668696665, "grad_norm": 0.12716816365718842, "learning_rate": 0.0005, "loss": 2.1032, "step": 162750 }, { "epoch": 0.6195047311647877, "grad_norm": 0.12332828342914581, "learning_rate": 0.0005, "loss": 2.0954, "step": 162760 }, { "epoch": 0.6195427936329103, "grad_norm": 0.1455685794353485, "learning_rate": 0.0005, "loss": 2.1168, "step": 162770 }, { "epoch": 0.619580856101033, "grad_norm": 0.11270184069871902, "learning_rate": 0.0005, "loss": 2.1164, "step": 162780 }, { "epoch": 0.6196189185691557, "grad_norm": 0.12589813768863678, "learning_rate": 0.0005, "loss": 2.1115, "step": 162790 }, { "epoch": 0.6196569810372784, "grad_norm": 0.11809215694665909, "learning_rate": 0.0005, "loss": 2.1058, "step": 162800 }, { "epoch": 0.619695043505401, "grad_norm": 0.1226392388343811, "learning_rate": 0.0005, "loss": 2.1249, "step": 162810 }, { "epoch": 0.6197331059735237, "grad_norm": 0.12393829971551895, "learning_rate": 0.0005, "loss": 2.1143, "step": 162820 }, { "epoch": 0.6197711684416465, "grad_norm": 0.11937522888183594, "learning_rate": 0.0005, "loss": 2.1301, "step": 162830 }, { "epoch": 0.6198092309097691, "grad_norm": 0.11727029085159302, "learning_rate": 0.0005, "loss": 2.1133, "step": 162840 }, { "epoch": 0.6198472933778918, "grad_norm": 0.1266438364982605, "learning_rate": 0.0005, "loss": 2.115, "step": 162850 }, { "epoch": 0.6198853558460145, "grad_norm": 0.14234566688537598, "learning_rate": 0.0005, "loss": 2.1041, "step": 162860 }, { "epoch": 0.6199234183141371, "grad_norm": 0.12679512798786163, "learning_rate": 0.0005, "loss": 2.1081, "step": 162870 }, { "epoch": 0.6199614807822599, "grad_norm": 0.136153444647789, "learning_rate": 0.0005, "loss": 2.0946, "step": 162880 }, { "epoch": 0.6199995432503825, "grad_norm": 0.12381961196660995, "learning_rate": 0.0005, "loss": 2.0958, "step": 162890 }, { "epoch": 0.6200376057185052, "grad_norm": 0.12256696820259094, "learning_rate": 0.0005, "loss": 2.1035, "step": 162900 }, { "epoch": 0.6200756681866278, "grad_norm": 0.11147227138280869, "learning_rate": 0.0005, "loss": 2.1154, "step": 162910 }, { "epoch": 0.6201137306547506, "grad_norm": 0.11578986793756485, "learning_rate": 0.0005, "loss": 2.1142, "step": 162920 }, { "epoch": 0.6201517931228733, "grad_norm": 0.12968918681144714, "learning_rate": 0.0005, "loss": 2.1062, "step": 162930 }, { "epoch": 0.6201898555909959, "grad_norm": 0.11465263366699219, "learning_rate": 0.0005, "loss": 2.118, "step": 162940 }, { "epoch": 0.6202279180591186, "grad_norm": 0.14826619625091553, "learning_rate": 0.0005, "loss": 2.0986, "step": 162950 }, { "epoch": 0.6202659805272414, "grad_norm": 0.13926471769809723, "learning_rate": 0.0005, "loss": 2.109, "step": 162960 }, { "epoch": 0.620304042995364, "grad_norm": 0.1253323256969452, "learning_rate": 0.0005, "loss": 2.1065, "step": 162970 }, { "epoch": 0.6203421054634867, "grad_norm": 0.12329831719398499, "learning_rate": 0.0005, "loss": 2.1014, "step": 162980 }, { "epoch": 0.6203801679316093, "grad_norm": 0.12464464455842972, "learning_rate": 0.0005, "loss": 2.1194, "step": 162990 }, { "epoch": 0.620418230399732, "grad_norm": 0.11613859236240387, "learning_rate": 0.0005, "loss": 2.1213, "step": 163000 }, { "epoch": 0.6204562928678548, "grad_norm": 0.13070306181907654, "learning_rate": 0.0005, "loss": 2.1133, "step": 163010 }, { "epoch": 0.6204943553359774, "grad_norm": 0.12880989909172058, "learning_rate": 0.0005, "loss": 2.1048, "step": 163020 }, { "epoch": 0.6205324178041001, "grad_norm": 0.1222710907459259, "learning_rate": 0.0005, "loss": 2.1309, "step": 163030 }, { "epoch": 0.6205704802722227, "grad_norm": 0.1197257861495018, "learning_rate": 0.0005, "loss": 2.1172, "step": 163040 }, { "epoch": 0.6206085427403455, "grad_norm": 0.12303168326616287, "learning_rate": 0.0005, "loss": 2.1151, "step": 163050 }, { "epoch": 0.6206466052084681, "grad_norm": 0.11282678693532944, "learning_rate": 0.0005, "loss": 2.1171, "step": 163060 }, { "epoch": 0.6206846676765908, "grad_norm": 0.11966310441493988, "learning_rate": 0.0005, "loss": 2.0977, "step": 163070 }, { "epoch": 0.6207227301447135, "grad_norm": 0.13249096274375916, "learning_rate": 0.0005, "loss": 2.1125, "step": 163080 }, { "epoch": 0.6207607926128362, "grad_norm": 0.1273467242717743, "learning_rate": 0.0005, "loss": 2.1018, "step": 163090 }, { "epoch": 0.6207988550809589, "grad_norm": 0.14432646334171295, "learning_rate": 0.0005, "loss": 2.1093, "step": 163100 }, { "epoch": 0.6208369175490815, "grad_norm": 0.1329868584871292, "learning_rate": 0.0005, "loss": 2.1164, "step": 163110 }, { "epoch": 0.6208749800172042, "grad_norm": 0.13333286345005035, "learning_rate": 0.0005, "loss": 2.1104, "step": 163120 }, { "epoch": 0.620913042485327, "grad_norm": 0.15608209371566772, "learning_rate": 0.0005, "loss": 2.1115, "step": 163130 }, { "epoch": 0.6209511049534496, "grad_norm": 0.11932935565710068, "learning_rate": 0.0005, "loss": 2.0991, "step": 163140 }, { "epoch": 0.6209891674215723, "grad_norm": 0.12438883632421494, "learning_rate": 0.0005, "loss": 2.1009, "step": 163150 }, { "epoch": 0.6210272298896949, "grad_norm": 0.12361640483140945, "learning_rate": 0.0005, "loss": 2.1085, "step": 163160 }, { "epoch": 0.6210652923578176, "grad_norm": 0.11473851650953293, "learning_rate": 0.0005, "loss": 2.099, "step": 163170 }, { "epoch": 0.6211033548259404, "grad_norm": 0.12637248635292053, "learning_rate": 0.0005, "loss": 2.1105, "step": 163180 }, { "epoch": 0.621141417294063, "grad_norm": 0.13711248338222504, "learning_rate": 0.0005, "loss": 2.1088, "step": 163190 }, { "epoch": 0.6211794797621857, "grad_norm": 0.12292785942554474, "learning_rate": 0.0005, "loss": 2.105, "step": 163200 }, { "epoch": 0.6212175422303083, "grad_norm": 0.13030977547168732, "learning_rate": 0.0005, "loss": 2.102, "step": 163210 }, { "epoch": 0.6212556046984311, "grad_norm": 0.130666121840477, "learning_rate": 0.0005, "loss": 2.0948, "step": 163220 }, { "epoch": 0.6212936671665538, "grad_norm": 0.12032100558280945, "learning_rate": 0.0005, "loss": 2.1163, "step": 163230 }, { "epoch": 0.6213317296346764, "grad_norm": 0.11286701261997223, "learning_rate": 0.0005, "loss": 2.1101, "step": 163240 }, { "epoch": 0.6213697921027991, "grad_norm": 0.1288653463125229, "learning_rate": 0.0005, "loss": 2.1098, "step": 163250 }, { "epoch": 0.6214078545709218, "grad_norm": 0.11776786297559738, "learning_rate": 0.0005, "loss": 2.1043, "step": 163260 }, { "epoch": 0.6214459170390445, "grad_norm": 0.12605293095111847, "learning_rate": 0.0005, "loss": 2.1322, "step": 163270 }, { "epoch": 0.6214839795071672, "grad_norm": 0.11133257299661636, "learning_rate": 0.0005, "loss": 2.1115, "step": 163280 }, { "epoch": 0.6215220419752898, "grad_norm": 0.1279788464307785, "learning_rate": 0.0005, "loss": 2.1264, "step": 163290 }, { "epoch": 0.6215601044434125, "grad_norm": 0.13132244348526, "learning_rate": 0.0005, "loss": 2.1092, "step": 163300 }, { "epoch": 0.6215981669115352, "grad_norm": 0.12739558517932892, "learning_rate": 0.0005, "loss": 2.0894, "step": 163310 }, { "epoch": 0.6216362293796579, "grad_norm": 0.1202457919716835, "learning_rate": 0.0005, "loss": 2.1098, "step": 163320 }, { "epoch": 0.6216742918477806, "grad_norm": 0.12516264617443085, "learning_rate": 0.0005, "loss": 2.119, "step": 163330 }, { "epoch": 0.6217123543159032, "grad_norm": 0.11494667083024979, "learning_rate": 0.0005, "loss": 2.1162, "step": 163340 }, { "epoch": 0.621750416784026, "grad_norm": 0.12559252977371216, "learning_rate": 0.0005, "loss": 2.11, "step": 163350 }, { "epoch": 0.6217884792521486, "grad_norm": 0.12517182528972626, "learning_rate": 0.0005, "loss": 2.116, "step": 163360 }, { "epoch": 0.6218265417202713, "grad_norm": 0.1260019838809967, "learning_rate": 0.0005, "loss": 2.109, "step": 163370 }, { "epoch": 0.621864604188394, "grad_norm": 0.11948941648006439, "learning_rate": 0.0005, "loss": 2.1029, "step": 163380 }, { "epoch": 0.6219026666565167, "grad_norm": 0.12022241204977036, "learning_rate": 0.0005, "loss": 2.1137, "step": 163390 }, { "epoch": 0.6219407291246394, "grad_norm": 0.11221758276224136, "learning_rate": 0.0005, "loss": 2.1013, "step": 163400 }, { "epoch": 0.621978791592762, "grad_norm": 0.12240494042634964, "learning_rate": 0.0005, "loss": 2.1028, "step": 163410 }, { "epoch": 0.6220168540608847, "grad_norm": 0.12656979262828827, "learning_rate": 0.0005, "loss": 2.095, "step": 163420 }, { "epoch": 0.6220549165290075, "grad_norm": 0.12262611091136932, "learning_rate": 0.0005, "loss": 2.1334, "step": 163430 }, { "epoch": 0.6220929789971301, "grad_norm": 0.12745757400989532, "learning_rate": 0.0005, "loss": 2.1098, "step": 163440 }, { "epoch": 0.6221310414652528, "grad_norm": 0.12522058188915253, "learning_rate": 0.0005, "loss": 2.114, "step": 163450 }, { "epoch": 0.6221691039333754, "grad_norm": 0.1248687207698822, "learning_rate": 0.0005, "loss": 2.1088, "step": 163460 }, { "epoch": 0.6222071664014981, "grad_norm": 0.13304556906223297, "learning_rate": 0.0005, "loss": 2.1246, "step": 163470 }, { "epoch": 0.6222452288696209, "grad_norm": 0.12558689713478088, "learning_rate": 0.0005, "loss": 2.1102, "step": 163480 }, { "epoch": 0.6222832913377435, "grad_norm": 0.11721085011959076, "learning_rate": 0.0005, "loss": 2.1031, "step": 163490 }, { "epoch": 0.6223213538058662, "grad_norm": 0.11873149126768112, "learning_rate": 0.0005, "loss": 2.1289, "step": 163500 }, { "epoch": 0.6223594162739888, "grad_norm": 0.1278821974992752, "learning_rate": 0.0005, "loss": 2.1133, "step": 163510 }, { "epoch": 0.6223974787421116, "grad_norm": 0.13227632641792297, "learning_rate": 0.0005, "loss": 2.1286, "step": 163520 }, { "epoch": 0.6224355412102343, "grad_norm": 0.12484121322631836, "learning_rate": 0.0005, "loss": 2.1025, "step": 163530 }, { "epoch": 0.6224736036783569, "grad_norm": 0.1308179348707199, "learning_rate": 0.0005, "loss": 2.1212, "step": 163540 }, { "epoch": 0.6225116661464796, "grad_norm": 0.12707605957984924, "learning_rate": 0.0005, "loss": 2.1039, "step": 163550 }, { "epoch": 0.6225497286146023, "grad_norm": 0.12090172618627548, "learning_rate": 0.0005, "loss": 2.1226, "step": 163560 }, { "epoch": 0.622587791082725, "grad_norm": 0.16413117945194244, "learning_rate": 0.0005, "loss": 2.0984, "step": 163570 }, { "epoch": 0.6226258535508477, "grad_norm": 0.13108348846435547, "learning_rate": 0.0005, "loss": 2.1053, "step": 163580 }, { "epoch": 0.6226639160189703, "grad_norm": 0.11516100913286209, "learning_rate": 0.0005, "loss": 2.093, "step": 163590 }, { "epoch": 0.622701978487093, "grad_norm": 0.12196829169988632, "learning_rate": 0.0005, "loss": 2.103, "step": 163600 }, { "epoch": 0.6227400409552157, "grad_norm": 0.11693855375051498, "learning_rate": 0.0005, "loss": 2.1151, "step": 163610 }, { "epoch": 0.6227781034233384, "grad_norm": 0.12894059717655182, "learning_rate": 0.0005, "loss": 2.1119, "step": 163620 }, { "epoch": 0.622816165891461, "grad_norm": 0.12001071870326996, "learning_rate": 0.0005, "loss": 2.1105, "step": 163630 }, { "epoch": 0.6228542283595837, "grad_norm": 0.11836356669664383, "learning_rate": 0.0005, "loss": 2.1123, "step": 163640 }, { "epoch": 0.6228922908277065, "grad_norm": 0.11794774234294891, "learning_rate": 0.0005, "loss": 2.0995, "step": 163650 }, { "epoch": 0.6229303532958291, "grad_norm": 0.13065387308597565, "learning_rate": 0.0005, "loss": 2.0968, "step": 163660 }, { "epoch": 0.6229684157639518, "grad_norm": 0.12129966914653778, "learning_rate": 0.0005, "loss": 2.1238, "step": 163670 }, { "epoch": 0.6230064782320744, "grad_norm": 0.11834661662578583, "learning_rate": 0.0005, "loss": 2.1143, "step": 163680 }, { "epoch": 0.6230445407001972, "grad_norm": 0.12051723152399063, "learning_rate": 0.0005, "loss": 2.1246, "step": 163690 }, { "epoch": 0.6230826031683199, "grad_norm": 0.12891320884227753, "learning_rate": 0.0005, "loss": 2.1033, "step": 163700 }, { "epoch": 0.6231206656364425, "grad_norm": 0.11517681926488876, "learning_rate": 0.0005, "loss": 2.1075, "step": 163710 }, { "epoch": 0.6231587281045652, "grad_norm": 0.12965747714042664, "learning_rate": 0.0005, "loss": 2.113, "step": 163720 }, { "epoch": 0.6231967905726878, "grad_norm": 0.11559164524078369, "learning_rate": 0.0005, "loss": 2.131, "step": 163730 }, { "epoch": 0.6232348530408106, "grad_norm": 0.1317571997642517, "learning_rate": 0.0005, "loss": 2.0897, "step": 163740 }, { "epoch": 0.6232729155089333, "grad_norm": 0.11871747672557831, "learning_rate": 0.0005, "loss": 2.1185, "step": 163750 }, { "epoch": 0.6233109779770559, "grad_norm": 0.12817269563674927, "learning_rate": 0.0005, "loss": 2.1153, "step": 163760 }, { "epoch": 0.6233490404451786, "grad_norm": 0.11896772682666779, "learning_rate": 0.0005, "loss": 2.1233, "step": 163770 }, { "epoch": 0.6233871029133013, "grad_norm": 0.13990911841392517, "learning_rate": 0.0005, "loss": 2.1188, "step": 163780 }, { "epoch": 0.623425165381424, "grad_norm": 0.12218964099884033, "learning_rate": 0.0005, "loss": 2.1136, "step": 163790 }, { "epoch": 0.6234632278495467, "grad_norm": 0.1254860758781433, "learning_rate": 0.0005, "loss": 2.116, "step": 163800 }, { "epoch": 0.6235012903176693, "grad_norm": 0.12563014030456543, "learning_rate": 0.0005, "loss": 2.1001, "step": 163810 }, { "epoch": 0.6235393527857921, "grad_norm": 0.12120439857244492, "learning_rate": 0.0005, "loss": 2.1159, "step": 163820 }, { "epoch": 0.6235774152539147, "grad_norm": 0.1312512308359146, "learning_rate": 0.0005, "loss": 2.1041, "step": 163830 }, { "epoch": 0.6236154777220374, "grad_norm": 0.15499520301818848, "learning_rate": 0.0005, "loss": 2.1251, "step": 163840 }, { "epoch": 0.6236535401901601, "grad_norm": 0.126829132437706, "learning_rate": 0.0005, "loss": 2.1206, "step": 163850 }, { "epoch": 0.6236916026582828, "grad_norm": 0.1292577087879181, "learning_rate": 0.0005, "loss": 2.1005, "step": 163860 }, { "epoch": 0.6237296651264055, "grad_norm": 0.12449748814105988, "learning_rate": 0.0005, "loss": 2.1025, "step": 163870 }, { "epoch": 0.6237677275945281, "grad_norm": 0.12957262992858887, "learning_rate": 0.0005, "loss": 2.11, "step": 163880 }, { "epoch": 0.6238057900626508, "grad_norm": 0.12353445589542389, "learning_rate": 0.0005, "loss": 2.126, "step": 163890 }, { "epoch": 0.6238438525307735, "grad_norm": 0.12337320297956467, "learning_rate": 0.0005, "loss": 2.1134, "step": 163900 }, { "epoch": 0.6238819149988962, "grad_norm": 0.12145992368459702, "learning_rate": 0.0005, "loss": 2.1171, "step": 163910 }, { "epoch": 0.6239199774670189, "grad_norm": 0.11696632206439972, "learning_rate": 0.0005, "loss": 2.1118, "step": 163920 }, { "epoch": 0.6239580399351415, "grad_norm": 0.12449745833873749, "learning_rate": 0.0005, "loss": 2.117, "step": 163930 }, { "epoch": 0.6239961024032642, "grad_norm": 0.11991845816373825, "learning_rate": 0.0005, "loss": 2.1048, "step": 163940 }, { "epoch": 0.624034164871387, "grad_norm": 0.11270780861377716, "learning_rate": 0.0005, "loss": 2.1092, "step": 163950 }, { "epoch": 0.6240722273395096, "grad_norm": 0.11561381071805954, "learning_rate": 0.0005, "loss": 2.1172, "step": 163960 }, { "epoch": 0.6241102898076323, "grad_norm": 0.12485513836145401, "learning_rate": 0.0005, "loss": 2.1055, "step": 163970 }, { "epoch": 0.6241483522757549, "grad_norm": 0.12313344329595566, "learning_rate": 0.0005, "loss": 2.1062, "step": 163980 }, { "epoch": 0.6241864147438777, "grad_norm": 0.13852645456790924, "learning_rate": 0.0005, "loss": 2.1129, "step": 163990 }, { "epoch": 0.6242244772120004, "grad_norm": 0.13669776916503906, "learning_rate": 0.0005, "loss": 2.1085, "step": 164000 }, { "epoch": 0.624262539680123, "grad_norm": 0.12754948437213898, "learning_rate": 0.0005, "loss": 2.1113, "step": 164010 }, { "epoch": 0.6243006021482457, "grad_norm": 0.12182336300611496, "learning_rate": 0.0005, "loss": 2.0984, "step": 164020 }, { "epoch": 0.6243386646163683, "grad_norm": 0.13123556971549988, "learning_rate": 0.0005, "loss": 2.0986, "step": 164030 }, { "epoch": 0.6243767270844911, "grad_norm": 0.13097603619098663, "learning_rate": 0.0005, "loss": 2.1174, "step": 164040 }, { "epoch": 0.6244147895526138, "grad_norm": 0.12931232154369354, "learning_rate": 0.0005, "loss": 2.1072, "step": 164050 }, { "epoch": 0.6244528520207364, "grad_norm": 0.12148045003414154, "learning_rate": 0.0005, "loss": 2.1293, "step": 164060 }, { "epoch": 0.6244909144888591, "grad_norm": 0.11986979097127914, "learning_rate": 0.0005, "loss": 2.1182, "step": 164070 }, { "epoch": 0.6245289769569818, "grad_norm": 0.12932519614696503, "learning_rate": 0.0005, "loss": 2.0999, "step": 164080 }, { "epoch": 0.6245670394251045, "grad_norm": 0.11838708072900772, "learning_rate": 0.0005, "loss": 2.1002, "step": 164090 }, { "epoch": 0.6246051018932272, "grad_norm": 0.13119414448738098, "learning_rate": 0.0005, "loss": 2.1163, "step": 164100 }, { "epoch": 0.6246431643613498, "grad_norm": 0.1321251541376114, "learning_rate": 0.0005, "loss": 2.1165, "step": 164110 }, { "epoch": 0.6246812268294726, "grad_norm": 0.11741336435079575, "learning_rate": 0.0005, "loss": 2.1147, "step": 164120 }, { "epoch": 0.6247192892975952, "grad_norm": 0.12370091676712036, "learning_rate": 0.0005, "loss": 2.1381, "step": 164130 }, { "epoch": 0.6247573517657179, "grad_norm": 0.12821203470230103, "learning_rate": 0.0005, "loss": 2.1027, "step": 164140 }, { "epoch": 0.6247954142338406, "grad_norm": 0.1400570273399353, "learning_rate": 0.0005, "loss": 2.1056, "step": 164150 }, { "epoch": 0.6248334767019632, "grad_norm": 0.12167726457118988, "learning_rate": 0.0005, "loss": 2.122, "step": 164160 }, { "epoch": 0.624871539170086, "grad_norm": 0.11930079758167267, "learning_rate": 0.0005, "loss": 2.1142, "step": 164170 }, { "epoch": 0.6249096016382086, "grad_norm": 0.14793331921100616, "learning_rate": 0.0005, "loss": 2.1357, "step": 164180 }, { "epoch": 0.6249476641063313, "grad_norm": 0.13453777134418488, "learning_rate": 0.0005, "loss": 2.1089, "step": 164190 }, { "epoch": 0.624985726574454, "grad_norm": 0.13323485851287842, "learning_rate": 0.0005, "loss": 2.1213, "step": 164200 }, { "epoch": 0.6250237890425767, "grad_norm": 0.12917537987232208, "learning_rate": 0.0005, "loss": 2.116, "step": 164210 }, { "epoch": 0.6250618515106994, "grad_norm": 0.1289193481206894, "learning_rate": 0.0005, "loss": 2.112, "step": 164220 }, { "epoch": 0.625099913978822, "grad_norm": 0.1279052197933197, "learning_rate": 0.0005, "loss": 2.1183, "step": 164230 }, { "epoch": 0.6251379764469447, "grad_norm": 0.13927242159843445, "learning_rate": 0.0005, "loss": 2.1076, "step": 164240 }, { "epoch": 0.6251760389150675, "grad_norm": 0.1160060465335846, "learning_rate": 0.0005, "loss": 2.0902, "step": 164250 }, { "epoch": 0.6252141013831901, "grad_norm": 0.1169615238904953, "learning_rate": 0.0005, "loss": 2.1107, "step": 164260 }, { "epoch": 0.6252521638513128, "grad_norm": 0.13549870252609253, "learning_rate": 0.0005, "loss": 2.1011, "step": 164270 }, { "epoch": 0.6252902263194354, "grad_norm": 0.13748615980148315, "learning_rate": 0.0005, "loss": 2.1076, "step": 164280 }, { "epoch": 0.6253282887875582, "grad_norm": 0.11843782663345337, "learning_rate": 0.0005, "loss": 2.1016, "step": 164290 }, { "epoch": 0.6253663512556809, "grad_norm": 0.12487849593162537, "learning_rate": 0.0005, "loss": 2.1061, "step": 164300 }, { "epoch": 0.6254044137238035, "grad_norm": 0.1206541433930397, "learning_rate": 0.0005, "loss": 2.1275, "step": 164310 }, { "epoch": 0.6254424761919262, "grad_norm": 0.12133076786994934, "learning_rate": 0.0005, "loss": 2.1084, "step": 164320 }, { "epoch": 0.6254805386600488, "grad_norm": 0.12979364395141602, "learning_rate": 0.0005, "loss": 2.1141, "step": 164330 }, { "epoch": 0.6255186011281716, "grad_norm": 0.11954282969236374, "learning_rate": 0.0005, "loss": 2.1073, "step": 164340 }, { "epoch": 0.6255566635962942, "grad_norm": 0.11928258091211319, "learning_rate": 0.0005, "loss": 2.1136, "step": 164350 }, { "epoch": 0.6255947260644169, "grad_norm": 0.11903034895658493, "learning_rate": 0.0005, "loss": 2.1115, "step": 164360 }, { "epoch": 0.6256327885325396, "grad_norm": 0.1315557211637497, "learning_rate": 0.0005, "loss": 2.1325, "step": 164370 }, { "epoch": 0.6256708510006623, "grad_norm": 0.12095145136117935, "learning_rate": 0.0005, "loss": 2.102, "step": 164380 }, { "epoch": 0.625708913468785, "grad_norm": 0.12724807858467102, "learning_rate": 0.0005, "loss": 2.1018, "step": 164390 }, { "epoch": 0.6257469759369076, "grad_norm": 0.1404864639043808, "learning_rate": 0.0005, "loss": 2.1113, "step": 164400 }, { "epoch": 0.6257850384050303, "grad_norm": 0.13892942667007446, "learning_rate": 0.0005, "loss": 2.1022, "step": 164410 }, { "epoch": 0.6258231008731531, "grad_norm": 0.133827805519104, "learning_rate": 0.0005, "loss": 2.1038, "step": 164420 }, { "epoch": 0.6258611633412757, "grad_norm": 0.11841180175542831, "learning_rate": 0.0005, "loss": 2.1171, "step": 164430 }, { "epoch": 0.6258992258093984, "grad_norm": 0.1325208693742752, "learning_rate": 0.0005, "loss": 2.1221, "step": 164440 }, { "epoch": 0.625937288277521, "grad_norm": 0.1317589282989502, "learning_rate": 0.0005, "loss": 2.1104, "step": 164450 }, { "epoch": 0.6259753507456437, "grad_norm": 0.126037135720253, "learning_rate": 0.0005, "loss": 2.1138, "step": 164460 }, { "epoch": 0.6260134132137665, "grad_norm": 0.13882552087306976, "learning_rate": 0.0005, "loss": 2.1242, "step": 164470 }, { "epoch": 0.6260514756818891, "grad_norm": 0.1219392642378807, "learning_rate": 0.0005, "loss": 2.1131, "step": 164480 }, { "epoch": 0.6260895381500118, "grad_norm": 0.12018023431301117, "learning_rate": 0.0005, "loss": 2.1105, "step": 164490 }, { "epoch": 0.6261276006181344, "grad_norm": 0.11514944583177567, "learning_rate": 0.0005, "loss": 2.1139, "step": 164500 }, { "epoch": 0.6261656630862572, "grad_norm": 0.1256149411201477, "learning_rate": 0.0005, "loss": 2.1194, "step": 164510 }, { "epoch": 0.6262037255543799, "grad_norm": 0.1267387866973877, "learning_rate": 0.0005, "loss": 2.0985, "step": 164520 }, { "epoch": 0.6262417880225025, "grad_norm": 0.12900102138519287, "learning_rate": 0.0005, "loss": 2.1088, "step": 164530 }, { "epoch": 0.6262798504906252, "grad_norm": 0.1457938253879547, "learning_rate": 0.0005, "loss": 2.1078, "step": 164540 }, { "epoch": 0.626317912958748, "grad_norm": 0.12686443328857422, "learning_rate": 0.0005, "loss": 2.1156, "step": 164550 }, { "epoch": 0.6263559754268706, "grad_norm": 0.14455707371234894, "learning_rate": 0.0005, "loss": 2.119, "step": 164560 }, { "epoch": 0.6263940378949933, "grad_norm": 0.1281202733516693, "learning_rate": 0.0005, "loss": 2.111, "step": 164570 }, { "epoch": 0.6264321003631159, "grad_norm": 0.1168116107583046, "learning_rate": 0.0005, "loss": 2.127, "step": 164580 }, { "epoch": 0.6264701628312386, "grad_norm": 0.12170332670211792, "learning_rate": 0.0005, "loss": 2.1217, "step": 164590 }, { "epoch": 0.6265082252993613, "grad_norm": 0.11665555834770203, "learning_rate": 0.0005, "loss": 2.1043, "step": 164600 }, { "epoch": 0.626546287767484, "grad_norm": 0.12107517570257187, "learning_rate": 0.0005, "loss": 2.1009, "step": 164610 }, { "epoch": 0.6265843502356067, "grad_norm": 0.1253291517496109, "learning_rate": 0.0005, "loss": 2.1077, "step": 164620 }, { "epoch": 0.6266224127037293, "grad_norm": 0.12508216500282288, "learning_rate": 0.0005, "loss": 2.1075, "step": 164630 }, { "epoch": 0.6266604751718521, "grad_norm": 0.1261623650789261, "learning_rate": 0.0005, "loss": 2.1086, "step": 164640 }, { "epoch": 0.6266985376399747, "grad_norm": 0.13363027572631836, "learning_rate": 0.0005, "loss": 2.1088, "step": 164650 }, { "epoch": 0.6267366001080974, "grad_norm": 0.11288081854581833, "learning_rate": 0.0005, "loss": 2.108, "step": 164660 }, { "epoch": 0.62677466257622, "grad_norm": 0.12379693239927292, "learning_rate": 0.0005, "loss": 2.1168, "step": 164670 }, { "epoch": 0.6268127250443428, "grad_norm": 0.11657523363828659, "learning_rate": 0.0005, "loss": 2.1257, "step": 164680 }, { "epoch": 0.6268507875124655, "grad_norm": 0.11529957503080368, "learning_rate": 0.0005, "loss": 2.111, "step": 164690 }, { "epoch": 0.6268888499805881, "grad_norm": 0.19511523842811584, "learning_rate": 0.0005, "loss": 2.092, "step": 164700 }, { "epoch": 0.6269269124487108, "grad_norm": 0.13535825908184052, "learning_rate": 0.0005, "loss": 2.111, "step": 164710 }, { "epoch": 0.6269649749168336, "grad_norm": 0.12043019384145737, "learning_rate": 0.0005, "loss": 2.1121, "step": 164720 }, { "epoch": 0.6270030373849562, "grad_norm": 0.11477592587471008, "learning_rate": 0.0005, "loss": 2.125, "step": 164730 }, { "epoch": 0.6270410998530789, "grad_norm": 0.11728215217590332, "learning_rate": 0.0005, "loss": 2.1102, "step": 164740 }, { "epoch": 0.6270791623212015, "grad_norm": 0.12050362676382065, "learning_rate": 0.0005, "loss": 2.1162, "step": 164750 }, { "epoch": 0.6271172247893242, "grad_norm": 0.13324016332626343, "learning_rate": 0.0005, "loss": 2.1032, "step": 164760 }, { "epoch": 0.627155287257447, "grad_norm": 0.1299990713596344, "learning_rate": 0.0005, "loss": 2.1104, "step": 164770 }, { "epoch": 0.6271933497255696, "grad_norm": 0.12177756428718567, "learning_rate": 0.0005, "loss": 2.1195, "step": 164780 }, { "epoch": 0.6272314121936923, "grad_norm": 0.12219898402690887, "learning_rate": 0.0005, "loss": 2.1032, "step": 164790 }, { "epoch": 0.6272694746618149, "grad_norm": 0.1313663274049759, "learning_rate": 0.0005, "loss": 2.118, "step": 164800 }, { "epoch": 0.6273075371299377, "grad_norm": 0.12362924218177795, "learning_rate": 0.0005, "loss": 2.1088, "step": 164810 }, { "epoch": 0.6273455995980604, "grad_norm": 0.1263677179813385, "learning_rate": 0.0005, "loss": 2.1222, "step": 164820 }, { "epoch": 0.627383662066183, "grad_norm": 0.12366585433483124, "learning_rate": 0.0005, "loss": 2.1113, "step": 164830 }, { "epoch": 0.6274217245343057, "grad_norm": 0.18121863901615143, "learning_rate": 0.0005, "loss": 2.1101, "step": 164840 }, { "epoch": 0.6274597870024284, "grad_norm": 0.12810751795768738, "learning_rate": 0.0005, "loss": 2.1104, "step": 164850 }, { "epoch": 0.6274978494705511, "grad_norm": 0.11524245142936707, "learning_rate": 0.0005, "loss": 2.1164, "step": 164860 }, { "epoch": 0.6275359119386738, "grad_norm": 0.1260351538658142, "learning_rate": 0.0005, "loss": 2.1078, "step": 164870 }, { "epoch": 0.6275739744067964, "grad_norm": 0.1230417788028717, "learning_rate": 0.0005, "loss": 2.1131, "step": 164880 }, { "epoch": 0.6276120368749191, "grad_norm": 0.13061945140361786, "learning_rate": 0.0005, "loss": 2.1121, "step": 164890 }, { "epoch": 0.6276500993430418, "grad_norm": 0.13039201498031616, "learning_rate": 0.0005, "loss": 2.0959, "step": 164900 }, { "epoch": 0.6276881618111645, "grad_norm": 0.12625457346439362, "learning_rate": 0.0005, "loss": 2.1191, "step": 164910 }, { "epoch": 0.6277262242792871, "grad_norm": 0.11405977606773376, "learning_rate": 0.0005, "loss": 2.104, "step": 164920 }, { "epoch": 0.6277642867474098, "grad_norm": 0.12500326335430145, "learning_rate": 0.0005, "loss": 2.1031, "step": 164930 }, { "epoch": 0.6278023492155326, "grad_norm": 0.12264113873243332, "learning_rate": 0.0005, "loss": 2.1049, "step": 164940 }, { "epoch": 0.6278404116836552, "grad_norm": 0.1172245517373085, "learning_rate": 0.0005, "loss": 2.1214, "step": 164950 }, { "epoch": 0.6278784741517779, "grad_norm": 0.13103266060352325, "learning_rate": 0.0005, "loss": 2.1135, "step": 164960 }, { "epoch": 0.6279165366199005, "grad_norm": 0.12313253432512283, "learning_rate": 0.0005, "loss": 2.1131, "step": 164970 }, { "epoch": 0.6279545990880233, "grad_norm": 0.1311279535293579, "learning_rate": 0.0005, "loss": 2.1044, "step": 164980 }, { "epoch": 0.627992661556146, "grad_norm": 0.11780136823654175, "learning_rate": 0.0005, "loss": 2.1026, "step": 164990 }, { "epoch": 0.6280307240242686, "grad_norm": 0.127878338098526, "learning_rate": 0.0005, "loss": 2.1107, "step": 165000 }, { "epoch": 0.6280687864923913, "grad_norm": 0.10879893600940704, "learning_rate": 0.0005, "loss": 2.1088, "step": 165010 }, { "epoch": 0.6281068489605139, "grad_norm": 0.12845277786254883, "learning_rate": 0.0005, "loss": 2.1113, "step": 165020 }, { "epoch": 0.6281449114286367, "grad_norm": 0.1244046613574028, "learning_rate": 0.0005, "loss": 2.109, "step": 165030 }, { "epoch": 0.6281829738967594, "grad_norm": 0.1285630613565445, "learning_rate": 0.0005, "loss": 2.1223, "step": 165040 }, { "epoch": 0.628221036364882, "grad_norm": 0.13964420557022095, "learning_rate": 0.0005, "loss": 2.1111, "step": 165050 }, { "epoch": 0.6282590988330047, "grad_norm": 0.13453397154808044, "learning_rate": 0.0005, "loss": 2.0991, "step": 165060 }, { "epoch": 0.6282971613011274, "grad_norm": 0.1252523958683014, "learning_rate": 0.0005, "loss": 2.1028, "step": 165070 }, { "epoch": 0.6283352237692501, "grad_norm": 0.12090957909822464, "learning_rate": 0.0005, "loss": 2.098, "step": 165080 }, { "epoch": 0.6283732862373728, "grad_norm": 0.11995670199394226, "learning_rate": 0.0005, "loss": 2.1171, "step": 165090 }, { "epoch": 0.6284113487054954, "grad_norm": 0.12850458920001984, "learning_rate": 0.0005, "loss": 2.1108, "step": 165100 }, { "epoch": 0.6284494111736182, "grad_norm": 0.1120951920747757, "learning_rate": 0.0005, "loss": 2.1052, "step": 165110 }, { "epoch": 0.6284874736417408, "grad_norm": 0.11967871338129044, "learning_rate": 0.0005, "loss": 2.1117, "step": 165120 }, { "epoch": 0.6285255361098635, "grad_norm": 0.14278464019298553, "learning_rate": 0.0005, "loss": 2.1224, "step": 165130 }, { "epoch": 0.6285635985779862, "grad_norm": 0.12599928677082062, "learning_rate": 0.0005, "loss": 2.1023, "step": 165140 }, { "epoch": 0.6286016610461089, "grad_norm": 0.12482509016990662, "learning_rate": 0.0005, "loss": 2.1043, "step": 165150 }, { "epoch": 0.6286397235142316, "grad_norm": 0.12580351531505585, "learning_rate": 0.0005, "loss": 2.1113, "step": 165160 }, { "epoch": 0.6286777859823542, "grad_norm": 0.13060778379440308, "learning_rate": 0.0005, "loss": 2.1074, "step": 165170 }, { "epoch": 0.6287158484504769, "grad_norm": 0.11947949230670929, "learning_rate": 0.0005, "loss": 2.1122, "step": 165180 }, { "epoch": 0.6287539109185996, "grad_norm": 0.11321130394935608, "learning_rate": 0.0005, "loss": 2.109, "step": 165190 }, { "epoch": 0.6287919733867223, "grad_norm": 0.12642672657966614, "learning_rate": 0.0005, "loss": 2.1154, "step": 165200 }, { "epoch": 0.628830035854845, "grad_norm": 0.1435793936252594, "learning_rate": 0.0005, "loss": 2.0962, "step": 165210 }, { "epoch": 0.6288680983229676, "grad_norm": 0.1154685840010643, "learning_rate": 0.0005, "loss": 2.1122, "step": 165220 }, { "epoch": 0.6289061607910903, "grad_norm": 0.11487980931997299, "learning_rate": 0.0005, "loss": 2.1214, "step": 165230 }, { "epoch": 0.6289442232592131, "grad_norm": 0.11569257825613022, "learning_rate": 0.0005, "loss": 2.1217, "step": 165240 }, { "epoch": 0.6289822857273357, "grad_norm": 0.11872398853302002, "learning_rate": 0.0005, "loss": 2.1169, "step": 165250 }, { "epoch": 0.6290203481954584, "grad_norm": 0.13011687994003296, "learning_rate": 0.0005, "loss": 2.0994, "step": 165260 }, { "epoch": 0.629058410663581, "grad_norm": 0.129885733127594, "learning_rate": 0.0005, "loss": 2.1117, "step": 165270 }, { "epoch": 0.6290964731317038, "grad_norm": 0.12594501674175262, "learning_rate": 0.0005, "loss": 2.0879, "step": 165280 }, { "epoch": 0.6291345355998265, "grad_norm": 0.12376669049263, "learning_rate": 0.0005, "loss": 2.1069, "step": 165290 }, { "epoch": 0.6291725980679491, "grad_norm": 0.12542007863521576, "learning_rate": 0.0005, "loss": 2.1016, "step": 165300 }, { "epoch": 0.6292106605360718, "grad_norm": 0.12012087553739548, "learning_rate": 0.0005, "loss": 2.1198, "step": 165310 }, { "epoch": 0.6292487230041944, "grad_norm": 0.1414281576871872, "learning_rate": 0.0005, "loss": 2.0979, "step": 165320 }, { "epoch": 0.6292867854723172, "grad_norm": 0.13443519175052643, "learning_rate": 0.0005, "loss": 2.1073, "step": 165330 }, { "epoch": 0.6293248479404399, "grad_norm": 0.11794332414865494, "learning_rate": 0.0005, "loss": 2.1182, "step": 165340 }, { "epoch": 0.6293629104085625, "grad_norm": 0.1224747747182846, "learning_rate": 0.0005, "loss": 2.1142, "step": 165350 }, { "epoch": 0.6294009728766852, "grad_norm": 0.149954691529274, "learning_rate": 0.0005, "loss": 2.1077, "step": 165360 }, { "epoch": 0.6294390353448079, "grad_norm": 0.11514601856470108, "learning_rate": 0.0005, "loss": 2.1067, "step": 165370 }, { "epoch": 0.6294770978129306, "grad_norm": 0.12775687873363495, "learning_rate": 0.0005, "loss": 2.1015, "step": 165380 }, { "epoch": 0.6295151602810533, "grad_norm": 0.13284911215305328, "learning_rate": 0.0005, "loss": 2.1074, "step": 165390 }, { "epoch": 0.6295532227491759, "grad_norm": 0.13676607608795166, "learning_rate": 0.0005, "loss": 2.1043, "step": 165400 }, { "epoch": 0.6295912852172987, "grad_norm": 0.12092647701501846, "learning_rate": 0.0005, "loss": 2.117, "step": 165410 }, { "epoch": 0.6296293476854213, "grad_norm": 0.1766921877861023, "learning_rate": 0.0005, "loss": 2.1106, "step": 165420 }, { "epoch": 0.629667410153544, "grad_norm": 0.12163124233484268, "learning_rate": 0.0005, "loss": 2.1047, "step": 165430 }, { "epoch": 0.6297054726216667, "grad_norm": 0.1240711435675621, "learning_rate": 0.0005, "loss": 2.106, "step": 165440 }, { "epoch": 0.6297435350897893, "grad_norm": 0.1204158365726471, "learning_rate": 0.0005, "loss": 2.1123, "step": 165450 }, { "epoch": 0.6297815975579121, "grad_norm": 0.12393902242183685, "learning_rate": 0.0005, "loss": 2.1062, "step": 165460 }, { "epoch": 0.6298196600260347, "grad_norm": 0.12610715627670288, "learning_rate": 0.0005, "loss": 2.1139, "step": 165470 }, { "epoch": 0.6298577224941574, "grad_norm": 0.1293548047542572, "learning_rate": 0.0005, "loss": 2.1153, "step": 165480 }, { "epoch": 0.62989578496228, "grad_norm": 0.13520577549934387, "learning_rate": 0.0005, "loss": 2.11, "step": 165490 }, { "epoch": 0.6299338474304028, "grad_norm": 0.12101728469133377, "learning_rate": 0.0005, "loss": 2.1202, "step": 165500 }, { "epoch": 0.6299719098985255, "grad_norm": 0.11872179061174393, "learning_rate": 0.0005, "loss": 2.1022, "step": 165510 }, { "epoch": 0.6300099723666481, "grad_norm": 0.12255527824163437, "learning_rate": 0.0005, "loss": 2.0967, "step": 165520 }, { "epoch": 0.6300480348347708, "grad_norm": 0.12082166969776154, "learning_rate": 0.0005, "loss": 2.103, "step": 165530 }, { "epoch": 0.6300860973028936, "grad_norm": 0.12696723639965057, "learning_rate": 0.0005, "loss": 2.1142, "step": 165540 }, { "epoch": 0.6301241597710162, "grad_norm": 0.12669730186462402, "learning_rate": 0.0005, "loss": 2.1109, "step": 165550 }, { "epoch": 0.6301622222391389, "grad_norm": 0.12998205423355103, "learning_rate": 0.0005, "loss": 2.1195, "step": 165560 }, { "epoch": 0.6302002847072615, "grad_norm": 0.11483128368854523, "learning_rate": 0.0005, "loss": 2.1029, "step": 165570 }, { "epoch": 0.6302383471753843, "grad_norm": 0.11890500783920288, "learning_rate": 0.0005, "loss": 2.1024, "step": 165580 }, { "epoch": 0.630276409643507, "grad_norm": 0.12761257588863373, "learning_rate": 0.0005, "loss": 2.1192, "step": 165590 }, { "epoch": 0.6303144721116296, "grad_norm": 0.12069553881883621, "learning_rate": 0.0005, "loss": 2.114, "step": 165600 }, { "epoch": 0.6303525345797523, "grad_norm": 0.11751195788383484, "learning_rate": 0.0005, "loss": 2.1167, "step": 165610 }, { "epoch": 0.6303905970478749, "grad_norm": 0.1265881359577179, "learning_rate": 0.0005, "loss": 2.1127, "step": 165620 }, { "epoch": 0.6304286595159977, "grad_norm": 0.12955141067504883, "learning_rate": 0.0005, "loss": 2.0929, "step": 165630 }, { "epoch": 0.6304667219841203, "grad_norm": 0.12171577662229538, "learning_rate": 0.0005, "loss": 2.11, "step": 165640 }, { "epoch": 0.630504784452243, "grad_norm": 0.12080515176057816, "learning_rate": 0.0005, "loss": 2.1084, "step": 165650 }, { "epoch": 0.6305428469203657, "grad_norm": 0.12178708612918854, "learning_rate": 0.0005, "loss": 2.1112, "step": 165660 }, { "epoch": 0.6305809093884884, "grad_norm": 0.12464504688978195, "learning_rate": 0.0005, "loss": 2.1157, "step": 165670 }, { "epoch": 0.6306189718566111, "grad_norm": 0.11466963589191437, "learning_rate": 0.0005, "loss": 2.1032, "step": 165680 }, { "epoch": 0.6306570343247337, "grad_norm": 0.11494333297014236, "learning_rate": 0.0005, "loss": 2.1148, "step": 165690 }, { "epoch": 0.6306950967928564, "grad_norm": 0.11840105056762695, "learning_rate": 0.0005, "loss": 2.1153, "step": 165700 }, { "epoch": 0.6307331592609792, "grad_norm": 0.12538498640060425, "learning_rate": 0.0005, "loss": 2.1312, "step": 165710 }, { "epoch": 0.6307712217291018, "grad_norm": 0.14788444340229034, "learning_rate": 0.0005, "loss": 2.1013, "step": 165720 }, { "epoch": 0.6308092841972245, "grad_norm": 0.1195816695690155, "learning_rate": 0.0005, "loss": 2.1064, "step": 165730 }, { "epoch": 0.6308473466653471, "grad_norm": 0.1341714709997177, "learning_rate": 0.0005, "loss": 2.1058, "step": 165740 }, { "epoch": 0.6308854091334698, "grad_norm": 0.13644200563430786, "learning_rate": 0.0005, "loss": 2.107, "step": 165750 }, { "epoch": 0.6309234716015926, "grad_norm": 0.1300642192363739, "learning_rate": 0.0005, "loss": 2.1153, "step": 165760 }, { "epoch": 0.6309615340697152, "grad_norm": 0.13034403324127197, "learning_rate": 0.0005, "loss": 2.1079, "step": 165770 }, { "epoch": 0.6309995965378379, "grad_norm": 0.14354290068149567, "learning_rate": 0.0005, "loss": 2.1189, "step": 165780 }, { "epoch": 0.6310376590059605, "grad_norm": 0.12148837745189667, "learning_rate": 0.0005, "loss": 2.114, "step": 165790 }, { "epoch": 0.6310757214740833, "grad_norm": 0.12006913870573044, "learning_rate": 0.0005, "loss": 2.116, "step": 165800 }, { "epoch": 0.631113783942206, "grad_norm": 0.12500861287117004, "learning_rate": 0.0005, "loss": 2.1125, "step": 165810 }, { "epoch": 0.6311518464103286, "grad_norm": 0.11222272366285324, "learning_rate": 0.0005, "loss": 2.1315, "step": 165820 }, { "epoch": 0.6311899088784513, "grad_norm": 0.1350722461938858, "learning_rate": 0.0005, "loss": 2.1025, "step": 165830 }, { "epoch": 0.631227971346574, "grad_norm": 0.12127304822206497, "learning_rate": 0.0005, "loss": 2.0979, "step": 165840 }, { "epoch": 0.6312660338146967, "grad_norm": 0.1220502182841301, "learning_rate": 0.0005, "loss": 2.1148, "step": 165850 }, { "epoch": 0.6313040962828194, "grad_norm": 0.1291094273328781, "learning_rate": 0.0005, "loss": 2.111, "step": 165860 }, { "epoch": 0.631342158750942, "grad_norm": 0.11507304757833481, "learning_rate": 0.0005, "loss": 2.1038, "step": 165870 }, { "epoch": 0.6313802212190647, "grad_norm": 0.1210707575082779, "learning_rate": 0.0005, "loss": 2.1021, "step": 165880 }, { "epoch": 0.6314182836871874, "grad_norm": 0.1463552713394165, "learning_rate": 0.0005, "loss": 2.1219, "step": 165890 }, { "epoch": 0.6314563461553101, "grad_norm": 0.1260424256324768, "learning_rate": 0.0005, "loss": 2.1069, "step": 165900 }, { "epoch": 0.6314944086234328, "grad_norm": 0.14042676985263824, "learning_rate": 0.0005, "loss": 2.1141, "step": 165910 }, { "epoch": 0.6315324710915554, "grad_norm": 0.12042347341775894, "learning_rate": 0.0005, "loss": 2.1102, "step": 165920 }, { "epoch": 0.6315705335596782, "grad_norm": 0.12311989068984985, "learning_rate": 0.0005, "loss": 2.1104, "step": 165930 }, { "epoch": 0.6316085960278008, "grad_norm": 0.124355249106884, "learning_rate": 0.0005, "loss": 2.1112, "step": 165940 }, { "epoch": 0.6316466584959235, "grad_norm": 0.13882076740264893, "learning_rate": 0.0005, "loss": 2.1117, "step": 165950 }, { "epoch": 0.6316847209640462, "grad_norm": 0.13310198485851288, "learning_rate": 0.0005, "loss": 2.1318, "step": 165960 }, { "epoch": 0.6317227834321689, "grad_norm": 0.11607959866523743, "learning_rate": 0.0005, "loss": 2.0976, "step": 165970 }, { "epoch": 0.6317608459002916, "grad_norm": 0.15148335695266724, "learning_rate": 0.0005, "loss": 2.1157, "step": 165980 }, { "epoch": 0.6317989083684142, "grad_norm": 0.13620083034038544, "learning_rate": 0.0005, "loss": 2.1035, "step": 165990 }, { "epoch": 0.6318369708365369, "grad_norm": 0.12425015866756439, "learning_rate": 0.0005, "loss": 2.1254, "step": 166000 }, { "epoch": 0.6318750333046597, "grad_norm": 0.13320602476596832, "learning_rate": 0.0005, "loss": 2.1035, "step": 166010 }, { "epoch": 0.6319130957727823, "grad_norm": 0.12140514701604843, "learning_rate": 0.0005, "loss": 2.1099, "step": 166020 }, { "epoch": 0.631951158240905, "grad_norm": 0.11672339588403702, "learning_rate": 0.0005, "loss": 2.1104, "step": 166030 }, { "epoch": 0.6319892207090276, "grad_norm": 0.1160837858915329, "learning_rate": 0.0005, "loss": 2.1112, "step": 166040 }, { "epoch": 0.6320272831771503, "grad_norm": 0.1330985575914383, "learning_rate": 0.0005, "loss": 2.1027, "step": 166050 }, { "epoch": 0.6320653456452731, "grad_norm": 0.13461607694625854, "learning_rate": 0.0005, "loss": 2.1178, "step": 166060 }, { "epoch": 0.6321034081133957, "grad_norm": 0.12698423862457275, "learning_rate": 0.0005, "loss": 2.1113, "step": 166070 }, { "epoch": 0.6321414705815184, "grad_norm": 0.12149754166603088, "learning_rate": 0.0005, "loss": 2.1171, "step": 166080 }, { "epoch": 0.632179533049641, "grad_norm": 0.12259469926357269, "learning_rate": 0.0005, "loss": 2.1164, "step": 166090 }, { "epoch": 0.6322175955177638, "grad_norm": 0.11817657947540283, "learning_rate": 0.0005, "loss": 2.1212, "step": 166100 }, { "epoch": 0.6322556579858865, "grad_norm": 0.13104508817195892, "learning_rate": 0.0005, "loss": 2.1012, "step": 166110 }, { "epoch": 0.6322937204540091, "grad_norm": 0.1280125230550766, "learning_rate": 0.0005, "loss": 2.1204, "step": 166120 }, { "epoch": 0.6323317829221318, "grad_norm": 0.132890984416008, "learning_rate": 0.0005, "loss": 2.1024, "step": 166130 }, { "epoch": 0.6323698453902545, "grad_norm": 0.1223866418004036, "learning_rate": 0.0005, "loss": 2.0962, "step": 166140 }, { "epoch": 0.6324079078583772, "grad_norm": 0.11968637257814407, "learning_rate": 0.0005, "loss": 2.1142, "step": 166150 }, { "epoch": 0.6324459703264999, "grad_norm": 0.12027929723262787, "learning_rate": 0.0005, "loss": 2.0807, "step": 166160 }, { "epoch": 0.6324840327946225, "grad_norm": 0.11879881471395493, "learning_rate": 0.0005, "loss": 2.1105, "step": 166170 }, { "epoch": 0.6325220952627452, "grad_norm": 0.13272880017757416, "learning_rate": 0.0005, "loss": 2.1228, "step": 166180 }, { "epoch": 0.6325601577308679, "grad_norm": 0.12231330573558807, "learning_rate": 0.0005, "loss": 2.1095, "step": 166190 }, { "epoch": 0.6325982201989906, "grad_norm": 0.13635841012001038, "learning_rate": 0.0005, "loss": 2.1088, "step": 166200 }, { "epoch": 0.6326362826671132, "grad_norm": 0.12453529238700867, "learning_rate": 0.0005, "loss": 2.0999, "step": 166210 }, { "epoch": 0.6326743451352359, "grad_norm": 0.12656286358833313, "learning_rate": 0.0005, "loss": 2.1093, "step": 166220 }, { "epoch": 0.6327124076033587, "grad_norm": 0.1307314932346344, "learning_rate": 0.0005, "loss": 2.1103, "step": 166230 }, { "epoch": 0.6327504700714813, "grad_norm": 0.12419044226408005, "learning_rate": 0.0005, "loss": 2.1216, "step": 166240 }, { "epoch": 0.632788532539604, "grad_norm": 0.11610493808984756, "learning_rate": 0.0005, "loss": 2.1114, "step": 166250 }, { "epoch": 0.6328265950077266, "grad_norm": 0.12077783793210983, "learning_rate": 0.0005, "loss": 2.1139, "step": 166260 }, { "epoch": 0.6328646574758494, "grad_norm": 0.13767452538013458, "learning_rate": 0.0005, "loss": 2.1075, "step": 166270 }, { "epoch": 0.6329027199439721, "grad_norm": 0.11943716555833817, "learning_rate": 0.0005, "loss": 2.1131, "step": 166280 }, { "epoch": 0.6329407824120947, "grad_norm": 0.12304921448230743, "learning_rate": 0.0005, "loss": 2.1079, "step": 166290 }, { "epoch": 0.6329788448802174, "grad_norm": 0.12921641767024994, "learning_rate": 0.0005, "loss": 2.1138, "step": 166300 }, { "epoch": 0.63301690734834, "grad_norm": 0.12375220656394958, "learning_rate": 0.0005, "loss": 2.1085, "step": 166310 }, { "epoch": 0.6330549698164628, "grad_norm": 0.12074637413024902, "learning_rate": 0.0005, "loss": 2.1074, "step": 166320 }, { "epoch": 0.6330930322845855, "grad_norm": 0.11634485423564911, "learning_rate": 0.0005, "loss": 2.1082, "step": 166330 }, { "epoch": 0.6331310947527081, "grad_norm": 0.1356639862060547, "learning_rate": 0.0005, "loss": 2.1054, "step": 166340 }, { "epoch": 0.6331691572208308, "grad_norm": 0.12911456823349, "learning_rate": 0.0005, "loss": 2.1069, "step": 166350 }, { "epoch": 0.6332072196889535, "grad_norm": 0.11508312076330185, "learning_rate": 0.0005, "loss": 2.1099, "step": 166360 }, { "epoch": 0.6332452821570762, "grad_norm": 0.11945507675409317, "learning_rate": 0.0005, "loss": 2.1136, "step": 166370 }, { "epoch": 0.6332833446251989, "grad_norm": 0.11480210721492767, "learning_rate": 0.0005, "loss": 2.107, "step": 166380 }, { "epoch": 0.6333214070933215, "grad_norm": 0.11745503544807434, "learning_rate": 0.0005, "loss": 2.1072, "step": 166390 }, { "epoch": 0.6333594695614443, "grad_norm": 0.12687115371227264, "learning_rate": 0.0005, "loss": 2.1116, "step": 166400 }, { "epoch": 0.633397532029567, "grad_norm": 0.12765026092529297, "learning_rate": 0.0005, "loss": 2.1025, "step": 166410 }, { "epoch": 0.6334355944976896, "grad_norm": 0.13198712468147278, "learning_rate": 0.0005, "loss": 2.1299, "step": 166420 }, { "epoch": 0.6334736569658123, "grad_norm": 0.12231364101171494, "learning_rate": 0.0005, "loss": 2.1306, "step": 166430 }, { "epoch": 0.633511719433935, "grad_norm": 0.1289694607257843, "learning_rate": 0.0005, "loss": 2.0956, "step": 166440 }, { "epoch": 0.6335497819020577, "grad_norm": 0.1258060783147812, "learning_rate": 0.0005, "loss": 2.0993, "step": 166450 }, { "epoch": 0.6335878443701803, "grad_norm": 0.12296685576438904, "learning_rate": 0.0005, "loss": 2.1228, "step": 166460 }, { "epoch": 0.633625906838303, "grad_norm": 0.11561420559883118, "learning_rate": 0.0005, "loss": 2.1057, "step": 166470 }, { "epoch": 0.6336639693064257, "grad_norm": 0.12371906638145447, "learning_rate": 0.0005, "loss": 2.1001, "step": 166480 }, { "epoch": 0.6337020317745484, "grad_norm": 0.13448134064674377, "learning_rate": 0.0005, "loss": 2.1053, "step": 166490 }, { "epoch": 0.6337400942426711, "grad_norm": 0.139088973402977, "learning_rate": 0.0005, "loss": 2.121, "step": 166500 }, { "epoch": 0.6337781567107937, "grad_norm": 0.116954505443573, "learning_rate": 0.0005, "loss": 2.1167, "step": 166510 }, { "epoch": 0.6338162191789164, "grad_norm": 0.12373578548431396, "learning_rate": 0.0005, "loss": 2.1119, "step": 166520 }, { "epoch": 0.6338542816470392, "grad_norm": 0.1236272007226944, "learning_rate": 0.0005, "loss": 2.1153, "step": 166530 }, { "epoch": 0.6338923441151618, "grad_norm": 0.11625051498413086, "learning_rate": 0.0005, "loss": 2.1087, "step": 166540 }, { "epoch": 0.6339304065832845, "grad_norm": 0.13029366731643677, "learning_rate": 0.0005, "loss": 2.1081, "step": 166550 }, { "epoch": 0.6339684690514071, "grad_norm": 0.12038466334342957, "learning_rate": 0.0005, "loss": 2.1114, "step": 166560 }, { "epoch": 0.6340065315195299, "grad_norm": 0.12874627113342285, "learning_rate": 0.0005, "loss": 2.1039, "step": 166570 }, { "epoch": 0.6340445939876526, "grad_norm": 0.11089406162500381, "learning_rate": 0.0005, "loss": 2.1107, "step": 166580 }, { "epoch": 0.6340826564557752, "grad_norm": 0.12499355524778366, "learning_rate": 0.0005, "loss": 2.115, "step": 166590 }, { "epoch": 0.6341207189238979, "grad_norm": 0.13173672556877136, "learning_rate": 0.0005, "loss": 2.1281, "step": 166600 }, { "epoch": 0.6341587813920205, "grad_norm": 0.1347840279340744, "learning_rate": 0.0005, "loss": 2.0987, "step": 166610 }, { "epoch": 0.6341968438601433, "grad_norm": 0.1311391443014145, "learning_rate": 0.0005, "loss": 2.1109, "step": 166620 }, { "epoch": 0.634234906328266, "grad_norm": 0.12326283007860184, "learning_rate": 0.0005, "loss": 2.123, "step": 166630 }, { "epoch": 0.6342729687963886, "grad_norm": 0.1423521339893341, "learning_rate": 0.0005, "loss": 2.1008, "step": 166640 }, { "epoch": 0.6343110312645113, "grad_norm": 0.1224626898765564, "learning_rate": 0.0005, "loss": 2.1112, "step": 166650 }, { "epoch": 0.634349093732634, "grad_norm": 0.11854662746191025, "learning_rate": 0.0005, "loss": 2.1207, "step": 166660 }, { "epoch": 0.6343871562007567, "grad_norm": 0.11189959943294525, "learning_rate": 0.0005, "loss": 2.1029, "step": 166670 }, { "epoch": 0.6344252186688794, "grad_norm": 0.9422677755355835, "learning_rate": 0.0005, "loss": 2.1281, "step": 166680 }, { "epoch": 0.634463281137002, "grad_norm": 0.1277889758348465, "learning_rate": 0.0005, "loss": 2.0983, "step": 166690 }, { "epoch": 0.6345013436051248, "grad_norm": 0.12071488797664642, "learning_rate": 0.0005, "loss": 2.1209, "step": 166700 }, { "epoch": 0.6345394060732474, "grad_norm": 0.11715003848075867, "learning_rate": 0.0005, "loss": 2.1281, "step": 166710 }, { "epoch": 0.6345774685413701, "grad_norm": 0.12750089168548584, "learning_rate": 0.0005, "loss": 2.1069, "step": 166720 }, { "epoch": 0.6346155310094928, "grad_norm": 0.12331834435462952, "learning_rate": 0.0005, "loss": 2.1062, "step": 166730 }, { "epoch": 0.6346535934776154, "grad_norm": 0.1349371075630188, "learning_rate": 0.0005, "loss": 2.1092, "step": 166740 }, { "epoch": 0.6346916559457382, "grad_norm": 0.12631113827228546, "learning_rate": 0.0005, "loss": 2.1136, "step": 166750 }, { "epoch": 0.6347297184138608, "grad_norm": 0.1224856749176979, "learning_rate": 0.0005, "loss": 2.1049, "step": 166760 }, { "epoch": 0.6347677808819835, "grad_norm": 0.11092979460954666, "learning_rate": 0.0005, "loss": 2.0955, "step": 166770 }, { "epoch": 0.6348058433501061, "grad_norm": 0.11893656104803085, "learning_rate": 0.0005, "loss": 2.1039, "step": 166780 }, { "epoch": 0.6348439058182289, "grad_norm": 0.1263199895620346, "learning_rate": 0.0005, "loss": 2.0971, "step": 166790 }, { "epoch": 0.6348819682863516, "grad_norm": 0.12650194764137268, "learning_rate": 0.0005, "loss": 2.1022, "step": 166800 }, { "epoch": 0.6349200307544742, "grad_norm": 0.12290910631418228, "learning_rate": 0.0005, "loss": 2.1074, "step": 166810 }, { "epoch": 0.6349580932225969, "grad_norm": 0.11825523525476456, "learning_rate": 0.0005, "loss": 2.1133, "step": 166820 }, { "epoch": 0.6349961556907197, "grad_norm": 0.13113102316856384, "learning_rate": 0.0005, "loss": 2.1106, "step": 166830 }, { "epoch": 0.6350342181588423, "grad_norm": 0.12884521484375, "learning_rate": 0.0005, "loss": 2.1055, "step": 166840 }, { "epoch": 0.635072280626965, "grad_norm": 0.11758764088153839, "learning_rate": 0.0005, "loss": 2.1017, "step": 166850 }, { "epoch": 0.6351103430950876, "grad_norm": 0.11539915204048157, "learning_rate": 0.0005, "loss": 2.1135, "step": 166860 }, { "epoch": 0.6351484055632104, "grad_norm": 0.12013541162014008, "learning_rate": 0.0005, "loss": 2.091, "step": 166870 }, { "epoch": 0.635186468031333, "grad_norm": 0.13431543111801147, "learning_rate": 0.0005, "loss": 2.1007, "step": 166880 }, { "epoch": 0.6352245304994557, "grad_norm": 0.1163390502333641, "learning_rate": 0.0005, "loss": 2.117, "step": 166890 }, { "epoch": 0.6352625929675784, "grad_norm": 0.12375345826148987, "learning_rate": 0.0005, "loss": 2.0979, "step": 166900 }, { "epoch": 0.635300655435701, "grad_norm": 0.12268900126218796, "learning_rate": 0.0005, "loss": 2.1174, "step": 166910 }, { "epoch": 0.6353387179038238, "grad_norm": 0.12074095755815506, "learning_rate": 0.0005, "loss": 2.1031, "step": 166920 }, { "epoch": 0.6353767803719464, "grad_norm": 0.1236800029873848, "learning_rate": 0.0005, "loss": 2.1105, "step": 166930 }, { "epoch": 0.6354148428400691, "grad_norm": 0.12753638625144958, "learning_rate": 0.0005, "loss": 2.1233, "step": 166940 }, { "epoch": 0.6354529053081918, "grad_norm": 0.12092048674821854, "learning_rate": 0.0005, "loss": 2.1262, "step": 166950 }, { "epoch": 0.6354909677763145, "grad_norm": 0.11950389295816422, "learning_rate": 0.0005, "loss": 2.1164, "step": 166960 }, { "epoch": 0.6355290302444372, "grad_norm": 0.12420322746038437, "learning_rate": 0.0005, "loss": 2.1218, "step": 166970 }, { "epoch": 0.6355670927125598, "grad_norm": 0.12446198612451553, "learning_rate": 0.0005, "loss": 2.1082, "step": 166980 }, { "epoch": 0.6356051551806825, "grad_norm": 0.12489917129278183, "learning_rate": 0.0005, "loss": 2.1209, "step": 166990 }, { "epoch": 0.6356432176488053, "grad_norm": 0.13189025223255157, "learning_rate": 0.0005, "loss": 2.0984, "step": 167000 }, { "epoch": 0.6356812801169279, "grad_norm": 0.11896958202123642, "learning_rate": 0.0005, "loss": 2.1185, "step": 167010 }, { "epoch": 0.6357193425850506, "grad_norm": 0.12319599837064743, "learning_rate": 0.0005, "loss": 2.0937, "step": 167020 }, { "epoch": 0.6357574050531732, "grad_norm": 0.12853579223155975, "learning_rate": 0.0005, "loss": 2.1168, "step": 167030 }, { "epoch": 0.6357954675212959, "grad_norm": 0.12506678700447083, "learning_rate": 0.0005, "loss": 2.099, "step": 167040 }, { "epoch": 0.6358335299894187, "grad_norm": 0.12734413146972656, "learning_rate": 0.0005, "loss": 2.0984, "step": 167050 }, { "epoch": 0.6358715924575413, "grad_norm": 0.1209283098578453, "learning_rate": 0.0005, "loss": 2.1116, "step": 167060 }, { "epoch": 0.635909654925664, "grad_norm": 0.12446945905685425, "learning_rate": 0.0005, "loss": 2.1144, "step": 167070 }, { "epoch": 0.6359477173937866, "grad_norm": 0.12285857647657394, "learning_rate": 0.0005, "loss": 2.1112, "step": 167080 }, { "epoch": 0.6359857798619094, "grad_norm": 0.12403672933578491, "learning_rate": 0.0005, "loss": 2.1085, "step": 167090 }, { "epoch": 0.6360238423300321, "grad_norm": 0.12527821958065033, "learning_rate": 0.0005, "loss": 2.1191, "step": 167100 }, { "epoch": 0.6360619047981547, "grad_norm": 0.13380716741085052, "learning_rate": 0.0005, "loss": 2.1296, "step": 167110 }, { "epoch": 0.6360999672662774, "grad_norm": 0.12255584448575974, "learning_rate": 0.0005, "loss": 2.122, "step": 167120 }, { "epoch": 0.6361380297344001, "grad_norm": 0.12181650102138519, "learning_rate": 0.0005, "loss": 2.1166, "step": 167130 }, { "epoch": 0.6361760922025228, "grad_norm": 0.11952023953199387, "learning_rate": 0.0005, "loss": 2.1238, "step": 167140 }, { "epoch": 0.6362141546706455, "grad_norm": 0.13203804194927216, "learning_rate": 0.0005, "loss": 2.1175, "step": 167150 }, { "epoch": 0.6362522171387681, "grad_norm": 0.13042880594730377, "learning_rate": 0.0005, "loss": 2.1073, "step": 167160 }, { "epoch": 0.6362902796068908, "grad_norm": 0.12648409605026245, "learning_rate": 0.0005, "loss": 2.1135, "step": 167170 }, { "epoch": 0.6363283420750135, "grad_norm": 0.11189969629049301, "learning_rate": 0.0005, "loss": 2.109, "step": 167180 }, { "epoch": 0.6363664045431362, "grad_norm": 0.12212081998586655, "learning_rate": 0.0005, "loss": 2.0869, "step": 167190 }, { "epoch": 0.6364044670112589, "grad_norm": 0.11594847589731216, "learning_rate": 0.0005, "loss": 2.1043, "step": 167200 }, { "epoch": 0.6364425294793815, "grad_norm": 0.126133993268013, "learning_rate": 0.0005, "loss": 2.0977, "step": 167210 }, { "epoch": 0.6364805919475043, "grad_norm": 0.1368551105260849, "learning_rate": 0.0005, "loss": 2.1183, "step": 167220 }, { "epoch": 0.6365186544156269, "grad_norm": 0.1192522794008255, "learning_rate": 0.0005, "loss": 2.1006, "step": 167230 }, { "epoch": 0.6365567168837496, "grad_norm": 0.1303003877401352, "learning_rate": 0.0005, "loss": 2.1181, "step": 167240 }, { "epoch": 0.6365947793518723, "grad_norm": 0.1174224391579628, "learning_rate": 0.0005, "loss": 2.1213, "step": 167250 }, { "epoch": 0.636632841819995, "grad_norm": 0.14051686227321625, "learning_rate": 0.0005, "loss": 2.1016, "step": 167260 }, { "epoch": 0.6366709042881177, "grad_norm": 0.1523093581199646, "learning_rate": 0.0005, "loss": 2.1068, "step": 167270 }, { "epoch": 0.6367089667562403, "grad_norm": 0.11615092307329178, "learning_rate": 0.0005, "loss": 2.111, "step": 167280 }, { "epoch": 0.636747029224363, "grad_norm": 0.1295924186706543, "learning_rate": 0.0005, "loss": 2.1028, "step": 167290 }, { "epoch": 0.6367850916924858, "grad_norm": 0.11747189611196518, "learning_rate": 0.0005, "loss": 2.114, "step": 167300 }, { "epoch": 0.6368231541606084, "grad_norm": 0.1178893968462944, "learning_rate": 0.0005, "loss": 2.1122, "step": 167310 }, { "epoch": 0.6368612166287311, "grad_norm": 0.12379597872495651, "learning_rate": 0.0005, "loss": 2.0963, "step": 167320 }, { "epoch": 0.6368992790968537, "grad_norm": 0.13348908722400665, "learning_rate": 0.0005, "loss": 2.112, "step": 167330 }, { "epoch": 0.6369373415649764, "grad_norm": 0.12309478968381882, "learning_rate": 0.0005, "loss": 2.116, "step": 167340 }, { "epoch": 0.6369754040330992, "grad_norm": 0.1236153319478035, "learning_rate": 0.0005, "loss": 2.1128, "step": 167350 }, { "epoch": 0.6370134665012218, "grad_norm": 0.11330470442771912, "learning_rate": 0.0005, "loss": 2.1061, "step": 167360 }, { "epoch": 0.6370515289693445, "grad_norm": 0.11641678214073181, "learning_rate": 0.0005, "loss": 2.1162, "step": 167370 }, { "epoch": 0.6370895914374671, "grad_norm": 0.1299334019422531, "learning_rate": 0.0005, "loss": 2.1221, "step": 167380 }, { "epoch": 0.6371276539055899, "grad_norm": 0.12439191341400146, "learning_rate": 0.0005, "loss": 2.1264, "step": 167390 }, { "epoch": 0.6371657163737126, "grad_norm": 0.11246515810489655, "learning_rate": 0.0005, "loss": 2.0909, "step": 167400 }, { "epoch": 0.6372037788418352, "grad_norm": 0.12425049394369125, "learning_rate": 0.0005, "loss": 2.1104, "step": 167410 }, { "epoch": 0.6372418413099579, "grad_norm": 0.11815284937620163, "learning_rate": 0.0005, "loss": 2.1172, "step": 167420 }, { "epoch": 0.6372799037780806, "grad_norm": 0.13013459742069244, "learning_rate": 0.0005, "loss": 2.1034, "step": 167430 }, { "epoch": 0.6373179662462033, "grad_norm": 0.11978629976511002, "learning_rate": 0.0005, "loss": 2.1153, "step": 167440 }, { "epoch": 0.637356028714326, "grad_norm": 0.11710581183433533, "learning_rate": 0.0005, "loss": 2.1105, "step": 167450 }, { "epoch": 0.6373940911824486, "grad_norm": 0.11174238473176956, "learning_rate": 0.0005, "loss": 2.1156, "step": 167460 }, { "epoch": 0.6374321536505713, "grad_norm": 0.13608428835868835, "learning_rate": 0.0005, "loss": 2.099, "step": 167470 }, { "epoch": 0.637470216118694, "grad_norm": 0.12688113749027252, "learning_rate": 0.0005, "loss": 2.1234, "step": 167480 }, { "epoch": 0.6375082785868167, "grad_norm": 0.12677867710590363, "learning_rate": 0.0005, "loss": 2.1114, "step": 167490 }, { "epoch": 0.6375463410549393, "grad_norm": 0.12160094827413559, "learning_rate": 0.0005, "loss": 2.1153, "step": 167500 }, { "epoch": 0.637584403523062, "grad_norm": 0.12581761181354523, "learning_rate": 0.0005, "loss": 2.1099, "step": 167510 }, { "epoch": 0.6376224659911848, "grad_norm": 0.13145707547664642, "learning_rate": 0.0005, "loss": 2.1199, "step": 167520 }, { "epoch": 0.6376605284593074, "grad_norm": 0.12493417412042618, "learning_rate": 0.0005, "loss": 2.0906, "step": 167530 }, { "epoch": 0.6376985909274301, "grad_norm": 0.1158856451511383, "learning_rate": 0.0005, "loss": 2.105, "step": 167540 }, { "epoch": 0.6377366533955527, "grad_norm": 0.11572497338056564, "learning_rate": 0.0005, "loss": 2.123, "step": 167550 }, { "epoch": 0.6377747158636755, "grad_norm": 0.12406877428293228, "learning_rate": 0.0005, "loss": 2.1159, "step": 167560 }, { "epoch": 0.6378127783317982, "grad_norm": 0.12328975647687912, "learning_rate": 0.0005, "loss": 2.1072, "step": 167570 }, { "epoch": 0.6378508407999208, "grad_norm": 0.11333654075860977, "learning_rate": 0.0005, "loss": 2.1061, "step": 167580 }, { "epoch": 0.6378889032680435, "grad_norm": 0.12689661979675293, "learning_rate": 0.0005, "loss": 2.1073, "step": 167590 }, { "epoch": 0.6379269657361661, "grad_norm": 0.11475737392902374, "learning_rate": 0.0005, "loss": 2.1037, "step": 167600 }, { "epoch": 0.6379650282042889, "grad_norm": 0.1381559669971466, "learning_rate": 0.0005, "loss": 2.1236, "step": 167610 }, { "epoch": 0.6380030906724116, "grad_norm": 0.12458667904138565, "learning_rate": 0.0005, "loss": 2.1267, "step": 167620 }, { "epoch": 0.6380411531405342, "grad_norm": 0.1247856616973877, "learning_rate": 0.0005, "loss": 2.1043, "step": 167630 }, { "epoch": 0.6380792156086569, "grad_norm": 0.12368963658809662, "learning_rate": 0.0005, "loss": 2.0934, "step": 167640 }, { "epoch": 0.6381172780767796, "grad_norm": 0.11723518371582031, "learning_rate": 0.0005, "loss": 2.1006, "step": 167650 }, { "epoch": 0.6381553405449023, "grad_norm": 0.12084148079156876, "learning_rate": 0.0005, "loss": 2.1148, "step": 167660 }, { "epoch": 0.638193403013025, "grad_norm": 0.12505698204040527, "learning_rate": 0.0005, "loss": 2.1035, "step": 167670 }, { "epoch": 0.6382314654811476, "grad_norm": 0.12336069345474243, "learning_rate": 0.0005, "loss": 2.1081, "step": 167680 }, { "epoch": 0.6382695279492704, "grad_norm": 0.12355723232030869, "learning_rate": 0.0005, "loss": 2.115, "step": 167690 }, { "epoch": 0.638307590417393, "grad_norm": 0.12749448418617249, "learning_rate": 0.0005, "loss": 2.1104, "step": 167700 }, { "epoch": 0.6383456528855157, "grad_norm": 0.11271673440933228, "learning_rate": 0.0005, "loss": 2.1147, "step": 167710 }, { "epoch": 0.6383837153536384, "grad_norm": 0.12633991241455078, "learning_rate": 0.0005, "loss": 2.1093, "step": 167720 }, { "epoch": 0.6384217778217611, "grad_norm": 0.12690429389476776, "learning_rate": 0.0005, "loss": 2.0995, "step": 167730 }, { "epoch": 0.6384598402898838, "grad_norm": 0.12385845184326172, "learning_rate": 0.0005, "loss": 2.0894, "step": 167740 }, { "epoch": 0.6384979027580064, "grad_norm": 0.11217658221721649, "learning_rate": 0.0005, "loss": 2.1198, "step": 167750 }, { "epoch": 0.6385359652261291, "grad_norm": 0.13572321832180023, "learning_rate": 0.0005, "loss": 2.1161, "step": 167760 }, { "epoch": 0.6385740276942518, "grad_norm": 0.11481758952140808, "learning_rate": 0.0005, "loss": 2.1055, "step": 167770 }, { "epoch": 0.6386120901623745, "grad_norm": 0.12268579751253128, "learning_rate": 0.0005, "loss": 2.1007, "step": 167780 }, { "epoch": 0.6386501526304972, "grad_norm": 0.1343347132205963, "learning_rate": 0.0005, "loss": 2.117, "step": 167790 }, { "epoch": 0.6386882150986198, "grad_norm": 0.12613916397094727, "learning_rate": 0.0005, "loss": 2.1251, "step": 167800 }, { "epoch": 0.6387262775667425, "grad_norm": 0.13326533138751984, "learning_rate": 0.0005, "loss": 2.1061, "step": 167810 }, { "epoch": 0.6387643400348653, "grad_norm": 0.11773496121168137, "learning_rate": 0.0005, "loss": 2.1169, "step": 167820 }, { "epoch": 0.6388024025029879, "grad_norm": 0.11977910995483398, "learning_rate": 0.0005, "loss": 2.124, "step": 167830 }, { "epoch": 0.6388404649711106, "grad_norm": 0.224679633975029, "learning_rate": 0.0005, "loss": 2.105, "step": 167840 }, { "epoch": 0.6388785274392332, "grad_norm": 0.12830469012260437, "learning_rate": 0.0005, "loss": 2.1074, "step": 167850 }, { "epoch": 0.638916589907356, "grad_norm": 0.13360096514225006, "learning_rate": 0.0005, "loss": 2.1197, "step": 167860 }, { "epoch": 0.6389546523754787, "grad_norm": 0.11928125470876694, "learning_rate": 0.0005, "loss": 2.1269, "step": 167870 }, { "epoch": 0.6389927148436013, "grad_norm": 0.11468581110239029, "learning_rate": 0.0005, "loss": 2.1131, "step": 167880 }, { "epoch": 0.639030777311724, "grad_norm": 0.1287572979927063, "learning_rate": 0.0005, "loss": 2.1074, "step": 167890 }, { "epoch": 0.6390688397798466, "grad_norm": 0.12990231812000275, "learning_rate": 0.0005, "loss": 2.0983, "step": 167900 }, { "epoch": 0.6391069022479694, "grad_norm": 0.137055903673172, "learning_rate": 0.0005, "loss": 2.1138, "step": 167910 }, { "epoch": 0.6391449647160921, "grad_norm": 0.13186711072921753, "learning_rate": 0.0005, "loss": 2.1132, "step": 167920 }, { "epoch": 0.6391830271842147, "grad_norm": 0.12623269855976105, "learning_rate": 0.0005, "loss": 2.1129, "step": 167930 }, { "epoch": 0.6392210896523374, "grad_norm": 0.12275701761245728, "learning_rate": 0.0005, "loss": 2.1201, "step": 167940 }, { "epoch": 0.6392591521204601, "grad_norm": 0.12814989686012268, "learning_rate": 0.0005, "loss": 2.1046, "step": 167950 }, { "epoch": 0.6392972145885828, "grad_norm": 0.13093677163124084, "learning_rate": 0.0005, "loss": 2.1063, "step": 167960 }, { "epoch": 0.6393352770567055, "grad_norm": 0.12273939698934555, "learning_rate": 0.0005, "loss": 2.1176, "step": 167970 }, { "epoch": 0.6393733395248281, "grad_norm": 0.1292058527469635, "learning_rate": 0.0005, "loss": 2.1057, "step": 167980 }, { "epoch": 0.6394114019929509, "grad_norm": 0.13064897060394287, "learning_rate": 0.0005, "loss": 2.1101, "step": 167990 }, { "epoch": 0.6394494644610735, "grad_norm": 0.12779417634010315, "learning_rate": 0.0005, "loss": 2.075, "step": 168000 }, { "epoch": 0.6394875269291962, "grad_norm": 0.11623568087816238, "learning_rate": 0.0005, "loss": 2.1028, "step": 168010 }, { "epoch": 0.6395255893973188, "grad_norm": 0.12801700830459595, "learning_rate": 0.0005, "loss": 2.1194, "step": 168020 }, { "epoch": 0.6395636518654416, "grad_norm": 0.12461545318365097, "learning_rate": 0.0005, "loss": 2.1141, "step": 168030 }, { "epoch": 0.6396017143335643, "grad_norm": 0.11495253443717957, "learning_rate": 0.0005, "loss": 2.12, "step": 168040 }, { "epoch": 0.6396397768016869, "grad_norm": 0.1169470027089119, "learning_rate": 0.0005, "loss": 2.1024, "step": 168050 }, { "epoch": 0.6396778392698096, "grad_norm": 0.1238497868180275, "learning_rate": 0.0005, "loss": 2.1058, "step": 168060 }, { "epoch": 0.6397159017379322, "grad_norm": 0.12846608459949493, "learning_rate": 0.0005, "loss": 2.0886, "step": 168070 }, { "epoch": 0.639753964206055, "grad_norm": 0.14013221859931946, "learning_rate": 0.0005, "loss": 2.108, "step": 168080 }, { "epoch": 0.6397920266741777, "grad_norm": 0.11937173455953598, "learning_rate": 0.0005, "loss": 2.1123, "step": 168090 }, { "epoch": 0.6398300891423003, "grad_norm": 0.12790927290916443, "learning_rate": 0.0005, "loss": 2.1142, "step": 168100 }, { "epoch": 0.639868151610423, "grad_norm": 0.11358696222305298, "learning_rate": 0.0005, "loss": 2.1317, "step": 168110 }, { "epoch": 0.6399062140785458, "grad_norm": 0.12121156603097916, "learning_rate": 0.0005, "loss": 2.1301, "step": 168120 }, { "epoch": 0.6399442765466684, "grad_norm": 0.13541017472743988, "learning_rate": 0.0005, "loss": 2.1117, "step": 168130 }, { "epoch": 0.6399823390147911, "grad_norm": 0.12673501670360565, "learning_rate": 0.0005, "loss": 2.1043, "step": 168140 }, { "epoch": 0.6400204014829137, "grad_norm": 0.1475529968738556, "learning_rate": 0.0005, "loss": 2.1119, "step": 168150 }, { "epoch": 0.6400584639510365, "grad_norm": 0.11461649090051651, "learning_rate": 0.0005, "loss": 2.1088, "step": 168160 }, { "epoch": 0.6400965264191592, "grad_norm": 0.12911100685596466, "learning_rate": 0.0005, "loss": 2.1144, "step": 168170 }, { "epoch": 0.6401345888872818, "grad_norm": 0.10865405201911926, "learning_rate": 0.0005, "loss": 2.1083, "step": 168180 }, { "epoch": 0.6401726513554045, "grad_norm": 0.13911312818527222, "learning_rate": 0.0005, "loss": 2.1085, "step": 168190 }, { "epoch": 0.6402107138235271, "grad_norm": 0.12169904261827469, "learning_rate": 0.0005, "loss": 2.1087, "step": 168200 }, { "epoch": 0.6402487762916499, "grad_norm": 0.13228438794612885, "learning_rate": 0.0005, "loss": 2.1053, "step": 168210 }, { "epoch": 0.6402868387597725, "grad_norm": 0.1235433891415596, "learning_rate": 0.0005, "loss": 2.1157, "step": 168220 }, { "epoch": 0.6403249012278952, "grad_norm": 0.13303908705711365, "learning_rate": 0.0005, "loss": 2.1019, "step": 168230 }, { "epoch": 0.6403629636960179, "grad_norm": 0.11614194512367249, "learning_rate": 0.0005, "loss": 2.1052, "step": 168240 }, { "epoch": 0.6404010261641406, "grad_norm": 0.12898878753185272, "learning_rate": 0.0005, "loss": 2.1023, "step": 168250 }, { "epoch": 0.6404390886322633, "grad_norm": 0.1185198649764061, "learning_rate": 0.0005, "loss": 2.0975, "step": 168260 }, { "epoch": 0.6404771511003859, "grad_norm": 0.11852993816137314, "learning_rate": 0.0005, "loss": 2.0982, "step": 168270 }, { "epoch": 0.6405152135685086, "grad_norm": 0.1243414580821991, "learning_rate": 0.0005, "loss": 2.128, "step": 168280 }, { "epoch": 0.6405532760366314, "grad_norm": 0.12120532989501953, "learning_rate": 0.0005, "loss": 2.0965, "step": 168290 }, { "epoch": 0.640591338504754, "grad_norm": 0.1326688826084137, "learning_rate": 0.0005, "loss": 2.0858, "step": 168300 }, { "epoch": 0.6406294009728767, "grad_norm": 0.11883825808763504, "learning_rate": 0.0005, "loss": 2.1102, "step": 168310 }, { "epoch": 0.6406674634409993, "grad_norm": 0.11887935549020767, "learning_rate": 0.0005, "loss": 2.1103, "step": 168320 }, { "epoch": 0.640705525909122, "grad_norm": 0.11938267201185226, "learning_rate": 0.0005, "loss": 2.1067, "step": 168330 }, { "epoch": 0.6407435883772448, "grad_norm": 0.13582032918930054, "learning_rate": 0.0005, "loss": 2.1196, "step": 168340 }, { "epoch": 0.6407816508453674, "grad_norm": 0.12318161129951477, "learning_rate": 0.0005, "loss": 2.1186, "step": 168350 }, { "epoch": 0.6408197133134901, "grad_norm": 0.11993315070867538, "learning_rate": 0.0005, "loss": 2.1131, "step": 168360 }, { "epoch": 0.6408577757816127, "grad_norm": 0.12338174134492874, "learning_rate": 0.0005, "loss": 2.1074, "step": 168370 }, { "epoch": 0.6408958382497355, "grad_norm": 0.11832733452320099, "learning_rate": 0.0005, "loss": 2.1176, "step": 168380 }, { "epoch": 0.6409339007178582, "grad_norm": 0.12378036230802536, "learning_rate": 0.0005, "loss": 2.117, "step": 168390 }, { "epoch": 0.6409719631859808, "grad_norm": 0.13132528960704803, "learning_rate": 0.0005, "loss": 2.1027, "step": 168400 }, { "epoch": 0.6410100256541035, "grad_norm": 0.11649808287620544, "learning_rate": 0.0005, "loss": 2.1078, "step": 168410 }, { "epoch": 0.6410480881222262, "grad_norm": 0.12604773044586182, "learning_rate": 0.0005, "loss": 2.0951, "step": 168420 }, { "epoch": 0.6410861505903489, "grad_norm": 0.12101250886917114, "learning_rate": 0.0005, "loss": 2.1157, "step": 168430 }, { "epoch": 0.6411242130584716, "grad_norm": 0.12148529291152954, "learning_rate": 0.0005, "loss": 2.1022, "step": 168440 }, { "epoch": 0.6411622755265942, "grad_norm": 0.12714166939258575, "learning_rate": 0.0005, "loss": 2.1148, "step": 168450 }, { "epoch": 0.641200337994717, "grad_norm": 0.11625314503908157, "learning_rate": 0.0005, "loss": 2.1125, "step": 168460 }, { "epoch": 0.6412384004628396, "grad_norm": 0.1165272668004036, "learning_rate": 0.0005, "loss": 2.1124, "step": 168470 }, { "epoch": 0.6412764629309623, "grad_norm": 0.12469862401485443, "learning_rate": 0.0005, "loss": 2.1066, "step": 168480 }, { "epoch": 0.641314525399085, "grad_norm": 0.12034633010625839, "learning_rate": 0.0005, "loss": 2.0975, "step": 168490 }, { "epoch": 0.6413525878672076, "grad_norm": 0.1089642271399498, "learning_rate": 0.0005, "loss": 2.1039, "step": 168500 }, { "epoch": 0.6413906503353304, "grad_norm": 0.12195317447185516, "learning_rate": 0.0005, "loss": 2.0986, "step": 168510 }, { "epoch": 0.641428712803453, "grad_norm": 0.13394524157047272, "learning_rate": 0.0005, "loss": 2.1021, "step": 168520 }, { "epoch": 0.6414667752715757, "grad_norm": 0.11951465904712677, "learning_rate": 0.0005, "loss": 2.1216, "step": 168530 }, { "epoch": 0.6415048377396984, "grad_norm": 0.13562636077404022, "learning_rate": 0.0005, "loss": 2.1159, "step": 168540 }, { "epoch": 0.6415429002078211, "grad_norm": 0.12247107923030853, "learning_rate": 0.0005, "loss": 2.0985, "step": 168550 }, { "epoch": 0.6415809626759438, "grad_norm": 0.12110818922519684, "learning_rate": 0.0005, "loss": 2.115, "step": 168560 }, { "epoch": 0.6416190251440664, "grad_norm": 0.12524105608463287, "learning_rate": 0.0005, "loss": 2.1091, "step": 168570 }, { "epoch": 0.6416570876121891, "grad_norm": 0.1352654993534088, "learning_rate": 0.0005, "loss": 2.108, "step": 168580 }, { "epoch": 0.6416951500803119, "grad_norm": 0.1185445487499237, "learning_rate": 0.0005, "loss": 2.1027, "step": 168590 }, { "epoch": 0.6417332125484345, "grad_norm": 0.13428305089473724, "learning_rate": 0.0005, "loss": 2.0976, "step": 168600 }, { "epoch": 0.6417712750165572, "grad_norm": 0.13018237054347992, "learning_rate": 0.0005, "loss": 2.1217, "step": 168610 }, { "epoch": 0.6418093374846798, "grad_norm": 0.13127325475215912, "learning_rate": 0.0005, "loss": 2.113, "step": 168620 }, { "epoch": 0.6418473999528025, "grad_norm": 0.12687428295612335, "learning_rate": 0.0005, "loss": 2.1078, "step": 168630 }, { "epoch": 0.6418854624209253, "grad_norm": 0.14191210269927979, "learning_rate": 0.0005, "loss": 2.1048, "step": 168640 }, { "epoch": 0.6419235248890479, "grad_norm": 0.12420354038476944, "learning_rate": 0.0005, "loss": 2.1053, "step": 168650 }, { "epoch": 0.6419615873571706, "grad_norm": 0.13143634796142578, "learning_rate": 0.0005, "loss": 2.115, "step": 168660 }, { "epoch": 0.6419996498252932, "grad_norm": 0.1170438900589943, "learning_rate": 0.0005, "loss": 2.1105, "step": 168670 }, { "epoch": 0.642037712293416, "grad_norm": 0.14256148040294647, "learning_rate": 0.0005, "loss": 2.1137, "step": 168680 }, { "epoch": 0.6420757747615387, "grad_norm": 0.12867961823940277, "learning_rate": 0.0005, "loss": 2.1073, "step": 168690 }, { "epoch": 0.6421138372296613, "grad_norm": 0.12932276725769043, "learning_rate": 0.0005, "loss": 2.1036, "step": 168700 }, { "epoch": 0.642151899697784, "grad_norm": 0.14709927141666412, "learning_rate": 0.0005, "loss": 2.1122, "step": 168710 }, { "epoch": 0.6421899621659067, "grad_norm": 0.12742719054222107, "learning_rate": 0.0005, "loss": 2.1145, "step": 168720 }, { "epoch": 0.6422280246340294, "grad_norm": 0.12468601018190384, "learning_rate": 0.0005, "loss": 2.1277, "step": 168730 }, { "epoch": 0.642266087102152, "grad_norm": 0.12178006023168564, "learning_rate": 0.0005, "loss": 2.1064, "step": 168740 }, { "epoch": 0.6423041495702747, "grad_norm": 0.12032576650381088, "learning_rate": 0.0005, "loss": 2.1036, "step": 168750 }, { "epoch": 0.6423422120383974, "grad_norm": 0.11936362087726593, "learning_rate": 0.0005, "loss": 2.0937, "step": 168760 }, { "epoch": 0.6423802745065201, "grad_norm": 0.11243180185556412, "learning_rate": 0.0005, "loss": 2.1093, "step": 168770 }, { "epoch": 0.6424183369746428, "grad_norm": 0.12037627398967743, "learning_rate": 0.0005, "loss": 2.1035, "step": 168780 }, { "epoch": 0.6424563994427654, "grad_norm": 0.1435946524143219, "learning_rate": 0.0005, "loss": 2.1195, "step": 168790 }, { "epoch": 0.6424944619108881, "grad_norm": 0.128703311085701, "learning_rate": 0.0005, "loss": 2.1114, "step": 168800 }, { "epoch": 0.6425325243790109, "grad_norm": 0.12024714052677155, "learning_rate": 0.0005, "loss": 2.1189, "step": 168810 }, { "epoch": 0.6425705868471335, "grad_norm": 0.12797857820987701, "learning_rate": 0.0005, "loss": 2.088, "step": 168820 }, { "epoch": 0.6426086493152562, "grad_norm": 0.12992393970489502, "learning_rate": 0.0005, "loss": 2.1102, "step": 168830 }, { "epoch": 0.6426467117833788, "grad_norm": 0.1158052310347557, "learning_rate": 0.0005, "loss": 2.1124, "step": 168840 }, { "epoch": 0.6426847742515016, "grad_norm": 0.12053052335977554, "learning_rate": 0.0005, "loss": 2.1188, "step": 168850 }, { "epoch": 0.6427228367196243, "grad_norm": 0.13656136393547058, "learning_rate": 0.0005, "loss": 2.1015, "step": 168860 }, { "epoch": 0.6427608991877469, "grad_norm": 0.11971770226955414, "learning_rate": 0.0005, "loss": 2.0972, "step": 168870 }, { "epoch": 0.6427989616558696, "grad_norm": 0.11107343435287476, "learning_rate": 0.0005, "loss": 2.0961, "step": 168880 }, { "epoch": 0.6428370241239924, "grad_norm": 0.12417291104793549, "learning_rate": 0.0005, "loss": 2.0989, "step": 168890 }, { "epoch": 0.642875086592115, "grad_norm": 0.12001986801624298, "learning_rate": 0.0005, "loss": 2.1069, "step": 168900 }, { "epoch": 0.6429131490602377, "grad_norm": 0.13117973506450653, "learning_rate": 0.0005, "loss": 2.101, "step": 168910 }, { "epoch": 0.6429512115283603, "grad_norm": 0.13548439741134644, "learning_rate": 0.0005, "loss": 2.09, "step": 168920 }, { "epoch": 0.642989273996483, "grad_norm": 0.11892268061637878, "learning_rate": 0.0005, "loss": 2.114, "step": 168930 }, { "epoch": 0.6430273364646057, "grad_norm": 0.11337054520845413, "learning_rate": 0.0005, "loss": 2.0935, "step": 168940 }, { "epoch": 0.6430653989327284, "grad_norm": 0.12810419499874115, "learning_rate": 0.0005, "loss": 2.1111, "step": 168950 }, { "epoch": 0.6431034614008511, "grad_norm": 0.12267080694437027, "learning_rate": 0.0005, "loss": 2.1157, "step": 168960 }, { "epoch": 0.6431415238689737, "grad_norm": 0.141947939991951, "learning_rate": 0.0005, "loss": 2.103, "step": 168970 }, { "epoch": 0.6431795863370965, "grad_norm": 0.12055546045303345, "learning_rate": 0.0005, "loss": 2.0996, "step": 168980 }, { "epoch": 0.6432176488052191, "grad_norm": 0.13173562288284302, "learning_rate": 0.0005, "loss": 2.0955, "step": 168990 }, { "epoch": 0.6432557112733418, "grad_norm": 0.11383207887411118, "learning_rate": 0.0005, "loss": 2.1043, "step": 169000 }, { "epoch": 0.6432937737414645, "grad_norm": 0.11952648311853409, "learning_rate": 0.0005, "loss": 2.0955, "step": 169010 }, { "epoch": 0.6433318362095872, "grad_norm": 0.12814606726169586, "learning_rate": 0.0005, "loss": 2.1075, "step": 169020 }, { "epoch": 0.6433698986777099, "grad_norm": 0.11744929850101471, "learning_rate": 0.0005, "loss": 2.1005, "step": 169030 }, { "epoch": 0.6434079611458325, "grad_norm": 0.12779894471168518, "learning_rate": 0.0005, "loss": 2.105, "step": 169040 }, { "epoch": 0.6434460236139552, "grad_norm": 0.119678795337677, "learning_rate": 0.0005, "loss": 2.1193, "step": 169050 }, { "epoch": 0.6434840860820779, "grad_norm": 0.1276785284280777, "learning_rate": 0.0005, "loss": 2.1132, "step": 169060 }, { "epoch": 0.6435221485502006, "grad_norm": 0.12187217175960541, "learning_rate": 0.0005, "loss": 2.0983, "step": 169070 }, { "epoch": 0.6435602110183233, "grad_norm": 0.11958392709493637, "learning_rate": 0.0005, "loss": 2.1246, "step": 169080 }, { "epoch": 0.6435982734864459, "grad_norm": 0.12024261802434921, "learning_rate": 0.0005, "loss": 2.1175, "step": 169090 }, { "epoch": 0.6436363359545686, "grad_norm": 0.12304984778165817, "learning_rate": 0.0005, "loss": 2.1118, "step": 169100 }, { "epoch": 0.6436743984226914, "grad_norm": 0.12955528497695923, "learning_rate": 0.0005, "loss": 2.1044, "step": 169110 }, { "epoch": 0.643712460890814, "grad_norm": 0.11826508492231369, "learning_rate": 0.0005, "loss": 2.1212, "step": 169120 }, { "epoch": 0.6437505233589367, "grad_norm": 0.1322208046913147, "learning_rate": 0.0005, "loss": 2.1118, "step": 169130 }, { "epoch": 0.6437885858270593, "grad_norm": 0.1285995990037918, "learning_rate": 0.0005, "loss": 2.1129, "step": 169140 }, { "epoch": 0.6438266482951821, "grad_norm": 0.1301390677690506, "learning_rate": 0.0005, "loss": 2.0966, "step": 169150 }, { "epoch": 0.6438647107633048, "grad_norm": 0.11197373270988464, "learning_rate": 0.0005, "loss": 2.1244, "step": 169160 }, { "epoch": 0.6439027732314274, "grad_norm": 0.12456469982862473, "learning_rate": 0.0005, "loss": 2.1161, "step": 169170 }, { "epoch": 0.6439408356995501, "grad_norm": 0.17039534449577332, "learning_rate": 0.0005, "loss": 2.1177, "step": 169180 }, { "epoch": 0.6439788981676727, "grad_norm": 0.11926776170730591, "learning_rate": 0.0005, "loss": 2.1157, "step": 169190 }, { "epoch": 0.6440169606357955, "grad_norm": 0.12898243963718414, "learning_rate": 0.0005, "loss": 2.1035, "step": 169200 }, { "epoch": 0.6440550231039182, "grad_norm": 0.12652383744716644, "learning_rate": 0.0005, "loss": 2.1016, "step": 169210 }, { "epoch": 0.6440930855720408, "grad_norm": 0.12866918742656708, "learning_rate": 0.0005, "loss": 2.1115, "step": 169220 }, { "epoch": 0.6441311480401635, "grad_norm": 0.12797291576862335, "learning_rate": 0.0005, "loss": 2.1065, "step": 169230 }, { "epoch": 0.6441692105082862, "grad_norm": 0.12362707406282425, "learning_rate": 0.0005, "loss": 2.1148, "step": 169240 }, { "epoch": 0.6442072729764089, "grad_norm": 0.11655452847480774, "learning_rate": 0.0005, "loss": 2.1121, "step": 169250 }, { "epoch": 0.6442453354445316, "grad_norm": 0.13020145893096924, "learning_rate": 0.0005, "loss": 2.1054, "step": 169260 }, { "epoch": 0.6442833979126542, "grad_norm": 0.11631694436073303, "learning_rate": 0.0005, "loss": 2.1133, "step": 169270 }, { "epoch": 0.644321460380777, "grad_norm": 0.12558521330356598, "learning_rate": 0.0005, "loss": 2.1148, "step": 169280 }, { "epoch": 0.6443595228488996, "grad_norm": 0.11559248715639114, "learning_rate": 0.0005, "loss": 2.1195, "step": 169290 }, { "epoch": 0.6443975853170223, "grad_norm": 0.11725137382745743, "learning_rate": 0.0005, "loss": 2.1021, "step": 169300 }, { "epoch": 0.644435647785145, "grad_norm": 0.12991683185100555, "learning_rate": 0.0005, "loss": 2.1201, "step": 169310 }, { "epoch": 0.6444737102532677, "grad_norm": 0.1312405914068222, "learning_rate": 0.0005, "loss": 2.1183, "step": 169320 }, { "epoch": 0.6445117727213904, "grad_norm": 0.12225445359945297, "learning_rate": 0.0005, "loss": 2.1113, "step": 169330 }, { "epoch": 0.644549835189513, "grad_norm": 0.12185630202293396, "learning_rate": 0.0005, "loss": 2.118, "step": 169340 }, { "epoch": 0.6445878976576357, "grad_norm": 0.12531758844852448, "learning_rate": 0.0005, "loss": 2.113, "step": 169350 }, { "epoch": 0.6446259601257583, "grad_norm": 0.12139049172401428, "learning_rate": 0.0005, "loss": 2.1092, "step": 169360 }, { "epoch": 0.6446640225938811, "grad_norm": 0.11470047384500504, "learning_rate": 0.0005, "loss": 2.1062, "step": 169370 }, { "epoch": 0.6447020850620038, "grad_norm": 0.12508058547973633, "learning_rate": 0.0005, "loss": 2.1209, "step": 169380 }, { "epoch": 0.6447401475301264, "grad_norm": 0.11169907450675964, "learning_rate": 0.0005, "loss": 2.0984, "step": 169390 }, { "epoch": 0.6447782099982491, "grad_norm": 0.1254015862941742, "learning_rate": 0.0005, "loss": 2.1218, "step": 169400 }, { "epoch": 0.6448162724663719, "grad_norm": 0.1406705379486084, "learning_rate": 0.0005, "loss": 2.1069, "step": 169410 }, { "epoch": 0.6448543349344945, "grad_norm": 0.14721862971782684, "learning_rate": 0.0005, "loss": 2.1095, "step": 169420 }, { "epoch": 0.6448923974026172, "grad_norm": 0.13507451117038727, "learning_rate": 0.0005, "loss": 2.1172, "step": 169430 }, { "epoch": 0.6449304598707398, "grad_norm": 0.11473426967859268, "learning_rate": 0.0005, "loss": 2.1162, "step": 169440 }, { "epoch": 0.6449685223388626, "grad_norm": 0.1197873130440712, "learning_rate": 0.0005, "loss": 2.11, "step": 169450 }, { "epoch": 0.6450065848069853, "grad_norm": 0.11934750527143478, "learning_rate": 0.0005, "loss": 2.1222, "step": 169460 }, { "epoch": 0.6450446472751079, "grad_norm": 0.12757621705532074, "learning_rate": 0.0005, "loss": 2.1037, "step": 169470 }, { "epoch": 0.6450827097432306, "grad_norm": 0.1281720995903015, "learning_rate": 0.0005, "loss": 2.1158, "step": 169480 }, { "epoch": 0.6451207722113532, "grad_norm": 0.15214742720127106, "learning_rate": 0.0005, "loss": 2.1158, "step": 169490 }, { "epoch": 0.645158834679476, "grad_norm": 0.14202375710010529, "learning_rate": 0.0005, "loss": 2.1091, "step": 169500 }, { "epoch": 0.6451968971475986, "grad_norm": 0.1248500794172287, "learning_rate": 0.0005, "loss": 2.1227, "step": 169510 }, { "epoch": 0.6452349596157213, "grad_norm": 0.13348935544490814, "learning_rate": 0.0005, "loss": 2.1343, "step": 169520 }, { "epoch": 0.645273022083844, "grad_norm": 0.11995500326156616, "learning_rate": 0.0005, "loss": 2.1103, "step": 169530 }, { "epoch": 0.6453110845519667, "grad_norm": 0.11561872065067291, "learning_rate": 0.0005, "loss": 2.1148, "step": 169540 }, { "epoch": 0.6453491470200894, "grad_norm": 0.11225131154060364, "learning_rate": 0.0005, "loss": 2.1029, "step": 169550 }, { "epoch": 0.645387209488212, "grad_norm": 0.12102378159761429, "learning_rate": 0.0005, "loss": 2.1041, "step": 169560 }, { "epoch": 0.6454252719563347, "grad_norm": 0.12393181771039963, "learning_rate": 0.0005, "loss": 2.1048, "step": 169570 }, { "epoch": 0.6454633344244575, "grad_norm": 0.12839457392692566, "learning_rate": 0.0005, "loss": 2.1173, "step": 169580 }, { "epoch": 0.6455013968925801, "grad_norm": 0.11687374114990234, "learning_rate": 0.0005, "loss": 2.1074, "step": 169590 }, { "epoch": 0.6455394593607028, "grad_norm": 0.13319134712219238, "learning_rate": 0.0005, "loss": 2.113, "step": 169600 }, { "epoch": 0.6455775218288254, "grad_norm": 0.11894795298576355, "learning_rate": 0.0005, "loss": 2.1255, "step": 169610 }, { "epoch": 0.6456155842969481, "grad_norm": 0.1231246292591095, "learning_rate": 0.0005, "loss": 2.1025, "step": 169620 }, { "epoch": 0.6456536467650709, "grad_norm": 0.12345793843269348, "learning_rate": 0.0005, "loss": 2.1189, "step": 169630 }, { "epoch": 0.6456917092331935, "grad_norm": 0.13803525269031525, "learning_rate": 0.0005, "loss": 2.1237, "step": 169640 }, { "epoch": 0.6457297717013162, "grad_norm": 0.14578431844711304, "learning_rate": 0.0005, "loss": 2.0963, "step": 169650 }, { "epoch": 0.6457678341694388, "grad_norm": 0.12052863836288452, "learning_rate": 0.0005, "loss": 2.1092, "step": 169660 }, { "epoch": 0.6458058966375616, "grad_norm": 0.1255030333995819, "learning_rate": 0.0005, "loss": 2.1118, "step": 169670 }, { "epoch": 0.6458439591056843, "grad_norm": 0.1355823576450348, "learning_rate": 0.0005, "loss": 2.101, "step": 169680 }, { "epoch": 0.6458820215738069, "grad_norm": 0.12269733846187592, "learning_rate": 0.0005, "loss": 2.1097, "step": 169690 }, { "epoch": 0.6459200840419296, "grad_norm": 0.12395049631595612, "learning_rate": 0.0005, "loss": 2.1117, "step": 169700 }, { "epoch": 0.6459581465100523, "grad_norm": 0.12342299520969391, "learning_rate": 0.0005, "loss": 2.1172, "step": 169710 }, { "epoch": 0.645996208978175, "grad_norm": 0.12586908042430878, "learning_rate": 0.0005, "loss": 2.0957, "step": 169720 }, { "epoch": 0.6460342714462977, "grad_norm": 0.1380547434091568, "learning_rate": 0.0005, "loss": 2.0924, "step": 169730 }, { "epoch": 0.6460723339144203, "grad_norm": 0.12601609528064728, "learning_rate": 0.0005, "loss": 2.1043, "step": 169740 }, { "epoch": 0.6461103963825431, "grad_norm": 0.1351451873779297, "learning_rate": 0.0005, "loss": 2.1122, "step": 169750 }, { "epoch": 0.6461484588506657, "grad_norm": 0.11888038367033005, "learning_rate": 0.0005, "loss": 2.1102, "step": 169760 }, { "epoch": 0.6461865213187884, "grad_norm": 0.12636907398700714, "learning_rate": 0.0005, "loss": 2.1269, "step": 169770 }, { "epoch": 0.646224583786911, "grad_norm": 0.12032492458820343, "learning_rate": 0.0005, "loss": 2.113, "step": 169780 }, { "epoch": 0.6462626462550337, "grad_norm": 0.12223067134618759, "learning_rate": 0.0005, "loss": 2.0996, "step": 169790 }, { "epoch": 0.6463007087231565, "grad_norm": 0.12942957878112793, "learning_rate": 0.0005, "loss": 2.111, "step": 169800 }, { "epoch": 0.6463387711912791, "grad_norm": 0.1295655220746994, "learning_rate": 0.0005, "loss": 2.114, "step": 169810 }, { "epoch": 0.6463768336594018, "grad_norm": 0.12919200956821442, "learning_rate": 0.0005, "loss": 2.1198, "step": 169820 }, { "epoch": 0.6464148961275245, "grad_norm": 0.12342769652605057, "learning_rate": 0.0005, "loss": 2.107, "step": 169830 }, { "epoch": 0.6464529585956472, "grad_norm": 0.13746656477451324, "learning_rate": 0.0005, "loss": 2.115, "step": 169840 }, { "epoch": 0.6464910210637699, "grad_norm": 0.1307177096605301, "learning_rate": 0.0005, "loss": 2.1049, "step": 169850 }, { "epoch": 0.6465290835318925, "grad_norm": 0.12498698383569717, "learning_rate": 0.0005, "loss": 2.1121, "step": 169860 }, { "epoch": 0.6465671460000152, "grad_norm": 0.12511016428470612, "learning_rate": 0.0005, "loss": 2.1252, "step": 169870 }, { "epoch": 0.646605208468138, "grad_norm": 0.1254969835281372, "learning_rate": 0.0005, "loss": 2.0998, "step": 169880 }, { "epoch": 0.6466432709362606, "grad_norm": 0.11852476000785828, "learning_rate": 0.0005, "loss": 2.0992, "step": 169890 }, { "epoch": 0.6466813334043833, "grad_norm": 0.7008293271064758, "learning_rate": 0.0005, "loss": 2.1068, "step": 169900 }, { "epoch": 0.6467193958725059, "grad_norm": 0.1426885724067688, "learning_rate": 0.0005, "loss": 2.1166, "step": 169910 }, { "epoch": 0.6467574583406286, "grad_norm": 0.12015167623758316, "learning_rate": 0.0005, "loss": 2.0981, "step": 169920 }, { "epoch": 0.6467955208087514, "grad_norm": 0.12605907022953033, "learning_rate": 0.0005, "loss": 2.1266, "step": 169930 }, { "epoch": 0.646833583276874, "grad_norm": 0.1266433447599411, "learning_rate": 0.0005, "loss": 2.109, "step": 169940 }, { "epoch": 0.6468716457449967, "grad_norm": 0.1276005059480667, "learning_rate": 0.0005, "loss": 2.108, "step": 169950 }, { "epoch": 0.6469097082131193, "grad_norm": 0.11537665873765945, "learning_rate": 0.0005, "loss": 2.1018, "step": 169960 }, { "epoch": 0.6469477706812421, "grad_norm": 0.14385004341602325, "learning_rate": 0.0005, "loss": 2.1047, "step": 169970 }, { "epoch": 0.6469858331493648, "grad_norm": 0.12152372300624847, "learning_rate": 0.0005, "loss": 2.1009, "step": 169980 }, { "epoch": 0.6470238956174874, "grad_norm": 0.12667691707611084, "learning_rate": 0.0005, "loss": 2.1045, "step": 169990 }, { "epoch": 0.6470619580856101, "grad_norm": 0.1236126646399498, "learning_rate": 0.0005, "loss": 2.1168, "step": 170000 }, { "epoch": 0.6471000205537328, "grad_norm": 0.13405680656433105, "learning_rate": 0.0005, "loss": 2.1422, "step": 170010 }, { "epoch": 0.6471380830218555, "grad_norm": 0.13311605155467987, "learning_rate": 0.0005, "loss": 2.1059, "step": 170020 }, { "epoch": 0.6471761454899781, "grad_norm": 0.12090720236301422, "learning_rate": 0.0005, "loss": 2.104, "step": 170030 }, { "epoch": 0.6472142079581008, "grad_norm": 0.12787151336669922, "learning_rate": 0.0005, "loss": 2.1098, "step": 170040 }, { "epoch": 0.6472522704262235, "grad_norm": 0.12532693147659302, "learning_rate": 0.0005, "loss": 2.1175, "step": 170050 }, { "epoch": 0.6472903328943462, "grad_norm": 0.12059324234724045, "learning_rate": 0.0005, "loss": 2.109, "step": 170060 }, { "epoch": 0.6473283953624689, "grad_norm": 0.13537880778312683, "learning_rate": 0.0005, "loss": 2.1004, "step": 170070 }, { "epoch": 0.6473664578305915, "grad_norm": 0.12789538502693176, "learning_rate": 0.0005, "loss": 2.1061, "step": 170080 }, { "epoch": 0.6474045202987142, "grad_norm": 0.11624449491500854, "learning_rate": 0.0005, "loss": 2.1138, "step": 170090 }, { "epoch": 0.647442582766837, "grad_norm": 0.14559069275856018, "learning_rate": 0.0005, "loss": 2.123, "step": 170100 }, { "epoch": 0.6474806452349596, "grad_norm": 0.12032388895750046, "learning_rate": 0.0005, "loss": 2.0995, "step": 170110 }, { "epoch": 0.6475187077030823, "grad_norm": 0.12226753681898117, "learning_rate": 0.0005, "loss": 2.1084, "step": 170120 }, { "epoch": 0.6475567701712049, "grad_norm": 0.12323250621557236, "learning_rate": 0.0005, "loss": 2.1256, "step": 170130 }, { "epoch": 0.6475948326393277, "grad_norm": 0.12948326766490936, "learning_rate": 0.0005, "loss": 2.1198, "step": 170140 }, { "epoch": 0.6476328951074504, "grad_norm": 0.1208270862698555, "learning_rate": 0.0005, "loss": 2.1105, "step": 170150 }, { "epoch": 0.647670957575573, "grad_norm": 0.1250876635313034, "learning_rate": 0.0005, "loss": 2.1257, "step": 170160 }, { "epoch": 0.6477090200436957, "grad_norm": 0.1304120272397995, "learning_rate": 0.0005, "loss": 2.1298, "step": 170170 }, { "epoch": 0.6477470825118185, "grad_norm": 0.1235019862651825, "learning_rate": 0.0005, "loss": 2.115, "step": 170180 }, { "epoch": 0.6477851449799411, "grad_norm": 0.1243685707449913, "learning_rate": 0.0005, "loss": 2.1094, "step": 170190 }, { "epoch": 0.6478232074480638, "grad_norm": 0.120770163834095, "learning_rate": 0.0005, "loss": 2.1035, "step": 170200 }, { "epoch": 0.6478612699161864, "grad_norm": 0.11926085501909256, "learning_rate": 0.0005, "loss": 2.0995, "step": 170210 }, { "epoch": 0.6478993323843091, "grad_norm": 0.11803922057151794, "learning_rate": 0.0005, "loss": 2.1186, "step": 170220 }, { "epoch": 0.6479373948524318, "grad_norm": 0.12076990306377411, "learning_rate": 0.0005, "loss": 2.0987, "step": 170230 }, { "epoch": 0.6479754573205545, "grad_norm": 0.12457573413848877, "learning_rate": 0.0005, "loss": 2.0887, "step": 170240 }, { "epoch": 0.6480135197886772, "grad_norm": 0.13837842643260956, "learning_rate": 0.0005, "loss": 2.1328, "step": 170250 }, { "epoch": 0.6480515822567998, "grad_norm": 0.12683911621570587, "learning_rate": 0.0005, "loss": 2.0827, "step": 170260 }, { "epoch": 0.6480896447249226, "grad_norm": 0.11728407442569733, "learning_rate": 0.0005, "loss": 2.1102, "step": 170270 }, { "epoch": 0.6481277071930452, "grad_norm": 0.11798442155122757, "learning_rate": 0.0005, "loss": 2.0946, "step": 170280 }, { "epoch": 0.6481657696611679, "grad_norm": 0.12231793254613876, "learning_rate": 0.0005, "loss": 2.1226, "step": 170290 }, { "epoch": 0.6482038321292906, "grad_norm": 0.12606288492679596, "learning_rate": 0.0005, "loss": 2.1187, "step": 170300 }, { "epoch": 0.6482418945974133, "grad_norm": 0.1174217239022255, "learning_rate": 0.0005, "loss": 2.1301, "step": 170310 }, { "epoch": 0.648279957065536, "grad_norm": 0.12881214916706085, "learning_rate": 0.0005, "loss": 2.1202, "step": 170320 }, { "epoch": 0.6483180195336586, "grad_norm": 0.12366227805614471, "learning_rate": 0.0005, "loss": 2.0936, "step": 170330 }, { "epoch": 0.6483560820017813, "grad_norm": 0.11601834744215012, "learning_rate": 0.0005, "loss": 2.1066, "step": 170340 }, { "epoch": 0.648394144469904, "grad_norm": 0.13191868364810944, "learning_rate": 0.0005, "loss": 2.1104, "step": 170350 }, { "epoch": 0.6484322069380267, "grad_norm": 0.13391204178333282, "learning_rate": 0.0005, "loss": 2.1097, "step": 170360 }, { "epoch": 0.6484702694061494, "grad_norm": 0.12104548513889313, "learning_rate": 0.0005, "loss": 2.117, "step": 170370 }, { "epoch": 0.648508331874272, "grad_norm": 0.12440919131040573, "learning_rate": 0.0005, "loss": 2.1094, "step": 170380 }, { "epoch": 0.6485463943423947, "grad_norm": 0.1321982890367508, "learning_rate": 0.0005, "loss": 2.1241, "step": 170390 }, { "epoch": 0.6485844568105175, "grad_norm": 0.13380879163742065, "learning_rate": 0.0005, "loss": 2.1076, "step": 170400 }, { "epoch": 0.6486225192786401, "grad_norm": 0.1223221942782402, "learning_rate": 0.0005, "loss": 2.1011, "step": 170410 }, { "epoch": 0.6486605817467628, "grad_norm": 0.12063425034284592, "learning_rate": 0.0005, "loss": 2.1103, "step": 170420 }, { "epoch": 0.6486986442148854, "grad_norm": 0.12745217978954315, "learning_rate": 0.0005, "loss": 2.1109, "step": 170430 }, { "epoch": 0.6487367066830082, "grad_norm": 0.11377807706594467, "learning_rate": 0.0005, "loss": 2.1014, "step": 170440 }, { "epoch": 0.6487747691511309, "grad_norm": 0.12785880267620087, "learning_rate": 0.0005, "loss": 2.1111, "step": 170450 }, { "epoch": 0.6488128316192535, "grad_norm": 0.12305945158004761, "learning_rate": 0.0005, "loss": 2.095, "step": 170460 }, { "epoch": 0.6488508940873762, "grad_norm": 0.11621784418821335, "learning_rate": 0.0005, "loss": 2.1026, "step": 170470 }, { "epoch": 0.6488889565554988, "grad_norm": 0.11889064311981201, "learning_rate": 0.0005, "loss": 2.1087, "step": 170480 }, { "epoch": 0.6489270190236216, "grad_norm": 0.1268673688173294, "learning_rate": 0.0005, "loss": 2.1106, "step": 170490 }, { "epoch": 0.6489650814917443, "grad_norm": 0.13656675815582275, "learning_rate": 0.0005, "loss": 2.0941, "step": 170500 }, { "epoch": 0.6490031439598669, "grad_norm": 0.12452402710914612, "learning_rate": 0.0005, "loss": 2.1192, "step": 170510 }, { "epoch": 0.6490412064279896, "grad_norm": 0.12301170825958252, "learning_rate": 0.0005, "loss": 2.1009, "step": 170520 }, { "epoch": 0.6490792688961123, "grad_norm": 0.12130080908536911, "learning_rate": 0.0005, "loss": 2.1194, "step": 170530 }, { "epoch": 0.649117331364235, "grad_norm": 0.12391924858093262, "learning_rate": 0.0005, "loss": 2.105, "step": 170540 }, { "epoch": 0.6491553938323577, "grad_norm": 0.11749352514743805, "learning_rate": 0.0005, "loss": 2.108, "step": 170550 }, { "epoch": 0.6491934563004803, "grad_norm": 0.11723422259092331, "learning_rate": 0.0005, "loss": 2.0985, "step": 170560 }, { "epoch": 0.6492315187686031, "grad_norm": 0.1299586147069931, "learning_rate": 0.0005, "loss": 2.1216, "step": 170570 }, { "epoch": 0.6492695812367257, "grad_norm": 0.12474636733531952, "learning_rate": 0.0005, "loss": 2.1002, "step": 170580 }, { "epoch": 0.6493076437048484, "grad_norm": 0.11184942722320557, "learning_rate": 0.0005, "loss": 2.1178, "step": 170590 }, { "epoch": 0.649345706172971, "grad_norm": 0.1352064609527588, "learning_rate": 0.0005, "loss": 2.0927, "step": 170600 }, { "epoch": 0.6493837686410938, "grad_norm": 0.12824389338493347, "learning_rate": 0.0005, "loss": 2.1158, "step": 170610 }, { "epoch": 0.6494218311092165, "grad_norm": 0.1299542784690857, "learning_rate": 0.0005, "loss": 2.1118, "step": 170620 }, { "epoch": 0.6494598935773391, "grad_norm": 0.12447993457317352, "learning_rate": 0.0005, "loss": 2.1154, "step": 170630 }, { "epoch": 0.6494979560454618, "grad_norm": 0.13263040781021118, "learning_rate": 0.0005, "loss": 2.1265, "step": 170640 }, { "epoch": 0.6495360185135844, "grad_norm": 0.12033121287822723, "learning_rate": 0.0005, "loss": 2.1058, "step": 170650 }, { "epoch": 0.6495740809817072, "grad_norm": 0.1425001323223114, "learning_rate": 0.0005, "loss": 2.116, "step": 170660 }, { "epoch": 0.6496121434498299, "grad_norm": 0.12598814070224762, "learning_rate": 0.0005, "loss": 2.1098, "step": 170670 }, { "epoch": 0.6496502059179525, "grad_norm": 0.1267545521259308, "learning_rate": 0.0005, "loss": 2.1157, "step": 170680 }, { "epoch": 0.6496882683860752, "grad_norm": 0.12284674495458603, "learning_rate": 0.0005, "loss": 2.1042, "step": 170690 }, { "epoch": 0.649726330854198, "grad_norm": 0.12898008525371552, "learning_rate": 0.0005, "loss": 2.1036, "step": 170700 }, { "epoch": 0.6497643933223206, "grad_norm": 0.12302932143211365, "learning_rate": 0.0005, "loss": 2.1154, "step": 170710 }, { "epoch": 0.6498024557904433, "grad_norm": 0.12430860847234726, "learning_rate": 0.0005, "loss": 2.1021, "step": 170720 }, { "epoch": 0.6498405182585659, "grad_norm": 0.12097886204719543, "learning_rate": 0.0005, "loss": 2.1091, "step": 170730 }, { "epoch": 0.6498785807266887, "grad_norm": 0.11994773149490356, "learning_rate": 0.0005, "loss": 2.108, "step": 170740 }, { "epoch": 0.6499166431948113, "grad_norm": 0.11827217042446136, "learning_rate": 0.0005, "loss": 2.1069, "step": 170750 }, { "epoch": 0.649954705662934, "grad_norm": 0.12851224839687347, "learning_rate": 0.0005, "loss": 2.1009, "step": 170760 }, { "epoch": 0.6499927681310567, "grad_norm": 0.11409155279397964, "learning_rate": 0.0005, "loss": 2.1027, "step": 170770 }, { "epoch": 0.6500308305991793, "grad_norm": 0.12571172416210175, "learning_rate": 0.0005, "loss": 2.1243, "step": 170780 }, { "epoch": 0.6500688930673021, "grad_norm": 0.13297589123249054, "learning_rate": 0.0005, "loss": 2.0942, "step": 170790 }, { "epoch": 0.6501069555354247, "grad_norm": 0.1160137876868248, "learning_rate": 0.0005, "loss": 2.1042, "step": 170800 }, { "epoch": 0.6501450180035474, "grad_norm": 0.11829045414924622, "learning_rate": 0.0005, "loss": 2.108, "step": 170810 }, { "epoch": 0.6501830804716701, "grad_norm": 0.13427163660526276, "learning_rate": 0.0005, "loss": 2.109, "step": 170820 }, { "epoch": 0.6502211429397928, "grad_norm": 0.13045386970043182, "learning_rate": 0.0005, "loss": 2.1024, "step": 170830 }, { "epoch": 0.6502592054079155, "grad_norm": 0.12804299592971802, "learning_rate": 0.0005, "loss": 2.1135, "step": 170840 }, { "epoch": 0.6502972678760381, "grad_norm": 0.12977930903434753, "learning_rate": 0.0005, "loss": 2.115, "step": 170850 }, { "epoch": 0.6503353303441608, "grad_norm": 0.12637241184711456, "learning_rate": 0.0005, "loss": 2.1251, "step": 170860 }, { "epoch": 0.6503733928122836, "grad_norm": 0.12315292656421661, "learning_rate": 0.0005, "loss": 2.0996, "step": 170870 }, { "epoch": 0.6504114552804062, "grad_norm": 0.12074718624353409, "learning_rate": 0.0005, "loss": 2.1174, "step": 170880 }, { "epoch": 0.6504495177485289, "grad_norm": 0.11984572559595108, "learning_rate": 0.0005, "loss": 2.1158, "step": 170890 }, { "epoch": 0.6504875802166515, "grad_norm": 0.12621347606182098, "learning_rate": 0.0005, "loss": 2.1013, "step": 170900 }, { "epoch": 0.6505256426847742, "grad_norm": 0.1321704387664795, "learning_rate": 0.0005, "loss": 2.1018, "step": 170910 }, { "epoch": 0.650563705152897, "grad_norm": 0.1266314685344696, "learning_rate": 0.0005, "loss": 2.1085, "step": 170920 }, { "epoch": 0.6506017676210196, "grad_norm": 0.1264774650335312, "learning_rate": 0.0005, "loss": 2.1071, "step": 170930 }, { "epoch": 0.6506398300891423, "grad_norm": 0.13591910898685455, "learning_rate": 0.0005, "loss": 2.0969, "step": 170940 }, { "epoch": 0.6506778925572649, "grad_norm": 0.13695189356803894, "learning_rate": 0.0005, "loss": 2.1107, "step": 170950 }, { "epoch": 0.6507159550253877, "grad_norm": 0.12579090893268585, "learning_rate": 0.0005, "loss": 2.1008, "step": 170960 }, { "epoch": 0.6507540174935104, "grad_norm": 0.1222383975982666, "learning_rate": 0.0005, "loss": 2.1058, "step": 170970 }, { "epoch": 0.650792079961633, "grad_norm": 0.11862023919820786, "learning_rate": 0.0005, "loss": 2.1121, "step": 170980 }, { "epoch": 0.6508301424297557, "grad_norm": 0.12775588035583496, "learning_rate": 0.0005, "loss": 2.1088, "step": 170990 }, { "epoch": 0.6508682048978784, "grad_norm": 0.1321268528699875, "learning_rate": 0.0005, "loss": 2.1068, "step": 171000 }, { "epoch": 0.6509062673660011, "grad_norm": 0.11592495441436768, "learning_rate": 0.0005, "loss": 2.1132, "step": 171010 }, { "epoch": 0.6509443298341238, "grad_norm": 0.12910479307174683, "learning_rate": 0.0005, "loss": 2.1091, "step": 171020 }, { "epoch": 0.6509823923022464, "grad_norm": 0.13574986159801483, "learning_rate": 0.0005, "loss": 2.1191, "step": 171030 }, { "epoch": 0.6510204547703692, "grad_norm": 0.12799003720283508, "learning_rate": 0.0005, "loss": 2.1177, "step": 171040 }, { "epoch": 0.6510585172384918, "grad_norm": 0.11744488775730133, "learning_rate": 0.0005, "loss": 2.1131, "step": 171050 }, { "epoch": 0.6510965797066145, "grad_norm": 0.1317628026008606, "learning_rate": 0.0005, "loss": 2.1166, "step": 171060 }, { "epoch": 0.6511346421747372, "grad_norm": 0.12473851442337036, "learning_rate": 0.0005, "loss": 2.1199, "step": 171070 }, { "epoch": 0.6511727046428598, "grad_norm": 0.12542445957660675, "learning_rate": 0.0005, "loss": 2.1154, "step": 171080 }, { "epoch": 0.6512107671109826, "grad_norm": 0.16549521684646606, "learning_rate": 0.0005, "loss": 2.1136, "step": 171090 }, { "epoch": 0.6512488295791052, "grad_norm": 0.12249605357646942, "learning_rate": 0.0005, "loss": 2.106, "step": 171100 }, { "epoch": 0.6512868920472279, "grad_norm": 0.12234670668840408, "learning_rate": 0.0005, "loss": 2.1007, "step": 171110 }, { "epoch": 0.6513249545153506, "grad_norm": 0.11434841901063919, "learning_rate": 0.0005, "loss": 2.1102, "step": 171120 }, { "epoch": 0.6513630169834733, "grad_norm": 0.12157260626554489, "learning_rate": 0.0005, "loss": 2.1174, "step": 171130 }, { "epoch": 0.651401079451596, "grad_norm": 0.12111295759677887, "learning_rate": 0.0005, "loss": 2.1295, "step": 171140 }, { "epoch": 0.6514391419197186, "grad_norm": 0.1188582181930542, "learning_rate": 0.0005, "loss": 2.1113, "step": 171150 }, { "epoch": 0.6514772043878413, "grad_norm": 0.12025199085474014, "learning_rate": 0.0005, "loss": 2.1124, "step": 171160 }, { "epoch": 0.6515152668559641, "grad_norm": 0.14027582108974457, "learning_rate": 0.0005, "loss": 2.1072, "step": 171170 }, { "epoch": 0.6515533293240867, "grad_norm": 0.13581660389900208, "learning_rate": 0.0005, "loss": 2.1128, "step": 171180 }, { "epoch": 0.6515913917922094, "grad_norm": 0.11547748744487762, "learning_rate": 0.0005, "loss": 2.1115, "step": 171190 }, { "epoch": 0.651629454260332, "grad_norm": 0.12545788288116455, "learning_rate": 0.0005, "loss": 2.1064, "step": 171200 }, { "epoch": 0.6516675167284547, "grad_norm": 0.11708662658929825, "learning_rate": 0.0005, "loss": 2.1069, "step": 171210 }, { "epoch": 0.6517055791965775, "grad_norm": 0.11985498666763306, "learning_rate": 0.0005, "loss": 2.1206, "step": 171220 }, { "epoch": 0.6517436416647001, "grad_norm": 0.12114997953176498, "learning_rate": 0.0005, "loss": 2.1147, "step": 171230 }, { "epoch": 0.6517817041328228, "grad_norm": 0.11515387892723083, "learning_rate": 0.0005, "loss": 2.1005, "step": 171240 }, { "epoch": 0.6518197666009454, "grad_norm": 0.13565857708454132, "learning_rate": 0.0005, "loss": 2.1022, "step": 171250 }, { "epoch": 0.6518578290690682, "grad_norm": 0.1242651715874672, "learning_rate": 0.0005, "loss": 2.1146, "step": 171260 }, { "epoch": 0.6518958915371909, "grad_norm": 0.12579038739204407, "learning_rate": 0.0005, "loss": 2.1043, "step": 171270 }, { "epoch": 0.6519339540053135, "grad_norm": 0.12866291403770447, "learning_rate": 0.0005, "loss": 2.1037, "step": 171280 }, { "epoch": 0.6519720164734362, "grad_norm": 0.12336907535791397, "learning_rate": 0.0005, "loss": 2.1128, "step": 171290 }, { "epoch": 0.6520100789415589, "grad_norm": 0.1254909634590149, "learning_rate": 0.0005, "loss": 2.1137, "step": 171300 }, { "epoch": 0.6520481414096816, "grad_norm": 0.12830188870429993, "learning_rate": 0.0005, "loss": 2.1012, "step": 171310 }, { "epoch": 0.6520862038778042, "grad_norm": 0.14979737997055054, "learning_rate": 0.0005, "loss": 2.1038, "step": 171320 }, { "epoch": 0.6521242663459269, "grad_norm": 0.12184132635593414, "learning_rate": 0.0005, "loss": 2.0956, "step": 171330 }, { "epoch": 0.6521623288140496, "grad_norm": 0.1344393640756607, "learning_rate": 0.0005, "loss": 2.0814, "step": 171340 }, { "epoch": 0.6522003912821723, "grad_norm": 0.14398813247680664, "learning_rate": 0.0005, "loss": 2.1108, "step": 171350 }, { "epoch": 0.652238453750295, "grad_norm": 0.13092774152755737, "learning_rate": 0.0005, "loss": 2.1222, "step": 171360 }, { "epoch": 0.6522765162184176, "grad_norm": 0.1261938065290451, "learning_rate": 0.0005, "loss": 2.102, "step": 171370 }, { "epoch": 0.6523145786865403, "grad_norm": 0.1248980313539505, "learning_rate": 0.0005, "loss": 2.1173, "step": 171380 }, { "epoch": 0.6523526411546631, "grad_norm": 0.127181276679039, "learning_rate": 0.0005, "loss": 2.1015, "step": 171390 }, { "epoch": 0.6523907036227857, "grad_norm": 0.12705473601818085, "learning_rate": 0.0005, "loss": 2.1013, "step": 171400 }, { "epoch": 0.6524287660909084, "grad_norm": 0.13760901987552643, "learning_rate": 0.0005, "loss": 2.1242, "step": 171410 }, { "epoch": 0.652466828559031, "grad_norm": 0.13452285528182983, "learning_rate": 0.0005, "loss": 2.1201, "step": 171420 }, { "epoch": 0.6525048910271538, "grad_norm": 0.11699914187192917, "learning_rate": 0.0005, "loss": 2.1102, "step": 171430 }, { "epoch": 0.6525429534952765, "grad_norm": 0.12803451716899872, "learning_rate": 0.0005, "loss": 2.1036, "step": 171440 }, { "epoch": 0.6525810159633991, "grad_norm": 0.12715654075145721, "learning_rate": 0.0005, "loss": 2.1115, "step": 171450 }, { "epoch": 0.6526190784315218, "grad_norm": 0.1353614181280136, "learning_rate": 0.0005, "loss": 2.1053, "step": 171460 }, { "epoch": 0.6526571408996446, "grad_norm": 0.1400173157453537, "learning_rate": 0.0005, "loss": 2.0991, "step": 171470 }, { "epoch": 0.6526952033677672, "grad_norm": 0.11517337709665298, "learning_rate": 0.0005, "loss": 2.1144, "step": 171480 }, { "epoch": 0.6527332658358899, "grad_norm": 0.1215863898396492, "learning_rate": 0.0005, "loss": 2.1088, "step": 171490 }, { "epoch": 0.6527713283040125, "grad_norm": 0.11860140413045883, "learning_rate": 0.0005, "loss": 2.1113, "step": 171500 }, { "epoch": 0.6528093907721352, "grad_norm": 0.1134290024638176, "learning_rate": 0.0005, "loss": 2.1094, "step": 171510 }, { "epoch": 0.652847453240258, "grad_norm": 0.12387970834970474, "learning_rate": 0.0005, "loss": 2.1071, "step": 171520 }, { "epoch": 0.6528855157083806, "grad_norm": 0.13404084742069244, "learning_rate": 0.0005, "loss": 2.1053, "step": 171530 }, { "epoch": 0.6529235781765033, "grad_norm": 0.1261999011039734, "learning_rate": 0.0005, "loss": 2.115, "step": 171540 }, { "epoch": 0.6529616406446259, "grad_norm": 0.12469957768917084, "learning_rate": 0.0005, "loss": 2.0986, "step": 171550 }, { "epoch": 0.6529997031127487, "grad_norm": 0.1313467025756836, "learning_rate": 0.0005, "loss": 2.0964, "step": 171560 }, { "epoch": 0.6530377655808713, "grad_norm": 0.13312993943691254, "learning_rate": 0.0005, "loss": 2.1036, "step": 171570 }, { "epoch": 0.653075828048994, "grad_norm": 0.1249455064535141, "learning_rate": 0.0005, "loss": 2.1155, "step": 171580 }, { "epoch": 0.6531138905171167, "grad_norm": 0.1262136846780777, "learning_rate": 0.0005, "loss": 2.1097, "step": 171590 }, { "epoch": 0.6531519529852394, "grad_norm": 0.12008432298898697, "learning_rate": 0.0005, "loss": 2.1188, "step": 171600 }, { "epoch": 0.6531900154533621, "grad_norm": 0.12458830326795578, "learning_rate": 0.0005, "loss": 2.1129, "step": 171610 }, { "epoch": 0.6532280779214847, "grad_norm": 0.1217493787407875, "learning_rate": 0.0005, "loss": 2.115, "step": 171620 }, { "epoch": 0.6532661403896074, "grad_norm": 0.12446153908967972, "learning_rate": 0.0005, "loss": 2.1003, "step": 171630 }, { "epoch": 0.65330420285773, "grad_norm": 0.13291476666927338, "learning_rate": 0.0005, "loss": 2.1196, "step": 171640 }, { "epoch": 0.6533422653258528, "grad_norm": 0.11692068725824356, "learning_rate": 0.0005, "loss": 2.1029, "step": 171650 }, { "epoch": 0.6533803277939755, "grad_norm": 0.12520849704742432, "learning_rate": 0.0005, "loss": 2.1113, "step": 171660 }, { "epoch": 0.6534183902620981, "grad_norm": 0.1229141503572464, "learning_rate": 0.0005, "loss": 2.097, "step": 171670 }, { "epoch": 0.6534564527302208, "grad_norm": 0.12377126514911652, "learning_rate": 0.0005, "loss": 2.1133, "step": 171680 }, { "epoch": 0.6534945151983436, "grad_norm": 0.12698081135749817, "learning_rate": 0.0005, "loss": 2.1141, "step": 171690 }, { "epoch": 0.6535325776664662, "grad_norm": 0.12466852366924286, "learning_rate": 0.0005, "loss": 2.0966, "step": 171700 }, { "epoch": 0.6535706401345889, "grad_norm": 0.1278296560049057, "learning_rate": 0.0005, "loss": 2.1061, "step": 171710 }, { "epoch": 0.6536087026027115, "grad_norm": 0.1254255324602127, "learning_rate": 0.0005, "loss": 2.1133, "step": 171720 }, { "epoch": 0.6536467650708343, "grad_norm": 0.12170296162366867, "learning_rate": 0.0005, "loss": 2.1086, "step": 171730 }, { "epoch": 0.653684827538957, "grad_norm": 0.12227907031774521, "learning_rate": 0.0005, "loss": 2.0974, "step": 171740 }, { "epoch": 0.6537228900070796, "grad_norm": 0.14366976916790009, "learning_rate": 0.0005, "loss": 2.1081, "step": 171750 }, { "epoch": 0.6537609524752023, "grad_norm": 0.12183597683906555, "learning_rate": 0.0005, "loss": 2.116, "step": 171760 }, { "epoch": 0.6537990149433249, "grad_norm": 0.12633612751960754, "learning_rate": 0.0005, "loss": 2.1145, "step": 171770 }, { "epoch": 0.6538370774114477, "grad_norm": 0.12033534049987793, "learning_rate": 0.0005, "loss": 2.1008, "step": 171780 }, { "epoch": 0.6538751398795704, "grad_norm": 0.11813070625066757, "learning_rate": 0.0005, "loss": 2.1148, "step": 171790 }, { "epoch": 0.653913202347693, "grad_norm": 0.14022228121757507, "learning_rate": 0.0005, "loss": 2.1082, "step": 171800 }, { "epoch": 0.6539512648158157, "grad_norm": 0.1259172260761261, "learning_rate": 0.0005, "loss": 2.1174, "step": 171810 }, { "epoch": 0.6539893272839384, "grad_norm": 0.13032588362693787, "learning_rate": 0.0005, "loss": 2.1172, "step": 171820 }, { "epoch": 0.6540273897520611, "grad_norm": 0.12416426092386246, "learning_rate": 0.0005, "loss": 2.1125, "step": 171830 }, { "epoch": 0.6540654522201838, "grad_norm": 0.1398206502199173, "learning_rate": 0.0005, "loss": 2.1078, "step": 171840 }, { "epoch": 0.6541035146883064, "grad_norm": 0.11684484779834747, "learning_rate": 0.0005, "loss": 2.0972, "step": 171850 }, { "epoch": 0.6541415771564292, "grad_norm": 0.12237782776355743, "learning_rate": 0.0005, "loss": 2.1255, "step": 171860 }, { "epoch": 0.6541796396245518, "grad_norm": 0.1241404116153717, "learning_rate": 0.0005, "loss": 2.1047, "step": 171870 }, { "epoch": 0.6542177020926745, "grad_norm": 0.12162047624588013, "learning_rate": 0.0005, "loss": 2.1224, "step": 171880 }, { "epoch": 0.6542557645607971, "grad_norm": 0.12076914310455322, "learning_rate": 0.0005, "loss": 2.1105, "step": 171890 }, { "epoch": 0.6542938270289199, "grad_norm": 0.14396323263645172, "learning_rate": 0.0005, "loss": 2.0975, "step": 171900 }, { "epoch": 0.6543318894970426, "grad_norm": 0.1287434846162796, "learning_rate": 0.0005, "loss": 2.1148, "step": 171910 }, { "epoch": 0.6543699519651652, "grad_norm": 0.13110186159610748, "learning_rate": 0.0005, "loss": 2.1159, "step": 171920 }, { "epoch": 0.6544080144332879, "grad_norm": 0.12009651213884354, "learning_rate": 0.0005, "loss": 2.1113, "step": 171930 }, { "epoch": 0.6544460769014105, "grad_norm": 0.11705582588911057, "learning_rate": 0.0005, "loss": 2.1101, "step": 171940 }, { "epoch": 0.6544841393695333, "grad_norm": 0.12955498695373535, "learning_rate": 0.0005, "loss": 2.131, "step": 171950 }, { "epoch": 0.654522201837656, "grad_norm": 0.11973123997449875, "learning_rate": 0.0005, "loss": 2.1122, "step": 171960 }, { "epoch": 0.6545602643057786, "grad_norm": 0.13571450114250183, "learning_rate": 0.0005, "loss": 2.1246, "step": 171970 }, { "epoch": 0.6545983267739013, "grad_norm": 0.1409195512533188, "learning_rate": 0.0005, "loss": 2.1166, "step": 171980 }, { "epoch": 0.654636389242024, "grad_norm": 0.13034707307815552, "learning_rate": 0.0005, "loss": 2.1228, "step": 171990 }, { "epoch": 0.6546744517101467, "grad_norm": 0.1197667196393013, "learning_rate": 0.0005, "loss": 2.1135, "step": 172000 }, { "epoch": 0.6547125141782694, "grad_norm": 0.12414491921663284, "learning_rate": 0.0005, "loss": 2.0912, "step": 172010 }, { "epoch": 0.654750576646392, "grad_norm": 0.12749461829662323, "learning_rate": 0.0005, "loss": 2.1092, "step": 172020 }, { "epoch": 0.6547886391145148, "grad_norm": 0.12330741435289383, "learning_rate": 0.0005, "loss": 2.1143, "step": 172030 }, { "epoch": 0.6548267015826374, "grad_norm": 0.12864083051681519, "learning_rate": 0.0005, "loss": 2.1061, "step": 172040 }, { "epoch": 0.6548647640507601, "grad_norm": 0.12212818115949631, "learning_rate": 0.0005, "loss": 2.1117, "step": 172050 }, { "epoch": 0.6549028265188828, "grad_norm": 0.12514182925224304, "learning_rate": 0.0005, "loss": 2.1097, "step": 172060 }, { "epoch": 0.6549408889870054, "grad_norm": 0.11073462665081024, "learning_rate": 0.0005, "loss": 2.118, "step": 172070 }, { "epoch": 0.6549789514551282, "grad_norm": 0.11813299357891083, "learning_rate": 0.0005, "loss": 2.0974, "step": 172080 }, { "epoch": 0.6550170139232508, "grad_norm": 0.12076397985219955, "learning_rate": 0.0005, "loss": 2.1198, "step": 172090 }, { "epoch": 0.6550550763913735, "grad_norm": 0.12106800079345703, "learning_rate": 0.0005, "loss": 2.1155, "step": 172100 }, { "epoch": 0.6550931388594962, "grad_norm": 0.23853981494903564, "learning_rate": 0.0005, "loss": 2.1071, "step": 172110 }, { "epoch": 0.6551312013276189, "grad_norm": 0.12523460388183594, "learning_rate": 0.0005, "loss": 2.1045, "step": 172120 }, { "epoch": 0.6551692637957416, "grad_norm": 0.12649975717067719, "learning_rate": 0.0005, "loss": 2.1015, "step": 172130 }, { "epoch": 0.6552073262638642, "grad_norm": 0.11596899479627609, "learning_rate": 0.0005, "loss": 2.111, "step": 172140 }, { "epoch": 0.6552453887319869, "grad_norm": 0.12488400936126709, "learning_rate": 0.0005, "loss": 2.1048, "step": 172150 }, { "epoch": 0.6552834512001097, "grad_norm": 0.13245391845703125, "learning_rate": 0.0005, "loss": 2.1074, "step": 172160 }, { "epoch": 0.6553215136682323, "grad_norm": 0.1204514130949974, "learning_rate": 0.0005, "loss": 2.1099, "step": 172170 }, { "epoch": 0.655359576136355, "grad_norm": 0.1243198812007904, "learning_rate": 0.0005, "loss": 2.114, "step": 172180 }, { "epoch": 0.6553976386044776, "grad_norm": 0.1361195594072342, "learning_rate": 0.0005, "loss": 2.1088, "step": 172190 }, { "epoch": 0.6554357010726003, "grad_norm": 0.13096532225608826, "learning_rate": 0.0005, "loss": 2.1215, "step": 172200 }, { "epoch": 0.6554737635407231, "grad_norm": 0.11856479197740555, "learning_rate": 0.0005, "loss": 2.1132, "step": 172210 }, { "epoch": 0.6555118260088457, "grad_norm": 0.1288810521364212, "learning_rate": 0.0005, "loss": 2.1069, "step": 172220 }, { "epoch": 0.6555498884769684, "grad_norm": 0.12508545815944672, "learning_rate": 0.0005, "loss": 2.1189, "step": 172230 }, { "epoch": 0.655587950945091, "grad_norm": 0.13051195442676544, "learning_rate": 0.0005, "loss": 2.112, "step": 172240 }, { "epoch": 0.6556260134132138, "grad_norm": 0.1230180487036705, "learning_rate": 0.0005, "loss": 2.1137, "step": 172250 }, { "epoch": 0.6556640758813365, "grad_norm": 0.11832129955291748, "learning_rate": 0.0005, "loss": 2.118, "step": 172260 }, { "epoch": 0.6557021383494591, "grad_norm": 0.13429515063762665, "learning_rate": 0.0005, "loss": 2.0976, "step": 172270 }, { "epoch": 0.6557402008175818, "grad_norm": 0.1276344656944275, "learning_rate": 0.0005, "loss": 2.0926, "step": 172280 }, { "epoch": 0.6557782632857045, "grad_norm": 0.11800602823495865, "learning_rate": 0.0005, "loss": 2.1054, "step": 172290 }, { "epoch": 0.6558163257538272, "grad_norm": 0.12008919566869736, "learning_rate": 0.0005, "loss": 2.0926, "step": 172300 }, { "epoch": 0.6558543882219499, "grad_norm": 0.12468130886554718, "learning_rate": 0.0005, "loss": 2.1029, "step": 172310 }, { "epoch": 0.6558924506900725, "grad_norm": 0.12327641993761063, "learning_rate": 0.0005, "loss": 2.1046, "step": 172320 }, { "epoch": 0.6559305131581953, "grad_norm": 0.13303974270820618, "learning_rate": 0.0005, "loss": 2.1209, "step": 172330 }, { "epoch": 0.6559685756263179, "grad_norm": 0.12919138371944427, "learning_rate": 0.0005, "loss": 2.1048, "step": 172340 }, { "epoch": 0.6560066380944406, "grad_norm": 0.11894699186086655, "learning_rate": 0.0005, "loss": 2.096, "step": 172350 }, { "epoch": 0.6560447005625633, "grad_norm": 0.13580971956253052, "learning_rate": 0.0005, "loss": 2.1116, "step": 172360 }, { "epoch": 0.6560827630306859, "grad_norm": 0.13319773972034454, "learning_rate": 0.0005, "loss": 2.123, "step": 172370 }, { "epoch": 0.6561208254988087, "grad_norm": 0.1443890482187271, "learning_rate": 0.0005, "loss": 2.1163, "step": 172380 }, { "epoch": 0.6561588879669313, "grad_norm": 0.1339932531118393, "learning_rate": 0.0005, "loss": 2.1086, "step": 172390 }, { "epoch": 0.656196950435054, "grad_norm": 0.1335560381412506, "learning_rate": 0.0005, "loss": 2.1207, "step": 172400 }, { "epoch": 0.6562350129031767, "grad_norm": 0.13227705657482147, "learning_rate": 0.0005, "loss": 2.1151, "step": 172410 }, { "epoch": 0.6562730753712994, "grad_norm": 0.12547624111175537, "learning_rate": 0.0005, "loss": 2.1205, "step": 172420 }, { "epoch": 0.6563111378394221, "grad_norm": 0.11724413186311722, "learning_rate": 0.0005, "loss": 2.1142, "step": 172430 }, { "epoch": 0.6563492003075447, "grad_norm": 0.1592647284269333, "learning_rate": 0.0005, "loss": 2.1192, "step": 172440 }, { "epoch": 0.6563872627756674, "grad_norm": 0.13125862181186676, "learning_rate": 0.0005, "loss": 2.0955, "step": 172450 }, { "epoch": 0.6564253252437902, "grad_norm": 0.12079490721225739, "learning_rate": 0.0005, "loss": 2.1021, "step": 172460 }, { "epoch": 0.6564633877119128, "grad_norm": 0.13198938965797424, "learning_rate": 0.0005, "loss": 2.1077, "step": 172470 }, { "epoch": 0.6565014501800355, "grad_norm": 0.1353512406349182, "learning_rate": 0.0005, "loss": 2.1147, "step": 172480 }, { "epoch": 0.6565395126481581, "grad_norm": 0.1280246526002884, "learning_rate": 0.0005, "loss": 2.0884, "step": 172490 }, { "epoch": 0.6565775751162808, "grad_norm": 0.11841772496700287, "learning_rate": 0.0005, "loss": 2.1095, "step": 172500 }, { "epoch": 0.6566156375844036, "grad_norm": 0.12437329441308975, "learning_rate": 0.0005, "loss": 2.1181, "step": 172510 }, { "epoch": 0.6566537000525262, "grad_norm": 0.12280859798192978, "learning_rate": 0.0005, "loss": 2.1095, "step": 172520 }, { "epoch": 0.6566917625206489, "grad_norm": 0.13854913413524628, "learning_rate": 0.0005, "loss": 2.1088, "step": 172530 }, { "epoch": 0.6567298249887715, "grad_norm": 0.1186220794916153, "learning_rate": 0.0005, "loss": 2.1058, "step": 172540 }, { "epoch": 0.6567678874568943, "grad_norm": 0.13003933429718018, "learning_rate": 0.0005, "loss": 2.0988, "step": 172550 }, { "epoch": 0.656805949925017, "grad_norm": 0.12726987898349762, "learning_rate": 0.0005, "loss": 2.1008, "step": 172560 }, { "epoch": 0.6568440123931396, "grad_norm": 0.1203669011592865, "learning_rate": 0.0005, "loss": 2.1062, "step": 172570 }, { "epoch": 0.6568820748612623, "grad_norm": 0.1172369197010994, "learning_rate": 0.0005, "loss": 2.1146, "step": 172580 }, { "epoch": 0.656920137329385, "grad_norm": 0.11729346215724945, "learning_rate": 0.0005, "loss": 2.091, "step": 172590 }, { "epoch": 0.6569581997975077, "grad_norm": 0.11557607352733612, "learning_rate": 0.0005, "loss": 2.1147, "step": 172600 }, { "epoch": 0.6569962622656303, "grad_norm": 0.11810766160488129, "learning_rate": 0.0005, "loss": 2.1119, "step": 172610 }, { "epoch": 0.657034324733753, "grad_norm": 0.12994596362113953, "learning_rate": 0.0005, "loss": 2.1071, "step": 172620 }, { "epoch": 0.6570723872018758, "grad_norm": 0.12548710405826569, "learning_rate": 0.0005, "loss": 2.116, "step": 172630 }, { "epoch": 0.6571104496699984, "grad_norm": 0.12391366064548492, "learning_rate": 0.0005, "loss": 2.0999, "step": 172640 }, { "epoch": 0.6571485121381211, "grad_norm": 0.11640439182519913, "learning_rate": 0.0005, "loss": 2.1051, "step": 172650 }, { "epoch": 0.6571865746062437, "grad_norm": 0.1241813525557518, "learning_rate": 0.0005, "loss": 2.1072, "step": 172660 }, { "epoch": 0.6572246370743664, "grad_norm": 0.12873999774456024, "learning_rate": 0.0005, "loss": 2.1081, "step": 172670 }, { "epoch": 0.6572626995424892, "grad_norm": 0.11288134753704071, "learning_rate": 0.0005, "loss": 2.1032, "step": 172680 }, { "epoch": 0.6573007620106118, "grad_norm": 0.11984800547361374, "learning_rate": 0.0005, "loss": 2.119, "step": 172690 }, { "epoch": 0.6573388244787345, "grad_norm": 0.12152191251516342, "learning_rate": 0.0005, "loss": 2.1197, "step": 172700 }, { "epoch": 0.6573768869468571, "grad_norm": 0.11136773973703384, "learning_rate": 0.0005, "loss": 2.108, "step": 172710 }, { "epoch": 0.6574149494149799, "grad_norm": 0.12120914459228516, "learning_rate": 0.0005, "loss": 2.1186, "step": 172720 }, { "epoch": 0.6574530118831026, "grad_norm": 0.12348100543022156, "learning_rate": 0.0005, "loss": 2.1073, "step": 172730 }, { "epoch": 0.6574910743512252, "grad_norm": 0.11988083273172379, "learning_rate": 0.0005, "loss": 2.1104, "step": 172740 }, { "epoch": 0.6575291368193479, "grad_norm": 0.1392463594675064, "learning_rate": 0.0005, "loss": 2.1071, "step": 172750 }, { "epoch": 0.6575671992874706, "grad_norm": 0.11580455303192139, "learning_rate": 0.0005, "loss": 2.1091, "step": 172760 }, { "epoch": 0.6576052617555933, "grad_norm": 0.12519559264183044, "learning_rate": 0.0005, "loss": 2.1078, "step": 172770 }, { "epoch": 0.657643324223716, "grad_norm": 0.11238522827625275, "learning_rate": 0.0005, "loss": 2.1273, "step": 172780 }, { "epoch": 0.6576813866918386, "grad_norm": 0.11753600835800171, "learning_rate": 0.0005, "loss": 2.0989, "step": 172790 }, { "epoch": 0.6577194491599613, "grad_norm": 0.1268373727798462, "learning_rate": 0.0005, "loss": 2.1049, "step": 172800 }, { "epoch": 0.657757511628084, "grad_norm": 0.11925873160362244, "learning_rate": 0.0005, "loss": 2.0942, "step": 172810 }, { "epoch": 0.6577955740962067, "grad_norm": 0.1523437798023224, "learning_rate": 0.0005, "loss": 2.134, "step": 172820 }, { "epoch": 0.6578336365643294, "grad_norm": 0.12391243875026703, "learning_rate": 0.0005, "loss": 2.1136, "step": 172830 }, { "epoch": 0.657871699032452, "grad_norm": 0.1167418509721756, "learning_rate": 0.0005, "loss": 2.109, "step": 172840 }, { "epoch": 0.6579097615005748, "grad_norm": 0.1174694374203682, "learning_rate": 0.0005, "loss": 2.0989, "step": 172850 }, { "epoch": 0.6579478239686974, "grad_norm": 0.1145000234246254, "learning_rate": 0.0005, "loss": 2.1164, "step": 172860 }, { "epoch": 0.6579858864368201, "grad_norm": 0.12418124824762344, "learning_rate": 0.0005, "loss": 2.1116, "step": 172870 }, { "epoch": 0.6580239489049428, "grad_norm": 0.12741424143314362, "learning_rate": 0.0005, "loss": 2.1165, "step": 172880 }, { "epoch": 0.6580620113730655, "grad_norm": 0.11886553466320038, "learning_rate": 0.0005, "loss": 2.1147, "step": 172890 }, { "epoch": 0.6581000738411882, "grad_norm": 0.12890352308750153, "learning_rate": 0.0005, "loss": 2.1025, "step": 172900 }, { "epoch": 0.6581381363093108, "grad_norm": 0.13043367862701416, "learning_rate": 0.0005, "loss": 2.1172, "step": 172910 }, { "epoch": 0.6581761987774335, "grad_norm": 0.14133760333061218, "learning_rate": 0.0005, "loss": 2.1123, "step": 172920 }, { "epoch": 0.6582142612455562, "grad_norm": 0.13013096153736115, "learning_rate": 0.0005, "loss": 2.0985, "step": 172930 }, { "epoch": 0.6582523237136789, "grad_norm": 0.12168702483177185, "learning_rate": 0.0005, "loss": 2.1086, "step": 172940 }, { "epoch": 0.6582903861818016, "grad_norm": 0.11807643622159958, "learning_rate": 0.0005, "loss": 2.1141, "step": 172950 }, { "epoch": 0.6583284486499242, "grad_norm": 0.1260489970445633, "learning_rate": 0.0005, "loss": 2.1009, "step": 172960 }, { "epoch": 0.6583665111180469, "grad_norm": 0.12451190501451492, "learning_rate": 0.0005, "loss": 2.1079, "step": 172970 }, { "epoch": 0.6584045735861697, "grad_norm": 0.12765002250671387, "learning_rate": 0.0005, "loss": 2.1131, "step": 172980 }, { "epoch": 0.6584426360542923, "grad_norm": 0.12903667986392975, "learning_rate": 0.0005, "loss": 2.0893, "step": 172990 }, { "epoch": 0.658480698522415, "grad_norm": 0.12422466278076172, "learning_rate": 0.0005, "loss": 2.1042, "step": 173000 }, { "epoch": 0.6585187609905376, "grad_norm": 0.1248549371957779, "learning_rate": 0.0005, "loss": 2.0992, "step": 173010 }, { "epoch": 0.6585568234586604, "grad_norm": 0.12342771142721176, "learning_rate": 0.0005, "loss": 2.1155, "step": 173020 }, { "epoch": 0.6585948859267831, "grad_norm": 0.136073037981987, "learning_rate": 0.0005, "loss": 2.114, "step": 173030 }, { "epoch": 0.6586329483949057, "grad_norm": 0.12524139881134033, "learning_rate": 0.0005, "loss": 2.1189, "step": 173040 }, { "epoch": 0.6586710108630284, "grad_norm": 0.13240216672420502, "learning_rate": 0.0005, "loss": 2.113, "step": 173050 }, { "epoch": 0.6587090733311511, "grad_norm": 0.12460118532180786, "learning_rate": 0.0005, "loss": 2.0964, "step": 173060 }, { "epoch": 0.6587471357992738, "grad_norm": 0.1181638091802597, "learning_rate": 0.0005, "loss": 2.1138, "step": 173070 }, { "epoch": 0.6587851982673965, "grad_norm": 0.11996463686227798, "learning_rate": 0.0005, "loss": 2.1031, "step": 173080 }, { "epoch": 0.6588232607355191, "grad_norm": 0.1309623420238495, "learning_rate": 0.0005, "loss": 2.1063, "step": 173090 }, { "epoch": 0.6588613232036418, "grad_norm": 0.1276891827583313, "learning_rate": 0.0005, "loss": 2.1097, "step": 173100 }, { "epoch": 0.6588993856717645, "grad_norm": 0.13824163377285004, "learning_rate": 0.0005, "loss": 2.0969, "step": 173110 }, { "epoch": 0.6589374481398872, "grad_norm": 0.11203496158123016, "learning_rate": 0.0005, "loss": 2.1136, "step": 173120 }, { "epoch": 0.6589755106080099, "grad_norm": 0.12256965786218643, "learning_rate": 0.0005, "loss": 2.1103, "step": 173130 }, { "epoch": 0.6590135730761325, "grad_norm": 0.1368960738182068, "learning_rate": 0.0005, "loss": 2.1174, "step": 173140 }, { "epoch": 0.6590516355442553, "grad_norm": 0.12021365761756897, "learning_rate": 0.0005, "loss": 2.1184, "step": 173150 }, { "epoch": 0.6590896980123779, "grad_norm": 0.1191093698143959, "learning_rate": 0.0005, "loss": 2.1161, "step": 173160 }, { "epoch": 0.6591277604805006, "grad_norm": 0.12375283986330032, "learning_rate": 0.0005, "loss": 2.1164, "step": 173170 }, { "epoch": 0.6591658229486232, "grad_norm": 0.12380228191614151, "learning_rate": 0.0005, "loss": 2.1135, "step": 173180 }, { "epoch": 0.659203885416746, "grad_norm": 0.12744921445846558, "learning_rate": 0.0005, "loss": 2.1025, "step": 173190 }, { "epoch": 0.6592419478848687, "grad_norm": 0.12835119664669037, "learning_rate": 0.0005, "loss": 2.1005, "step": 173200 }, { "epoch": 0.6592800103529913, "grad_norm": 0.13462497293949127, "learning_rate": 0.0005, "loss": 2.117, "step": 173210 }, { "epoch": 0.659318072821114, "grad_norm": 0.14495985209941864, "learning_rate": 0.0005, "loss": 2.0958, "step": 173220 }, { "epoch": 0.6593561352892366, "grad_norm": 0.11991511285305023, "learning_rate": 0.0005, "loss": 2.094, "step": 173230 }, { "epoch": 0.6593941977573594, "grad_norm": 0.14063900709152222, "learning_rate": 0.0005, "loss": 2.1013, "step": 173240 }, { "epoch": 0.6594322602254821, "grad_norm": 0.12147396802902222, "learning_rate": 0.0005, "loss": 2.104, "step": 173250 }, { "epoch": 0.6594703226936047, "grad_norm": 0.12060917913913727, "learning_rate": 0.0005, "loss": 2.0877, "step": 173260 }, { "epoch": 0.6595083851617274, "grad_norm": 0.12398088723421097, "learning_rate": 0.0005, "loss": 2.1064, "step": 173270 }, { "epoch": 0.6595464476298502, "grad_norm": 0.12243727594614029, "learning_rate": 0.0005, "loss": 2.1117, "step": 173280 }, { "epoch": 0.6595845100979728, "grad_norm": 0.1569921225309372, "learning_rate": 0.0005, "loss": 2.1017, "step": 173290 }, { "epoch": 0.6596225725660955, "grad_norm": 0.1303899735212326, "learning_rate": 0.0005, "loss": 2.1172, "step": 173300 }, { "epoch": 0.6596606350342181, "grad_norm": 0.12057130038738251, "learning_rate": 0.0005, "loss": 2.1151, "step": 173310 }, { "epoch": 0.6596986975023409, "grad_norm": 0.13421890139579773, "learning_rate": 0.0005, "loss": 2.123, "step": 173320 }, { "epoch": 0.6597367599704635, "grad_norm": 0.12553319334983826, "learning_rate": 0.0005, "loss": 2.1167, "step": 173330 }, { "epoch": 0.6597748224385862, "grad_norm": 0.12315362691879272, "learning_rate": 0.0005, "loss": 2.1139, "step": 173340 }, { "epoch": 0.6598128849067089, "grad_norm": 0.11498898267745972, "learning_rate": 0.0005, "loss": 2.1143, "step": 173350 }, { "epoch": 0.6598509473748315, "grad_norm": 0.12755295634269714, "learning_rate": 0.0005, "loss": 2.1032, "step": 173360 }, { "epoch": 0.6598890098429543, "grad_norm": 0.12583117187023163, "learning_rate": 0.0005, "loss": 2.1038, "step": 173370 }, { "epoch": 0.659927072311077, "grad_norm": 0.12035936117172241, "learning_rate": 0.0005, "loss": 2.1174, "step": 173380 }, { "epoch": 0.6599651347791996, "grad_norm": 0.13051927089691162, "learning_rate": 0.0005, "loss": 2.1085, "step": 173390 }, { "epoch": 0.6600031972473223, "grad_norm": 0.13374698162078857, "learning_rate": 0.0005, "loss": 2.1095, "step": 173400 }, { "epoch": 0.660041259715445, "grad_norm": 0.11594285815954208, "learning_rate": 0.0005, "loss": 2.1034, "step": 173410 }, { "epoch": 0.6600793221835677, "grad_norm": 0.11974643170833588, "learning_rate": 0.0005, "loss": 2.1105, "step": 173420 }, { "epoch": 0.6601173846516903, "grad_norm": 0.12264318764209747, "learning_rate": 0.0005, "loss": 2.104, "step": 173430 }, { "epoch": 0.660155447119813, "grad_norm": 0.13385246694087982, "learning_rate": 0.0005, "loss": 2.1243, "step": 173440 }, { "epoch": 0.6601935095879358, "grad_norm": 0.13724219799041748, "learning_rate": 0.0005, "loss": 2.1137, "step": 173450 }, { "epoch": 0.6602315720560584, "grad_norm": 0.1183377206325531, "learning_rate": 0.0005, "loss": 2.0973, "step": 173460 }, { "epoch": 0.6602696345241811, "grad_norm": 0.11857128888368607, "learning_rate": 0.0005, "loss": 2.1242, "step": 173470 }, { "epoch": 0.6603076969923037, "grad_norm": 0.11552588641643524, "learning_rate": 0.0005, "loss": 2.1241, "step": 173480 }, { "epoch": 0.6603457594604265, "grad_norm": 0.12726780772209167, "learning_rate": 0.0005, "loss": 2.1139, "step": 173490 }, { "epoch": 0.6603838219285492, "grad_norm": 0.12919867038726807, "learning_rate": 0.0005, "loss": 2.1009, "step": 173500 }, { "epoch": 0.6604218843966718, "grad_norm": 0.12737435102462769, "learning_rate": 0.0005, "loss": 2.1176, "step": 173510 }, { "epoch": 0.6604599468647945, "grad_norm": 0.12323548644781113, "learning_rate": 0.0005, "loss": 2.1095, "step": 173520 }, { "epoch": 0.6604980093329171, "grad_norm": 0.12535181641578674, "learning_rate": 0.0005, "loss": 2.1093, "step": 173530 }, { "epoch": 0.6605360718010399, "grad_norm": 0.1355310082435608, "learning_rate": 0.0005, "loss": 2.1076, "step": 173540 }, { "epoch": 0.6605741342691626, "grad_norm": 0.1210881695151329, "learning_rate": 0.0005, "loss": 2.092, "step": 173550 }, { "epoch": 0.6606121967372852, "grad_norm": 0.12017525732517242, "learning_rate": 0.0005, "loss": 2.1093, "step": 173560 }, { "epoch": 0.6606502592054079, "grad_norm": 0.12086072564125061, "learning_rate": 0.0005, "loss": 2.1001, "step": 173570 }, { "epoch": 0.6606883216735306, "grad_norm": 0.12978361546993256, "learning_rate": 0.0005, "loss": 2.115, "step": 173580 }, { "epoch": 0.6607263841416533, "grad_norm": 0.1455615758895874, "learning_rate": 0.0005, "loss": 2.1091, "step": 173590 }, { "epoch": 0.660764446609776, "grad_norm": 0.1265958696603775, "learning_rate": 0.0005, "loss": 2.1094, "step": 173600 }, { "epoch": 0.6608025090778986, "grad_norm": 0.12732741236686707, "learning_rate": 0.0005, "loss": 2.1104, "step": 173610 }, { "epoch": 0.6608405715460214, "grad_norm": 0.12790100276470184, "learning_rate": 0.0005, "loss": 2.0933, "step": 173620 }, { "epoch": 0.660878634014144, "grad_norm": 0.13768728077411652, "learning_rate": 0.0005, "loss": 2.1017, "step": 173630 }, { "epoch": 0.6609166964822667, "grad_norm": 0.1262575089931488, "learning_rate": 0.0005, "loss": 2.1182, "step": 173640 }, { "epoch": 0.6609547589503894, "grad_norm": 0.11182989180088043, "learning_rate": 0.0005, "loss": 2.1129, "step": 173650 }, { "epoch": 0.660992821418512, "grad_norm": 0.11728381365537643, "learning_rate": 0.0005, "loss": 2.1137, "step": 173660 }, { "epoch": 0.6610308838866348, "grad_norm": 0.12345705926418304, "learning_rate": 0.0005, "loss": 2.1282, "step": 173670 }, { "epoch": 0.6610689463547574, "grad_norm": 0.13155001401901245, "learning_rate": 0.0005, "loss": 2.108, "step": 173680 }, { "epoch": 0.6611070088228801, "grad_norm": 0.11630726605653763, "learning_rate": 0.0005, "loss": 2.1075, "step": 173690 }, { "epoch": 0.6611450712910028, "grad_norm": 0.11791592091321945, "learning_rate": 0.0005, "loss": 2.098, "step": 173700 }, { "epoch": 0.6611831337591255, "grad_norm": 0.35240083932876587, "learning_rate": 0.0005, "loss": 2.0907, "step": 173710 }, { "epoch": 0.6612211962272482, "grad_norm": 0.1190033033490181, "learning_rate": 0.0005, "loss": 2.1068, "step": 173720 }, { "epoch": 0.6612592586953708, "grad_norm": 0.124099962413311, "learning_rate": 0.0005, "loss": 2.1269, "step": 173730 }, { "epoch": 0.6612973211634935, "grad_norm": 0.13553419709205627, "learning_rate": 0.0005, "loss": 2.1197, "step": 173740 }, { "epoch": 0.6613353836316163, "grad_norm": 0.12280543893575668, "learning_rate": 0.0005, "loss": 2.1292, "step": 173750 }, { "epoch": 0.6613734460997389, "grad_norm": 0.1327347308397293, "learning_rate": 0.0005, "loss": 2.1059, "step": 173760 }, { "epoch": 0.6614115085678616, "grad_norm": 0.13505050539970398, "learning_rate": 0.0005, "loss": 2.096, "step": 173770 }, { "epoch": 0.6614495710359842, "grad_norm": 0.135938823223114, "learning_rate": 0.0005, "loss": 2.1269, "step": 173780 }, { "epoch": 0.6614876335041069, "grad_norm": 0.14170552790164948, "learning_rate": 0.0005, "loss": 2.1123, "step": 173790 }, { "epoch": 0.6615256959722297, "grad_norm": 0.1215779259800911, "learning_rate": 0.0005, "loss": 2.1142, "step": 173800 }, { "epoch": 0.6615637584403523, "grad_norm": 0.12040174007415771, "learning_rate": 0.0005, "loss": 2.1155, "step": 173810 }, { "epoch": 0.661601820908475, "grad_norm": 0.11516829580068588, "learning_rate": 0.0005, "loss": 2.108, "step": 173820 }, { "epoch": 0.6616398833765976, "grad_norm": 0.1317477524280548, "learning_rate": 0.0005, "loss": 2.1097, "step": 173830 }, { "epoch": 0.6616779458447204, "grad_norm": 0.12069617956876755, "learning_rate": 0.0005, "loss": 2.102, "step": 173840 }, { "epoch": 0.661716008312843, "grad_norm": 0.13325481116771698, "learning_rate": 0.0005, "loss": 2.1008, "step": 173850 }, { "epoch": 0.6617540707809657, "grad_norm": 0.14849242568016052, "learning_rate": 0.0005, "loss": 2.1105, "step": 173860 }, { "epoch": 0.6617921332490884, "grad_norm": 0.13232289254665375, "learning_rate": 0.0005, "loss": 2.1182, "step": 173870 }, { "epoch": 0.6618301957172111, "grad_norm": 0.12283754348754883, "learning_rate": 0.0005, "loss": 2.1007, "step": 173880 }, { "epoch": 0.6618682581853338, "grad_norm": 0.1293504387140274, "learning_rate": 0.0005, "loss": 2.1124, "step": 173890 }, { "epoch": 0.6619063206534564, "grad_norm": 0.11680664122104645, "learning_rate": 0.0005, "loss": 2.1114, "step": 173900 }, { "epoch": 0.6619443831215791, "grad_norm": 0.11964026093482971, "learning_rate": 0.0005, "loss": 2.1148, "step": 173910 }, { "epoch": 0.6619824455897019, "grad_norm": 0.11271769553422928, "learning_rate": 0.0005, "loss": 2.1059, "step": 173920 }, { "epoch": 0.6620205080578245, "grad_norm": 0.12464918941259384, "learning_rate": 0.0005, "loss": 2.1262, "step": 173930 }, { "epoch": 0.6620585705259472, "grad_norm": 0.15808416903018951, "learning_rate": 0.0005, "loss": 2.1107, "step": 173940 }, { "epoch": 0.6620966329940698, "grad_norm": 0.12016769498586655, "learning_rate": 0.0005, "loss": 2.114, "step": 173950 }, { "epoch": 0.6621346954621925, "grad_norm": 0.14003989100456238, "learning_rate": 0.0005, "loss": 2.1032, "step": 173960 }, { "epoch": 0.6621727579303153, "grad_norm": 0.11997903883457184, "learning_rate": 0.0005, "loss": 2.1212, "step": 173970 }, { "epoch": 0.6622108203984379, "grad_norm": 0.11551333963871002, "learning_rate": 0.0005, "loss": 2.1016, "step": 173980 }, { "epoch": 0.6622488828665606, "grad_norm": 0.11689604818820953, "learning_rate": 0.0005, "loss": 2.1087, "step": 173990 }, { "epoch": 0.6622869453346832, "grad_norm": 0.1281474381685257, "learning_rate": 0.0005, "loss": 2.1087, "step": 174000 }, { "epoch": 0.662325007802806, "grad_norm": 0.11026114225387573, "learning_rate": 0.0005, "loss": 2.108, "step": 174010 }, { "epoch": 0.6623630702709287, "grad_norm": 0.11417057365179062, "learning_rate": 0.0005, "loss": 2.1115, "step": 174020 }, { "epoch": 0.6624011327390513, "grad_norm": 0.11177896708250046, "learning_rate": 0.0005, "loss": 2.1074, "step": 174030 }, { "epoch": 0.662439195207174, "grad_norm": 0.12440768629312515, "learning_rate": 0.0005, "loss": 2.1038, "step": 174040 }, { "epoch": 0.6624772576752967, "grad_norm": 0.11963693797588348, "learning_rate": 0.0005, "loss": 2.1255, "step": 174050 }, { "epoch": 0.6625153201434194, "grad_norm": 0.1275544911623001, "learning_rate": 0.0005, "loss": 2.1109, "step": 174060 }, { "epoch": 0.6625533826115421, "grad_norm": 0.2038632482290268, "learning_rate": 0.0005, "loss": 2.11, "step": 174070 }, { "epoch": 0.6625914450796647, "grad_norm": 0.1305026113986969, "learning_rate": 0.0005, "loss": 2.1173, "step": 174080 }, { "epoch": 0.6626295075477874, "grad_norm": 0.12949995696544647, "learning_rate": 0.0005, "loss": 2.1163, "step": 174090 }, { "epoch": 0.6626675700159101, "grad_norm": 0.13585160672664642, "learning_rate": 0.0005, "loss": 2.1213, "step": 174100 }, { "epoch": 0.6627056324840328, "grad_norm": 0.12164440751075745, "learning_rate": 0.0005, "loss": 2.0915, "step": 174110 }, { "epoch": 0.6627436949521555, "grad_norm": 0.13014480471611023, "learning_rate": 0.0005, "loss": 2.1176, "step": 174120 }, { "epoch": 0.6627817574202781, "grad_norm": 0.12387395650148392, "learning_rate": 0.0005, "loss": 2.0995, "step": 174130 }, { "epoch": 0.6628198198884009, "grad_norm": 0.12452687323093414, "learning_rate": 0.0005, "loss": 2.1131, "step": 174140 }, { "epoch": 0.6628578823565235, "grad_norm": 0.14395101368427277, "learning_rate": 0.0005, "loss": 2.0964, "step": 174150 }, { "epoch": 0.6628959448246462, "grad_norm": 0.1168159618973732, "learning_rate": 0.0005, "loss": 2.1111, "step": 174160 }, { "epoch": 0.6629340072927689, "grad_norm": 0.134388267993927, "learning_rate": 0.0005, "loss": 2.0997, "step": 174170 }, { "epoch": 0.6629720697608916, "grad_norm": 0.1289610117673874, "learning_rate": 0.0005, "loss": 2.1107, "step": 174180 }, { "epoch": 0.6630101322290143, "grad_norm": 0.12149546295404434, "learning_rate": 0.0005, "loss": 2.1041, "step": 174190 }, { "epoch": 0.6630481946971369, "grad_norm": 0.11487753689289093, "learning_rate": 0.0005, "loss": 2.1182, "step": 174200 }, { "epoch": 0.6630862571652596, "grad_norm": 0.11515262722969055, "learning_rate": 0.0005, "loss": 2.1032, "step": 174210 }, { "epoch": 0.6631243196333823, "grad_norm": 0.12902528047561646, "learning_rate": 0.0005, "loss": 2.1123, "step": 174220 }, { "epoch": 0.663162382101505, "grad_norm": 0.12115098536014557, "learning_rate": 0.0005, "loss": 2.1172, "step": 174230 }, { "epoch": 0.6632004445696277, "grad_norm": 0.12723582983016968, "learning_rate": 0.0005, "loss": 2.1042, "step": 174240 }, { "epoch": 0.6632385070377503, "grad_norm": 0.12236615270376205, "learning_rate": 0.0005, "loss": 2.1119, "step": 174250 }, { "epoch": 0.663276569505873, "grad_norm": 0.135558620095253, "learning_rate": 0.0005, "loss": 2.1193, "step": 174260 }, { "epoch": 0.6633146319739958, "grad_norm": 0.11501994729042053, "learning_rate": 0.0005, "loss": 2.0963, "step": 174270 }, { "epoch": 0.6633526944421184, "grad_norm": 0.13081204891204834, "learning_rate": 0.0005, "loss": 2.1141, "step": 174280 }, { "epoch": 0.6633907569102411, "grad_norm": 0.13843820989131927, "learning_rate": 0.0005, "loss": 2.1114, "step": 174290 }, { "epoch": 0.6634288193783637, "grad_norm": 0.13076812028884888, "learning_rate": 0.0005, "loss": 2.1042, "step": 174300 }, { "epoch": 0.6634668818464865, "grad_norm": 0.12675538659095764, "learning_rate": 0.0005, "loss": 2.118, "step": 174310 }, { "epoch": 0.6635049443146092, "grad_norm": 0.12187536060810089, "learning_rate": 0.0005, "loss": 2.1079, "step": 174320 }, { "epoch": 0.6635430067827318, "grad_norm": 0.11600963771343231, "learning_rate": 0.0005, "loss": 2.1081, "step": 174330 }, { "epoch": 0.6635810692508545, "grad_norm": 0.12720301747322083, "learning_rate": 0.0005, "loss": 2.1168, "step": 174340 }, { "epoch": 0.6636191317189772, "grad_norm": 0.1272958666086197, "learning_rate": 0.0005, "loss": 2.1202, "step": 174350 }, { "epoch": 0.6636571941870999, "grad_norm": 0.11979950964450836, "learning_rate": 0.0005, "loss": 2.1165, "step": 174360 }, { "epoch": 0.6636952566552226, "grad_norm": 0.132895827293396, "learning_rate": 0.0005, "loss": 2.123, "step": 174370 }, { "epoch": 0.6637333191233452, "grad_norm": 0.139450341463089, "learning_rate": 0.0005, "loss": 2.107, "step": 174380 }, { "epoch": 0.6637713815914679, "grad_norm": 0.12335019558668137, "learning_rate": 0.0005, "loss": 2.1039, "step": 174390 }, { "epoch": 0.6638094440595906, "grad_norm": 0.11418977379798889, "learning_rate": 0.0005, "loss": 2.1084, "step": 174400 }, { "epoch": 0.6638475065277133, "grad_norm": 0.1271081268787384, "learning_rate": 0.0005, "loss": 2.0918, "step": 174410 }, { "epoch": 0.663885568995836, "grad_norm": 0.13359275460243225, "learning_rate": 0.0005, "loss": 2.1023, "step": 174420 }, { "epoch": 0.6639236314639586, "grad_norm": 0.12090206891298294, "learning_rate": 0.0005, "loss": 2.1176, "step": 174430 }, { "epoch": 0.6639616939320814, "grad_norm": 0.11663159728050232, "learning_rate": 0.0005, "loss": 2.1151, "step": 174440 }, { "epoch": 0.663999756400204, "grad_norm": 0.11631562560796738, "learning_rate": 0.0005, "loss": 2.1053, "step": 174450 }, { "epoch": 0.6640378188683267, "grad_norm": 0.12493157386779785, "learning_rate": 0.0005, "loss": 2.109, "step": 174460 }, { "epoch": 0.6640758813364493, "grad_norm": 0.1239357739686966, "learning_rate": 0.0005, "loss": 2.1181, "step": 174470 }, { "epoch": 0.6641139438045721, "grad_norm": 0.1262454092502594, "learning_rate": 0.0005, "loss": 2.1038, "step": 174480 }, { "epoch": 0.6641520062726948, "grad_norm": 0.12332665920257568, "learning_rate": 0.0005, "loss": 2.1027, "step": 174490 }, { "epoch": 0.6641900687408174, "grad_norm": 0.13212019205093384, "learning_rate": 0.0005, "loss": 2.0915, "step": 174500 }, { "epoch": 0.6642281312089401, "grad_norm": 0.13737590610980988, "learning_rate": 0.0005, "loss": 2.1131, "step": 174510 }, { "epoch": 0.6642661936770627, "grad_norm": 0.12693600356578827, "learning_rate": 0.0005, "loss": 2.1154, "step": 174520 }, { "epoch": 0.6643042561451855, "grad_norm": 0.14551861584186554, "learning_rate": 0.0005, "loss": 2.1057, "step": 174530 }, { "epoch": 0.6643423186133082, "grad_norm": 0.11993174999952316, "learning_rate": 0.0005, "loss": 2.1078, "step": 174540 }, { "epoch": 0.6643803810814308, "grad_norm": 0.11988063901662827, "learning_rate": 0.0005, "loss": 2.0993, "step": 174550 }, { "epoch": 0.6644184435495535, "grad_norm": 0.11576548963785172, "learning_rate": 0.0005, "loss": 2.0968, "step": 174560 }, { "epoch": 0.6644565060176763, "grad_norm": 0.1163567528128624, "learning_rate": 0.0005, "loss": 2.1101, "step": 174570 }, { "epoch": 0.6644945684857989, "grad_norm": 0.11626116186380386, "learning_rate": 0.0005, "loss": 2.1048, "step": 174580 }, { "epoch": 0.6645326309539216, "grad_norm": 0.13025660812854767, "learning_rate": 0.0005, "loss": 2.115, "step": 174590 }, { "epoch": 0.6645706934220442, "grad_norm": 0.1261567324399948, "learning_rate": 0.0005, "loss": 2.1225, "step": 174600 }, { "epoch": 0.664608755890167, "grad_norm": 0.1174742728471756, "learning_rate": 0.0005, "loss": 2.1097, "step": 174610 }, { "epoch": 0.6646468183582896, "grad_norm": 0.12801344692707062, "learning_rate": 0.0005, "loss": 2.1004, "step": 174620 }, { "epoch": 0.6646848808264123, "grad_norm": 0.11963155120611191, "learning_rate": 0.0005, "loss": 2.1134, "step": 174630 }, { "epoch": 0.664722943294535, "grad_norm": 0.14293743669986725, "learning_rate": 0.0005, "loss": 2.1111, "step": 174640 }, { "epoch": 0.6647610057626576, "grad_norm": 0.12802202999591827, "learning_rate": 0.0005, "loss": 2.1133, "step": 174650 }, { "epoch": 0.6647990682307804, "grad_norm": 0.13224215805530548, "learning_rate": 0.0005, "loss": 2.1151, "step": 174660 }, { "epoch": 0.664837130698903, "grad_norm": 0.1264164298772812, "learning_rate": 0.0005, "loss": 2.1046, "step": 174670 }, { "epoch": 0.6648751931670257, "grad_norm": 0.12990862131118774, "learning_rate": 0.0005, "loss": 2.1192, "step": 174680 }, { "epoch": 0.6649132556351484, "grad_norm": 0.11904025822877884, "learning_rate": 0.0005, "loss": 2.1102, "step": 174690 }, { "epoch": 0.6649513181032711, "grad_norm": 0.1135101467370987, "learning_rate": 0.0005, "loss": 2.1081, "step": 174700 }, { "epoch": 0.6649893805713938, "grad_norm": 0.12086432427167892, "learning_rate": 0.0005, "loss": 2.1223, "step": 174710 }, { "epoch": 0.6650274430395164, "grad_norm": 0.13415375351905823, "learning_rate": 0.0005, "loss": 2.1246, "step": 174720 }, { "epoch": 0.6650655055076391, "grad_norm": 0.29723674058914185, "learning_rate": 0.0005, "loss": 2.0995, "step": 174730 }, { "epoch": 0.6651035679757619, "grad_norm": 0.12538795173168182, "learning_rate": 0.0005, "loss": 2.136, "step": 174740 }, { "epoch": 0.6651416304438845, "grad_norm": 0.11672871559858322, "learning_rate": 0.0005, "loss": 2.1099, "step": 174750 }, { "epoch": 0.6651796929120072, "grad_norm": 0.12080489099025726, "learning_rate": 0.0005, "loss": 2.1198, "step": 174760 }, { "epoch": 0.6652177553801298, "grad_norm": 0.12584912776947021, "learning_rate": 0.0005, "loss": 2.1116, "step": 174770 }, { "epoch": 0.6652558178482526, "grad_norm": 0.12897630035877228, "learning_rate": 0.0005, "loss": 2.1064, "step": 174780 }, { "epoch": 0.6652938803163753, "grad_norm": 0.11987827718257904, "learning_rate": 0.0005, "loss": 2.1091, "step": 174790 }, { "epoch": 0.6653319427844979, "grad_norm": 0.12624815106391907, "learning_rate": 0.0005, "loss": 2.111, "step": 174800 }, { "epoch": 0.6653700052526206, "grad_norm": 0.11816033720970154, "learning_rate": 0.0005, "loss": 2.1176, "step": 174810 }, { "epoch": 0.6654080677207432, "grad_norm": 0.12806962430477142, "learning_rate": 0.0005, "loss": 2.1105, "step": 174820 }, { "epoch": 0.665446130188866, "grad_norm": 0.13350574672222137, "learning_rate": 0.0005, "loss": 2.0976, "step": 174830 }, { "epoch": 0.6654841926569887, "grad_norm": 0.12472176551818848, "learning_rate": 0.0005, "loss": 2.1141, "step": 174840 }, { "epoch": 0.6655222551251113, "grad_norm": 0.11982429772615433, "learning_rate": 0.0005, "loss": 2.0889, "step": 174850 }, { "epoch": 0.665560317593234, "grad_norm": 0.11519755423069, "learning_rate": 0.0005, "loss": 2.1061, "step": 174860 }, { "epoch": 0.6655983800613567, "grad_norm": 0.12131005525588989, "learning_rate": 0.0005, "loss": 2.103, "step": 174870 }, { "epoch": 0.6656364425294794, "grad_norm": 0.13292309641838074, "learning_rate": 0.0005, "loss": 2.1126, "step": 174880 }, { "epoch": 0.6656745049976021, "grad_norm": 0.13719289004802704, "learning_rate": 0.0005, "loss": 2.1095, "step": 174890 }, { "epoch": 0.6657125674657247, "grad_norm": 0.1265268474817276, "learning_rate": 0.0005, "loss": 2.1109, "step": 174900 }, { "epoch": 0.6657506299338475, "grad_norm": 0.11568540334701538, "learning_rate": 0.0005, "loss": 2.1129, "step": 174910 }, { "epoch": 0.6657886924019701, "grad_norm": 0.12374504655599594, "learning_rate": 0.0005, "loss": 2.1051, "step": 174920 }, { "epoch": 0.6658267548700928, "grad_norm": 0.1308293640613556, "learning_rate": 0.0005, "loss": 2.0927, "step": 174930 }, { "epoch": 0.6658648173382155, "grad_norm": 0.12477356940507889, "learning_rate": 0.0005, "loss": 2.1067, "step": 174940 }, { "epoch": 0.6659028798063381, "grad_norm": 0.1219056025147438, "learning_rate": 0.0005, "loss": 2.0891, "step": 174950 }, { "epoch": 0.6659409422744609, "grad_norm": 0.11442694067955017, "learning_rate": 0.0005, "loss": 2.1126, "step": 174960 }, { "epoch": 0.6659790047425835, "grad_norm": 0.11663993448019028, "learning_rate": 0.0005, "loss": 2.1116, "step": 174970 }, { "epoch": 0.6660170672107062, "grad_norm": 0.11952158063650131, "learning_rate": 0.0005, "loss": 2.0908, "step": 174980 }, { "epoch": 0.6660551296788289, "grad_norm": 0.12595412135124207, "learning_rate": 0.0005, "loss": 2.0944, "step": 174990 }, { "epoch": 0.6660931921469516, "grad_norm": 0.12811562418937683, "learning_rate": 0.0005, "loss": 2.1061, "step": 175000 }, { "epoch": 0.6661312546150743, "grad_norm": 0.11032678186893463, "learning_rate": 0.0005, "loss": 2.1114, "step": 175010 }, { "epoch": 0.6661693170831969, "grad_norm": 0.13032512366771698, "learning_rate": 0.0005, "loss": 2.1128, "step": 175020 }, { "epoch": 0.6662073795513196, "grad_norm": 0.1279965043067932, "learning_rate": 0.0005, "loss": 2.1153, "step": 175030 }, { "epoch": 0.6662454420194424, "grad_norm": 0.12894755601882935, "learning_rate": 0.0005, "loss": 2.1081, "step": 175040 }, { "epoch": 0.666283504487565, "grad_norm": 0.13770896196365356, "learning_rate": 0.0005, "loss": 2.0926, "step": 175050 }, { "epoch": 0.6663215669556877, "grad_norm": 0.12088262289762497, "learning_rate": 0.0005, "loss": 2.1024, "step": 175060 }, { "epoch": 0.6663596294238103, "grad_norm": 0.13149189949035645, "learning_rate": 0.0005, "loss": 2.114, "step": 175070 }, { "epoch": 0.666397691891933, "grad_norm": 0.11546579003334045, "learning_rate": 0.0005, "loss": 2.0955, "step": 175080 }, { "epoch": 0.6664357543600558, "grad_norm": 0.12365476787090302, "learning_rate": 0.0005, "loss": 2.1146, "step": 175090 }, { "epoch": 0.6664738168281784, "grad_norm": 0.12172803282737732, "learning_rate": 0.0005, "loss": 2.0895, "step": 175100 }, { "epoch": 0.6665118792963011, "grad_norm": 0.1241191178560257, "learning_rate": 0.0005, "loss": 2.1075, "step": 175110 }, { "epoch": 0.6665499417644237, "grad_norm": 0.13243572413921356, "learning_rate": 0.0005, "loss": 2.1226, "step": 175120 }, { "epoch": 0.6665880042325465, "grad_norm": 0.11853521317243576, "learning_rate": 0.0005, "loss": 2.1064, "step": 175130 }, { "epoch": 0.6666260667006692, "grad_norm": 0.12455012649297714, "learning_rate": 0.0005, "loss": 2.094, "step": 175140 }, { "epoch": 0.6666641291687918, "grad_norm": 0.1241413950920105, "learning_rate": 0.0005, "loss": 2.1038, "step": 175150 }, { "epoch": 0.6667021916369145, "grad_norm": 0.1175464391708374, "learning_rate": 0.0005, "loss": 2.1145, "step": 175160 }, { "epoch": 0.6667402541050372, "grad_norm": 0.12016557157039642, "learning_rate": 0.0005, "loss": 2.1145, "step": 175170 }, { "epoch": 0.6667783165731599, "grad_norm": 0.11607896536588669, "learning_rate": 0.0005, "loss": 2.1247, "step": 175180 }, { "epoch": 0.6668163790412825, "grad_norm": 0.11710387468338013, "learning_rate": 0.0005, "loss": 2.1154, "step": 175190 }, { "epoch": 0.6668544415094052, "grad_norm": 0.13466401398181915, "learning_rate": 0.0005, "loss": 2.1178, "step": 175200 }, { "epoch": 0.666892503977528, "grad_norm": 0.13053859770298004, "learning_rate": 0.0005, "loss": 2.1292, "step": 175210 }, { "epoch": 0.6669305664456506, "grad_norm": 0.13781042397022247, "learning_rate": 0.0005, "loss": 2.1124, "step": 175220 }, { "epoch": 0.6669686289137733, "grad_norm": 0.11386517435312271, "learning_rate": 0.0005, "loss": 2.1107, "step": 175230 }, { "epoch": 0.6670066913818959, "grad_norm": 0.12546059489250183, "learning_rate": 0.0005, "loss": 2.129, "step": 175240 }, { "epoch": 0.6670447538500186, "grad_norm": 0.11651482433080673, "learning_rate": 0.0005, "loss": 2.1138, "step": 175250 }, { "epoch": 0.6670828163181414, "grad_norm": 0.1311427354812622, "learning_rate": 0.0005, "loss": 2.114, "step": 175260 }, { "epoch": 0.667120878786264, "grad_norm": 0.1285676211118698, "learning_rate": 0.0005, "loss": 2.1156, "step": 175270 }, { "epoch": 0.6671589412543867, "grad_norm": 0.11666958779096603, "learning_rate": 0.0005, "loss": 2.1114, "step": 175280 }, { "epoch": 0.6671970037225093, "grad_norm": 0.12320812791585922, "learning_rate": 0.0005, "loss": 2.1002, "step": 175290 }, { "epoch": 0.6672350661906321, "grad_norm": 0.14052678644657135, "learning_rate": 0.0005, "loss": 2.099, "step": 175300 }, { "epoch": 0.6672731286587548, "grad_norm": 0.12256249040365219, "learning_rate": 0.0005, "loss": 2.1103, "step": 175310 }, { "epoch": 0.6673111911268774, "grad_norm": 0.13572126626968384, "learning_rate": 0.0005, "loss": 2.105, "step": 175320 }, { "epoch": 0.6673492535950001, "grad_norm": 0.13958248496055603, "learning_rate": 0.0005, "loss": 2.0991, "step": 175330 }, { "epoch": 0.6673873160631228, "grad_norm": 0.12266869097948074, "learning_rate": 0.0005, "loss": 2.1041, "step": 175340 }, { "epoch": 0.6674253785312455, "grad_norm": 0.11703468859195709, "learning_rate": 0.0005, "loss": 2.0954, "step": 175350 }, { "epoch": 0.6674634409993682, "grad_norm": 0.12484674155712128, "learning_rate": 0.0005, "loss": 2.1063, "step": 175360 }, { "epoch": 0.6675015034674908, "grad_norm": 0.20197418332099915, "learning_rate": 0.0005, "loss": 2.1273, "step": 175370 }, { "epoch": 0.6675395659356135, "grad_norm": 0.11558258533477783, "learning_rate": 0.0005, "loss": 2.1206, "step": 175380 }, { "epoch": 0.6675776284037362, "grad_norm": 0.11437583714723587, "learning_rate": 0.0005, "loss": 2.109, "step": 175390 }, { "epoch": 0.6676156908718589, "grad_norm": 0.11593539267778397, "learning_rate": 0.0005, "loss": 2.1056, "step": 175400 }, { "epoch": 0.6676537533399816, "grad_norm": 0.13235677778720856, "learning_rate": 0.0005, "loss": 2.0951, "step": 175410 }, { "epoch": 0.6676918158081042, "grad_norm": 0.13780459761619568, "learning_rate": 0.0005, "loss": 2.1091, "step": 175420 }, { "epoch": 0.667729878276227, "grad_norm": 0.1119593009352684, "learning_rate": 0.0005, "loss": 2.0994, "step": 175430 }, { "epoch": 0.6677679407443496, "grad_norm": 0.11851577460765839, "learning_rate": 0.0005, "loss": 2.1119, "step": 175440 }, { "epoch": 0.6678060032124723, "grad_norm": 0.12241306155920029, "learning_rate": 0.0005, "loss": 2.0935, "step": 175450 }, { "epoch": 0.667844065680595, "grad_norm": 0.11685778200626373, "learning_rate": 0.0005, "loss": 2.1102, "step": 175460 }, { "epoch": 0.6678821281487177, "grad_norm": 0.1231299638748169, "learning_rate": 0.0005, "loss": 2.1075, "step": 175470 }, { "epoch": 0.6679201906168404, "grad_norm": 0.1288386881351471, "learning_rate": 0.0005, "loss": 2.0983, "step": 175480 }, { "epoch": 0.667958253084963, "grad_norm": 0.14113663136959076, "learning_rate": 0.0005, "loss": 2.1183, "step": 175490 }, { "epoch": 0.6679963155530857, "grad_norm": 0.12154048681259155, "learning_rate": 0.0005, "loss": 2.1015, "step": 175500 }, { "epoch": 0.6680343780212084, "grad_norm": 0.11648967117071152, "learning_rate": 0.0005, "loss": 2.1037, "step": 175510 }, { "epoch": 0.6680724404893311, "grad_norm": 0.1199147179722786, "learning_rate": 0.0005, "loss": 2.1099, "step": 175520 }, { "epoch": 0.6681105029574538, "grad_norm": 0.1248493567109108, "learning_rate": 0.0005, "loss": 2.0968, "step": 175530 }, { "epoch": 0.6681485654255764, "grad_norm": 0.12896087765693665, "learning_rate": 0.0005, "loss": 2.1064, "step": 175540 }, { "epoch": 0.6681866278936991, "grad_norm": 0.11197390407323837, "learning_rate": 0.0005, "loss": 2.0893, "step": 175550 }, { "epoch": 0.6682246903618219, "grad_norm": 0.1222924143075943, "learning_rate": 0.0005, "loss": 2.1083, "step": 175560 }, { "epoch": 0.6682627528299445, "grad_norm": 0.12692193686962128, "learning_rate": 0.0005, "loss": 2.0893, "step": 175570 }, { "epoch": 0.6683008152980672, "grad_norm": 0.12094826251268387, "learning_rate": 0.0005, "loss": 2.1149, "step": 175580 }, { "epoch": 0.6683388777661898, "grad_norm": 0.12729638814926147, "learning_rate": 0.0005, "loss": 2.1072, "step": 175590 }, { "epoch": 0.6683769402343126, "grad_norm": 0.12060044705867767, "learning_rate": 0.0005, "loss": 2.1102, "step": 175600 }, { "epoch": 0.6684150027024353, "grad_norm": 0.11878161132335663, "learning_rate": 0.0005, "loss": 2.1154, "step": 175610 }, { "epoch": 0.6684530651705579, "grad_norm": 0.12017234414815903, "learning_rate": 0.0005, "loss": 2.1162, "step": 175620 }, { "epoch": 0.6684911276386806, "grad_norm": 0.13166853785514832, "learning_rate": 0.0005, "loss": 2.1078, "step": 175630 }, { "epoch": 0.6685291901068033, "grad_norm": 0.1585860550403595, "learning_rate": 0.0005, "loss": 2.0934, "step": 175640 }, { "epoch": 0.668567252574926, "grad_norm": 0.12804687023162842, "learning_rate": 0.0005, "loss": 2.1133, "step": 175650 }, { "epoch": 0.6686053150430487, "grad_norm": 0.121377132833004, "learning_rate": 0.0005, "loss": 2.1022, "step": 175660 }, { "epoch": 0.6686433775111713, "grad_norm": 0.11736253648996353, "learning_rate": 0.0005, "loss": 2.1081, "step": 175670 }, { "epoch": 0.668681439979294, "grad_norm": 0.11880221962928772, "learning_rate": 0.0005, "loss": 2.113, "step": 175680 }, { "epoch": 0.6687195024474167, "grad_norm": 0.29473286867141724, "learning_rate": 0.0005, "loss": 2.1127, "step": 175690 }, { "epoch": 0.6687575649155394, "grad_norm": 0.120498426258564, "learning_rate": 0.0005, "loss": 2.1156, "step": 175700 }, { "epoch": 0.668795627383662, "grad_norm": 0.13548476994037628, "learning_rate": 0.0005, "loss": 2.1101, "step": 175710 }, { "epoch": 0.6688336898517847, "grad_norm": 0.12871968746185303, "learning_rate": 0.0005, "loss": 2.1027, "step": 175720 }, { "epoch": 0.6688717523199075, "grad_norm": 0.1298273652791977, "learning_rate": 0.0005, "loss": 2.1079, "step": 175730 }, { "epoch": 0.6689098147880301, "grad_norm": 0.14113155007362366, "learning_rate": 0.0005, "loss": 2.1081, "step": 175740 }, { "epoch": 0.6689478772561528, "grad_norm": 0.11712589859962463, "learning_rate": 0.0005, "loss": 2.1179, "step": 175750 }, { "epoch": 0.6689859397242754, "grad_norm": 0.12501053512096405, "learning_rate": 0.0005, "loss": 2.1106, "step": 175760 }, { "epoch": 0.6690240021923982, "grad_norm": 0.13220301270484924, "learning_rate": 0.0005, "loss": 2.1226, "step": 175770 }, { "epoch": 0.6690620646605209, "grad_norm": 0.13822099566459656, "learning_rate": 0.0005, "loss": 2.0945, "step": 175780 }, { "epoch": 0.6691001271286435, "grad_norm": 0.11559640616178513, "learning_rate": 0.0005, "loss": 2.1263, "step": 175790 }, { "epoch": 0.6691381895967662, "grad_norm": 0.12654858827590942, "learning_rate": 0.0005, "loss": 2.1041, "step": 175800 }, { "epoch": 0.6691762520648888, "grad_norm": 0.12716756761074066, "learning_rate": 0.0005, "loss": 2.1205, "step": 175810 }, { "epoch": 0.6692143145330116, "grad_norm": 0.1146780326962471, "learning_rate": 0.0005, "loss": 2.1049, "step": 175820 }, { "epoch": 0.6692523770011343, "grad_norm": 0.13051363825798035, "learning_rate": 0.0005, "loss": 2.1225, "step": 175830 }, { "epoch": 0.6692904394692569, "grad_norm": 0.4045941233634949, "learning_rate": 0.0005, "loss": 2.1215, "step": 175840 }, { "epoch": 0.6693285019373796, "grad_norm": 0.12739895284175873, "learning_rate": 0.0005, "loss": 2.1282, "step": 175850 }, { "epoch": 0.6693665644055024, "grad_norm": 0.12500180304050446, "learning_rate": 0.0005, "loss": 2.1059, "step": 175860 }, { "epoch": 0.669404626873625, "grad_norm": 0.12794901430606842, "learning_rate": 0.0005, "loss": 2.1187, "step": 175870 }, { "epoch": 0.6694426893417477, "grad_norm": 0.12256273627281189, "learning_rate": 0.0005, "loss": 2.1224, "step": 175880 }, { "epoch": 0.6694807518098703, "grad_norm": 0.1283980906009674, "learning_rate": 0.0005, "loss": 2.1071, "step": 175890 }, { "epoch": 0.6695188142779931, "grad_norm": 0.12353162467479706, "learning_rate": 0.0005, "loss": 2.1085, "step": 175900 }, { "epoch": 0.6695568767461157, "grad_norm": 0.1328476071357727, "learning_rate": 0.0005, "loss": 2.1066, "step": 175910 }, { "epoch": 0.6695949392142384, "grad_norm": 0.13343828916549683, "learning_rate": 0.0005, "loss": 2.1043, "step": 175920 }, { "epoch": 0.6696330016823611, "grad_norm": 0.1255650371313095, "learning_rate": 0.0005, "loss": 2.0965, "step": 175930 }, { "epoch": 0.6696710641504837, "grad_norm": 0.13649240136146545, "learning_rate": 0.0005, "loss": 2.1138, "step": 175940 }, { "epoch": 0.6697091266186065, "grad_norm": 0.12300781905651093, "learning_rate": 0.0005, "loss": 2.0956, "step": 175950 }, { "epoch": 0.6697471890867291, "grad_norm": 0.1329295039176941, "learning_rate": 0.0005, "loss": 2.1163, "step": 175960 }, { "epoch": 0.6697852515548518, "grad_norm": 0.12757021188735962, "learning_rate": 0.0005, "loss": 2.0984, "step": 175970 }, { "epoch": 0.6698233140229745, "grad_norm": 0.11467017978429794, "learning_rate": 0.0005, "loss": 2.1032, "step": 175980 }, { "epoch": 0.6698613764910972, "grad_norm": 0.1320362240076065, "learning_rate": 0.0005, "loss": 2.1127, "step": 175990 }, { "epoch": 0.6698994389592199, "grad_norm": 0.11636713147163391, "learning_rate": 0.0005, "loss": 2.0993, "step": 176000 }, { "epoch": 0.6699375014273425, "grad_norm": 0.11335117369890213, "learning_rate": 0.0005, "loss": 2.1074, "step": 176010 }, { "epoch": 0.6699755638954652, "grad_norm": 0.12051542848348618, "learning_rate": 0.0005, "loss": 2.1075, "step": 176020 }, { "epoch": 0.670013626363588, "grad_norm": 0.11330889165401459, "learning_rate": 0.0005, "loss": 2.1101, "step": 176030 }, { "epoch": 0.6700516888317106, "grad_norm": 0.11903239041566849, "learning_rate": 0.0005, "loss": 2.1233, "step": 176040 }, { "epoch": 0.6700897512998333, "grad_norm": 0.12131945043802261, "learning_rate": 0.0005, "loss": 2.1034, "step": 176050 }, { "epoch": 0.6701278137679559, "grad_norm": 0.1258198618888855, "learning_rate": 0.0005, "loss": 2.1047, "step": 176060 }, { "epoch": 0.6701658762360787, "grad_norm": 0.12380845844745636, "learning_rate": 0.0005, "loss": 2.1089, "step": 176070 }, { "epoch": 0.6702039387042014, "grad_norm": 0.11659497767686844, "learning_rate": 0.0005, "loss": 2.112, "step": 176080 }, { "epoch": 0.670242001172324, "grad_norm": 0.12323492020368576, "learning_rate": 0.0005, "loss": 2.1003, "step": 176090 }, { "epoch": 0.6702800636404467, "grad_norm": 0.1283530592918396, "learning_rate": 0.0005, "loss": 2.1057, "step": 176100 }, { "epoch": 0.6703181261085693, "grad_norm": 0.12053488940000534, "learning_rate": 0.0005, "loss": 2.1125, "step": 176110 }, { "epoch": 0.6703561885766921, "grad_norm": 0.12956029176712036, "learning_rate": 0.0005, "loss": 2.0991, "step": 176120 }, { "epoch": 0.6703942510448148, "grad_norm": 0.13247571885585785, "learning_rate": 0.0005, "loss": 2.0964, "step": 176130 }, { "epoch": 0.6704323135129374, "grad_norm": 0.11778081953525543, "learning_rate": 0.0005, "loss": 2.1072, "step": 176140 }, { "epoch": 0.6704703759810601, "grad_norm": 0.11774902790784836, "learning_rate": 0.0005, "loss": 2.1163, "step": 176150 }, { "epoch": 0.6705084384491828, "grad_norm": 0.12733864784240723, "learning_rate": 0.0005, "loss": 2.1001, "step": 176160 }, { "epoch": 0.6705465009173055, "grad_norm": 0.12119722366333008, "learning_rate": 0.0005, "loss": 2.0957, "step": 176170 }, { "epoch": 0.6705845633854282, "grad_norm": 0.1252257525920868, "learning_rate": 0.0005, "loss": 2.1214, "step": 176180 }, { "epoch": 0.6706226258535508, "grad_norm": 0.1208445280790329, "learning_rate": 0.0005, "loss": 2.1275, "step": 176190 }, { "epoch": 0.6706606883216736, "grad_norm": 0.14327453076839447, "learning_rate": 0.0005, "loss": 2.1069, "step": 176200 }, { "epoch": 0.6706987507897962, "grad_norm": 0.1284928023815155, "learning_rate": 0.0005, "loss": 2.1228, "step": 176210 }, { "epoch": 0.6707368132579189, "grad_norm": 0.1296626329421997, "learning_rate": 0.0005, "loss": 2.1101, "step": 176220 }, { "epoch": 0.6707748757260416, "grad_norm": 0.14004792273044586, "learning_rate": 0.0005, "loss": 2.1217, "step": 176230 }, { "epoch": 0.6708129381941642, "grad_norm": 0.1218409538269043, "learning_rate": 0.0005, "loss": 2.1109, "step": 176240 }, { "epoch": 0.670851000662287, "grad_norm": 0.12494488805532455, "learning_rate": 0.0005, "loss": 2.1173, "step": 176250 }, { "epoch": 0.6708890631304096, "grad_norm": 0.1340954452753067, "learning_rate": 0.0005, "loss": 2.1202, "step": 176260 }, { "epoch": 0.6709271255985323, "grad_norm": 0.13070081174373627, "learning_rate": 0.0005, "loss": 2.1153, "step": 176270 }, { "epoch": 0.670965188066655, "grad_norm": 0.1265803575515747, "learning_rate": 0.0005, "loss": 2.0996, "step": 176280 }, { "epoch": 0.6710032505347777, "grad_norm": 0.1216389462351799, "learning_rate": 0.0005, "loss": 2.1369, "step": 176290 }, { "epoch": 0.6710413130029004, "grad_norm": 0.12888644635677338, "learning_rate": 0.0005, "loss": 2.1167, "step": 176300 }, { "epoch": 0.671079375471023, "grad_norm": 0.12916290760040283, "learning_rate": 0.0005, "loss": 2.1074, "step": 176310 }, { "epoch": 0.6711174379391457, "grad_norm": 0.12547896802425385, "learning_rate": 0.0005, "loss": 2.1051, "step": 176320 }, { "epoch": 0.6711555004072685, "grad_norm": 0.122887521982193, "learning_rate": 0.0005, "loss": 2.1195, "step": 176330 }, { "epoch": 0.6711935628753911, "grad_norm": 0.13145895302295685, "learning_rate": 0.0005, "loss": 2.1202, "step": 176340 }, { "epoch": 0.6712316253435138, "grad_norm": 0.11573679000139236, "learning_rate": 0.0005, "loss": 2.1242, "step": 176350 }, { "epoch": 0.6712696878116364, "grad_norm": 0.12110036611557007, "learning_rate": 0.0005, "loss": 2.0973, "step": 176360 }, { "epoch": 0.6713077502797591, "grad_norm": 0.1120925024151802, "learning_rate": 0.0005, "loss": 2.096, "step": 176370 }, { "epoch": 0.6713458127478819, "grad_norm": 0.12430943548679352, "learning_rate": 0.0005, "loss": 2.1183, "step": 176380 }, { "epoch": 0.6713838752160045, "grad_norm": 0.11478005349636078, "learning_rate": 0.0005, "loss": 2.1029, "step": 176390 }, { "epoch": 0.6714219376841272, "grad_norm": 0.12330004572868347, "learning_rate": 0.0005, "loss": 2.097, "step": 176400 }, { "epoch": 0.6714600001522498, "grad_norm": 0.13458773493766785, "learning_rate": 0.0005, "loss": 2.1044, "step": 176410 }, { "epoch": 0.6714980626203726, "grad_norm": 0.12437058240175247, "learning_rate": 0.0005, "loss": 2.1027, "step": 176420 }, { "epoch": 0.6715361250884953, "grad_norm": 0.1219138354063034, "learning_rate": 0.0005, "loss": 2.111, "step": 176430 }, { "epoch": 0.6715741875566179, "grad_norm": 0.12024988234043121, "learning_rate": 0.0005, "loss": 2.0999, "step": 176440 }, { "epoch": 0.6716122500247406, "grad_norm": 0.12701474130153656, "learning_rate": 0.0005, "loss": 2.1175, "step": 176450 }, { "epoch": 0.6716503124928633, "grad_norm": 0.13415849208831787, "learning_rate": 0.0005, "loss": 2.1146, "step": 176460 }, { "epoch": 0.671688374960986, "grad_norm": 0.23941706120967865, "learning_rate": 0.0005, "loss": 2.1164, "step": 176470 }, { "epoch": 0.6717264374291086, "grad_norm": 0.12130671739578247, "learning_rate": 0.0005, "loss": 2.1288, "step": 176480 }, { "epoch": 0.6717644998972313, "grad_norm": 0.12588223814964294, "learning_rate": 0.0005, "loss": 2.1127, "step": 176490 }, { "epoch": 0.6718025623653541, "grad_norm": 0.12544754147529602, "learning_rate": 0.0005, "loss": 2.1114, "step": 176500 }, { "epoch": 0.6718406248334767, "grad_norm": 0.13205012679100037, "learning_rate": 0.0005, "loss": 2.1004, "step": 176510 }, { "epoch": 0.6718786873015994, "grad_norm": 0.12384752929210663, "learning_rate": 0.0005, "loss": 2.1159, "step": 176520 }, { "epoch": 0.671916749769722, "grad_norm": 0.12456027418375015, "learning_rate": 0.0005, "loss": 2.1037, "step": 176530 }, { "epoch": 0.6719548122378447, "grad_norm": 0.12547491490840912, "learning_rate": 0.0005, "loss": 2.0956, "step": 176540 }, { "epoch": 0.6719928747059675, "grad_norm": 0.12561234831809998, "learning_rate": 0.0005, "loss": 2.114, "step": 176550 }, { "epoch": 0.6720309371740901, "grad_norm": 0.11695345491170883, "learning_rate": 0.0005, "loss": 2.1189, "step": 176560 }, { "epoch": 0.6720689996422128, "grad_norm": 0.11866967380046844, "learning_rate": 0.0005, "loss": 2.1, "step": 176570 }, { "epoch": 0.6721070621103354, "grad_norm": 0.12426942586898804, "learning_rate": 0.0005, "loss": 2.1012, "step": 176580 }, { "epoch": 0.6721451245784582, "grad_norm": 0.12986068427562714, "learning_rate": 0.0005, "loss": 2.1189, "step": 176590 }, { "epoch": 0.6721831870465809, "grad_norm": 0.13200722634792328, "learning_rate": 0.0005, "loss": 2.0985, "step": 176600 }, { "epoch": 0.6722212495147035, "grad_norm": 0.12788031995296478, "learning_rate": 0.0005, "loss": 2.0862, "step": 176610 }, { "epoch": 0.6722593119828262, "grad_norm": 0.12382833659648895, "learning_rate": 0.0005, "loss": 2.0934, "step": 176620 }, { "epoch": 0.672297374450949, "grad_norm": 0.12163611501455307, "learning_rate": 0.0005, "loss": 2.098, "step": 176630 }, { "epoch": 0.6723354369190716, "grad_norm": 0.13965623080730438, "learning_rate": 0.0005, "loss": 2.1204, "step": 176640 }, { "epoch": 0.6723734993871943, "grad_norm": 0.11982572823762894, "learning_rate": 0.0005, "loss": 2.1156, "step": 176650 }, { "epoch": 0.6724115618553169, "grad_norm": 0.12545208632946014, "learning_rate": 0.0005, "loss": 2.1085, "step": 176660 }, { "epoch": 0.6724496243234396, "grad_norm": 0.11891097575426102, "learning_rate": 0.0005, "loss": 2.1059, "step": 176670 }, { "epoch": 0.6724876867915623, "grad_norm": 0.11695777624845505, "learning_rate": 0.0005, "loss": 2.112, "step": 176680 }, { "epoch": 0.672525749259685, "grad_norm": 0.12013732641935349, "learning_rate": 0.0005, "loss": 2.1132, "step": 176690 }, { "epoch": 0.6725638117278077, "grad_norm": 0.12866584956645966, "learning_rate": 0.0005, "loss": 2.1066, "step": 176700 }, { "epoch": 0.6726018741959303, "grad_norm": 0.11633819341659546, "learning_rate": 0.0005, "loss": 2.1024, "step": 176710 }, { "epoch": 0.6726399366640531, "grad_norm": 0.12568669021129608, "learning_rate": 0.0005, "loss": 2.1236, "step": 176720 }, { "epoch": 0.6726779991321757, "grad_norm": 0.12346602976322174, "learning_rate": 0.0005, "loss": 2.1193, "step": 176730 }, { "epoch": 0.6727160616002984, "grad_norm": 0.1353200525045395, "learning_rate": 0.0005, "loss": 2.1148, "step": 176740 }, { "epoch": 0.672754124068421, "grad_norm": 0.1326470673084259, "learning_rate": 0.0005, "loss": 2.1229, "step": 176750 }, { "epoch": 0.6727921865365438, "grad_norm": 0.12330491840839386, "learning_rate": 0.0005, "loss": 2.1143, "step": 176760 }, { "epoch": 0.6728302490046665, "grad_norm": 0.11250167340040207, "learning_rate": 0.0005, "loss": 2.0979, "step": 176770 }, { "epoch": 0.6728683114727891, "grad_norm": 0.13047778606414795, "learning_rate": 0.0005, "loss": 2.1103, "step": 176780 }, { "epoch": 0.6729063739409118, "grad_norm": 0.12224044650793076, "learning_rate": 0.0005, "loss": 2.1226, "step": 176790 }, { "epoch": 0.6729444364090345, "grad_norm": 0.11687453091144562, "learning_rate": 0.0005, "loss": 2.1119, "step": 176800 }, { "epoch": 0.6729824988771572, "grad_norm": 0.12105058878660202, "learning_rate": 0.0005, "loss": 2.111, "step": 176810 }, { "epoch": 0.6730205613452799, "grad_norm": 0.13791713118553162, "learning_rate": 0.0005, "loss": 2.1081, "step": 176820 }, { "epoch": 0.6730586238134025, "grad_norm": 0.14121802151203156, "learning_rate": 0.0005, "loss": 2.1022, "step": 176830 }, { "epoch": 0.6730966862815252, "grad_norm": 0.11477218568325043, "learning_rate": 0.0005, "loss": 2.1209, "step": 176840 }, { "epoch": 0.673134748749648, "grad_norm": 0.11514034867286682, "learning_rate": 0.0005, "loss": 2.1186, "step": 176850 }, { "epoch": 0.6731728112177706, "grad_norm": 0.13139691948890686, "learning_rate": 0.0005, "loss": 2.115, "step": 176860 }, { "epoch": 0.6732108736858933, "grad_norm": 0.13228735327720642, "learning_rate": 0.0005, "loss": 2.1127, "step": 176870 }, { "epoch": 0.6732489361540159, "grad_norm": 0.1252131164073944, "learning_rate": 0.0005, "loss": 2.1036, "step": 176880 }, { "epoch": 0.6732869986221387, "grad_norm": 0.11720944195985794, "learning_rate": 0.0005, "loss": 2.1225, "step": 176890 }, { "epoch": 0.6733250610902614, "grad_norm": 0.13159362971782684, "learning_rate": 0.0005, "loss": 2.1071, "step": 176900 }, { "epoch": 0.673363123558384, "grad_norm": 0.13194140791893005, "learning_rate": 0.0005, "loss": 2.1061, "step": 176910 }, { "epoch": 0.6734011860265067, "grad_norm": 0.125912606716156, "learning_rate": 0.0005, "loss": 2.1007, "step": 176920 }, { "epoch": 0.6734392484946294, "grad_norm": 0.12334372103214264, "learning_rate": 0.0005, "loss": 2.1074, "step": 176930 }, { "epoch": 0.6734773109627521, "grad_norm": 0.11493158340454102, "learning_rate": 0.0005, "loss": 2.1183, "step": 176940 }, { "epoch": 0.6735153734308748, "grad_norm": 0.11860795319080353, "learning_rate": 0.0005, "loss": 2.093, "step": 176950 }, { "epoch": 0.6735534358989974, "grad_norm": 0.1266835480928421, "learning_rate": 0.0005, "loss": 2.0922, "step": 176960 }, { "epoch": 0.6735914983671201, "grad_norm": 0.1444019377231598, "learning_rate": 0.0005, "loss": 2.1083, "step": 176970 }, { "epoch": 0.6736295608352428, "grad_norm": 0.12035851180553436, "learning_rate": 0.0005, "loss": 2.0961, "step": 176980 }, { "epoch": 0.6736676233033655, "grad_norm": 0.11523926258087158, "learning_rate": 0.0005, "loss": 2.0926, "step": 176990 }, { "epoch": 0.6737056857714881, "grad_norm": 0.12259594351053238, "learning_rate": 0.0005, "loss": 2.1127, "step": 177000 }, { "epoch": 0.6737437482396108, "grad_norm": 0.1452437937259674, "learning_rate": 0.0005, "loss": 2.1015, "step": 177010 }, { "epoch": 0.6737818107077336, "grad_norm": 0.12240692973136902, "learning_rate": 0.0005, "loss": 2.108, "step": 177020 }, { "epoch": 0.6738198731758562, "grad_norm": 0.11802651733160019, "learning_rate": 0.0005, "loss": 2.1062, "step": 177030 }, { "epoch": 0.6738579356439789, "grad_norm": 0.13415051996707916, "learning_rate": 0.0005, "loss": 2.1005, "step": 177040 }, { "epoch": 0.6738959981121015, "grad_norm": 0.11771440505981445, "learning_rate": 0.0005, "loss": 2.1094, "step": 177050 }, { "epoch": 0.6739340605802243, "grad_norm": 0.11489039659500122, "learning_rate": 0.0005, "loss": 2.1058, "step": 177060 }, { "epoch": 0.673972123048347, "grad_norm": 0.1397712081670761, "learning_rate": 0.0005, "loss": 2.1054, "step": 177070 }, { "epoch": 0.6740101855164696, "grad_norm": 0.12139667570590973, "learning_rate": 0.0005, "loss": 2.1066, "step": 177080 }, { "epoch": 0.6740482479845923, "grad_norm": 0.12221745401620865, "learning_rate": 0.0005, "loss": 2.1116, "step": 177090 }, { "epoch": 0.6740863104527149, "grad_norm": 0.12911903858184814, "learning_rate": 0.0005, "loss": 2.1199, "step": 177100 }, { "epoch": 0.6741243729208377, "grad_norm": 0.123677559196949, "learning_rate": 0.0005, "loss": 2.1078, "step": 177110 }, { "epoch": 0.6741624353889604, "grad_norm": 0.12174209207296371, "learning_rate": 0.0005, "loss": 2.0907, "step": 177120 }, { "epoch": 0.674200497857083, "grad_norm": 0.11167836934328079, "learning_rate": 0.0005, "loss": 2.1077, "step": 177130 }, { "epoch": 0.6742385603252057, "grad_norm": 0.12730008363723755, "learning_rate": 0.0005, "loss": 2.1175, "step": 177140 }, { "epoch": 0.6742766227933285, "grad_norm": 0.11940035223960876, "learning_rate": 0.0005, "loss": 2.1029, "step": 177150 }, { "epoch": 0.6743146852614511, "grad_norm": 0.11655905097723007, "learning_rate": 0.0005, "loss": 2.1189, "step": 177160 }, { "epoch": 0.6743527477295738, "grad_norm": 0.12851646542549133, "learning_rate": 0.0005, "loss": 2.1147, "step": 177170 }, { "epoch": 0.6743908101976964, "grad_norm": 0.11784732341766357, "learning_rate": 0.0005, "loss": 2.0976, "step": 177180 }, { "epoch": 0.6744288726658192, "grad_norm": 0.14028826355934143, "learning_rate": 0.0005, "loss": 2.1073, "step": 177190 }, { "epoch": 0.6744669351339418, "grad_norm": 0.131543830037117, "learning_rate": 0.0005, "loss": 2.1065, "step": 177200 }, { "epoch": 0.6745049976020645, "grad_norm": 0.16741855442523956, "learning_rate": 0.0005, "loss": 2.1226, "step": 177210 }, { "epoch": 0.6745430600701872, "grad_norm": 0.14125947654247284, "learning_rate": 0.0005, "loss": 2.1059, "step": 177220 }, { "epoch": 0.6745811225383099, "grad_norm": 0.1236991360783577, "learning_rate": 0.0005, "loss": 2.0987, "step": 177230 }, { "epoch": 0.6746191850064326, "grad_norm": 0.11668677628040314, "learning_rate": 0.0005, "loss": 2.1244, "step": 177240 }, { "epoch": 0.6746572474745552, "grad_norm": 0.1334819495677948, "learning_rate": 0.0005, "loss": 2.1149, "step": 177250 }, { "epoch": 0.6746953099426779, "grad_norm": 0.12094828486442566, "learning_rate": 0.0005, "loss": 2.1099, "step": 177260 }, { "epoch": 0.6747333724108006, "grad_norm": 0.14832857251167297, "learning_rate": 0.0005, "loss": 2.112, "step": 177270 }, { "epoch": 0.6747714348789233, "grad_norm": 0.15081220865249634, "learning_rate": 0.0005, "loss": 2.1203, "step": 177280 }, { "epoch": 0.674809497347046, "grad_norm": 0.12344935536384583, "learning_rate": 0.0005, "loss": 2.1156, "step": 177290 }, { "epoch": 0.6748475598151686, "grad_norm": 0.13707692921161652, "learning_rate": 0.0005, "loss": 2.1217, "step": 177300 }, { "epoch": 0.6748856222832913, "grad_norm": 0.11331111192703247, "learning_rate": 0.0005, "loss": 2.1095, "step": 177310 }, { "epoch": 0.6749236847514141, "grad_norm": 0.13219937682151794, "learning_rate": 0.0005, "loss": 2.1138, "step": 177320 }, { "epoch": 0.6749617472195367, "grad_norm": 0.12428843975067139, "learning_rate": 0.0005, "loss": 2.1155, "step": 177330 }, { "epoch": 0.6749998096876594, "grad_norm": 0.1201782301068306, "learning_rate": 0.0005, "loss": 2.0849, "step": 177340 }, { "epoch": 0.675037872155782, "grad_norm": 0.13102507591247559, "learning_rate": 0.0005, "loss": 2.1091, "step": 177350 }, { "epoch": 0.6750759346239048, "grad_norm": 0.12345290929079056, "learning_rate": 0.0005, "loss": 2.1051, "step": 177360 }, { "epoch": 0.6751139970920275, "grad_norm": 0.11714234948158264, "learning_rate": 0.0005, "loss": 2.0884, "step": 177370 }, { "epoch": 0.6751520595601501, "grad_norm": 0.11517336964607239, "learning_rate": 0.0005, "loss": 2.1208, "step": 177380 }, { "epoch": 0.6751901220282728, "grad_norm": 0.11558566987514496, "learning_rate": 0.0005, "loss": 2.1133, "step": 177390 }, { "epoch": 0.6752281844963954, "grad_norm": 0.12705738842487335, "learning_rate": 0.0005, "loss": 2.1087, "step": 177400 }, { "epoch": 0.6752662469645182, "grad_norm": 0.12877146899700165, "learning_rate": 0.0005, "loss": 2.1203, "step": 177410 }, { "epoch": 0.6753043094326409, "grad_norm": 0.12679651379585266, "learning_rate": 0.0005, "loss": 2.1023, "step": 177420 }, { "epoch": 0.6753423719007635, "grad_norm": 0.1234024167060852, "learning_rate": 0.0005, "loss": 2.1067, "step": 177430 }, { "epoch": 0.6753804343688862, "grad_norm": 0.12910224497318268, "learning_rate": 0.0005, "loss": 2.1187, "step": 177440 }, { "epoch": 0.6754184968370089, "grad_norm": 0.12047523260116577, "learning_rate": 0.0005, "loss": 2.1126, "step": 177450 }, { "epoch": 0.6754565593051316, "grad_norm": 0.13041920959949493, "learning_rate": 0.0005, "loss": 2.1041, "step": 177460 }, { "epoch": 0.6754946217732543, "grad_norm": 0.1203002780675888, "learning_rate": 0.0005, "loss": 2.1104, "step": 177470 }, { "epoch": 0.6755326842413769, "grad_norm": 0.114230215549469, "learning_rate": 0.0005, "loss": 2.1171, "step": 177480 }, { "epoch": 0.6755707467094997, "grad_norm": 0.12507209181785583, "learning_rate": 0.0005, "loss": 2.1147, "step": 177490 }, { "epoch": 0.6756088091776223, "grad_norm": 0.13496264815330505, "learning_rate": 0.0005, "loss": 2.1165, "step": 177500 }, { "epoch": 0.675646871645745, "grad_norm": 0.14469777047634125, "learning_rate": 0.0005, "loss": 2.1155, "step": 177510 }, { "epoch": 0.6756849341138677, "grad_norm": 0.12933559715747833, "learning_rate": 0.0005, "loss": 2.1349, "step": 177520 }, { "epoch": 0.6757229965819903, "grad_norm": 0.1213747188448906, "learning_rate": 0.0005, "loss": 2.1074, "step": 177530 }, { "epoch": 0.6757610590501131, "grad_norm": 0.1251504123210907, "learning_rate": 0.0005, "loss": 2.1002, "step": 177540 }, { "epoch": 0.6757991215182357, "grad_norm": 0.13115982711315155, "learning_rate": 0.0005, "loss": 2.0957, "step": 177550 }, { "epoch": 0.6758371839863584, "grad_norm": 0.12193156778812408, "learning_rate": 0.0005, "loss": 2.1218, "step": 177560 }, { "epoch": 0.675875246454481, "grad_norm": 0.11421877890825272, "learning_rate": 0.0005, "loss": 2.1042, "step": 177570 }, { "epoch": 0.6759133089226038, "grad_norm": 0.11429056525230408, "learning_rate": 0.0005, "loss": 2.1144, "step": 177580 }, { "epoch": 0.6759513713907265, "grad_norm": 0.11777948588132858, "learning_rate": 0.0005, "loss": 2.1106, "step": 177590 }, { "epoch": 0.6759894338588491, "grad_norm": 0.1239769235253334, "learning_rate": 0.0005, "loss": 2.1198, "step": 177600 }, { "epoch": 0.6760274963269718, "grad_norm": 0.11587876081466675, "learning_rate": 0.0005, "loss": 2.1128, "step": 177610 }, { "epoch": 0.6760655587950946, "grad_norm": 0.11251964420080185, "learning_rate": 0.0005, "loss": 2.1005, "step": 177620 }, { "epoch": 0.6761036212632172, "grad_norm": 0.13177922368049622, "learning_rate": 0.0005, "loss": 2.1211, "step": 177630 }, { "epoch": 0.6761416837313399, "grad_norm": 0.12955419719219208, "learning_rate": 0.0005, "loss": 2.0921, "step": 177640 }, { "epoch": 0.6761797461994625, "grad_norm": 0.12349336594343185, "learning_rate": 0.0005, "loss": 2.1004, "step": 177650 }, { "epoch": 0.6762178086675853, "grad_norm": 0.1187133714556694, "learning_rate": 0.0005, "loss": 2.1217, "step": 177660 }, { "epoch": 0.676255871135708, "grad_norm": 0.11961386352777481, "learning_rate": 0.0005, "loss": 2.1224, "step": 177670 }, { "epoch": 0.6762939336038306, "grad_norm": 0.12203500419855118, "learning_rate": 0.0005, "loss": 2.0934, "step": 177680 }, { "epoch": 0.6763319960719533, "grad_norm": 0.10937876999378204, "learning_rate": 0.0005, "loss": 2.1003, "step": 177690 }, { "epoch": 0.6763700585400759, "grad_norm": 0.12402454763650894, "learning_rate": 0.0005, "loss": 2.1106, "step": 177700 }, { "epoch": 0.6764081210081987, "grad_norm": 0.12096264958381653, "learning_rate": 0.0005, "loss": 2.1309, "step": 177710 }, { "epoch": 0.6764461834763213, "grad_norm": 0.11508809030056, "learning_rate": 0.0005, "loss": 2.0907, "step": 177720 }, { "epoch": 0.676484245944444, "grad_norm": 0.120554119348526, "learning_rate": 0.0005, "loss": 2.1052, "step": 177730 }, { "epoch": 0.6765223084125667, "grad_norm": 0.14930492639541626, "learning_rate": 0.0005, "loss": 2.1002, "step": 177740 }, { "epoch": 0.6765603708806894, "grad_norm": 0.12888970971107483, "learning_rate": 0.0005, "loss": 2.1079, "step": 177750 }, { "epoch": 0.6765984333488121, "grad_norm": 0.11661666631698608, "learning_rate": 0.0005, "loss": 2.1196, "step": 177760 }, { "epoch": 0.6766364958169347, "grad_norm": 0.1314840316772461, "learning_rate": 0.0005, "loss": 2.1023, "step": 177770 }, { "epoch": 0.6766745582850574, "grad_norm": 0.1238323450088501, "learning_rate": 0.0005, "loss": 2.1224, "step": 177780 }, { "epoch": 0.6767126207531802, "grad_norm": 0.12378830462694168, "learning_rate": 0.0005, "loss": 2.1105, "step": 177790 }, { "epoch": 0.6767506832213028, "grad_norm": 0.12708117067813873, "learning_rate": 0.0005, "loss": 2.0985, "step": 177800 }, { "epoch": 0.6767887456894255, "grad_norm": 0.1207418218255043, "learning_rate": 0.0005, "loss": 2.1116, "step": 177810 }, { "epoch": 0.6768268081575481, "grad_norm": 0.12296409159898758, "learning_rate": 0.0005, "loss": 2.1118, "step": 177820 }, { "epoch": 0.6768648706256708, "grad_norm": 0.1191890686750412, "learning_rate": 0.0005, "loss": 2.1176, "step": 177830 }, { "epoch": 0.6769029330937936, "grad_norm": 0.1247776448726654, "learning_rate": 0.0005, "loss": 2.1119, "step": 177840 }, { "epoch": 0.6769409955619162, "grad_norm": 0.11682870239019394, "learning_rate": 0.0005, "loss": 2.1126, "step": 177850 }, { "epoch": 0.6769790580300389, "grad_norm": 0.11881222575902939, "learning_rate": 0.0005, "loss": 2.1139, "step": 177860 }, { "epoch": 0.6770171204981615, "grad_norm": 0.1070871651172638, "learning_rate": 0.0005, "loss": 2.1161, "step": 177870 }, { "epoch": 0.6770551829662843, "grad_norm": 0.12376029789447784, "learning_rate": 0.0005, "loss": 2.1097, "step": 177880 }, { "epoch": 0.677093245434407, "grad_norm": 0.12242919206619263, "learning_rate": 0.0005, "loss": 2.1248, "step": 177890 }, { "epoch": 0.6771313079025296, "grad_norm": 0.12933039665222168, "learning_rate": 0.0005, "loss": 2.1034, "step": 177900 }, { "epoch": 0.6771693703706523, "grad_norm": 0.1156318187713623, "learning_rate": 0.0005, "loss": 2.0935, "step": 177910 }, { "epoch": 0.677207432838775, "grad_norm": 0.11402034759521484, "learning_rate": 0.0005, "loss": 2.1174, "step": 177920 }, { "epoch": 0.6772454953068977, "grad_norm": 0.12201918661594391, "learning_rate": 0.0005, "loss": 2.1048, "step": 177930 }, { "epoch": 0.6772835577750204, "grad_norm": 0.12645022571086884, "learning_rate": 0.0005, "loss": 2.1167, "step": 177940 }, { "epoch": 0.677321620243143, "grad_norm": 0.12442043423652649, "learning_rate": 0.0005, "loss": 2.1127, "step": 177950 }, { "epoch": 0.6773596827112657, "grad_norm": 0.1325632780790329, "learning_rate": 0.0005, "loss": 2.1057, "step": 177960 }, { "epoch": 0.6773977451793884, "grad_norm": 0.1344526708126068, "learning_rate": 0.0005, "loss": 2.1023, "step": 177970 }, { "epoch": 0.6774358076475111, "grad_norm": 0.13039939105510712, "learning_rate": 0.0005, "loss": 2.1105, "step": 177980 }, { "epoch": 0.6774738701156338, "grad_norm": 0.13478504121303558, "learning_rate": 0.0005, "loss": 2.0844, "step": 177990 }, { "epoch": 0.6775119325837564, "grad_norm": 0.11849367618560791, "learning_rate": 0.0005, "loss": 2.1023, "step": 178000 }, { "epoch": 0.6775499950518792, "grad_norm": 0.13681115210056305, "learning_rate": 0.0005, "loss": 2.0949, "step": 178010 }, { "epoch": 0.6775880575200018, "grad_norm": 0.12495207786560059, "learning_rate": 0.0005, "loss": 2.1223, "step": 178020 }, { "epoch": 0.6776261199881245, "grad_norm": 0.11878683418035507, "learning_rate": 0.0005, "loss": 2.096, "step": 178030 }, { "epoch": 0.6776641824562472, "grad_norm": 0.11636865884065628, "learning_rate": 0.0005, "loss": 2.1002, "step": 178040 }, { "epoch": 0.6777022449243699, "grad_norm": 0.11892195045948029, "learning_rate": 0.0005, "loss": 2.1187, "step": 178050 }, { "epoch": 0.6777403073924926, "grad_norm": 0.11692816019058228, "learning_rate": 0.0005, "loss": 2.1041, "step": 178060 }, { "epoch": 0.6777783698606152, "grad_norm": 0.13492970168590546, "learning_rate": 0.0005, "loss": 2.1044, "step": 178070 }, { "epoch": 0.6778164323287379, "grad_norm": 0.13853606581687927, "learning_rate": 0.0005, "loss": 2.1058, "step": 178080 }, { "epoch": 0.6778544947968607, "grad_norm": 0.127889484167099, "learning_rate": 0.0005, "loss": 2.123, "step": 178090 }, { "epoch": 0.6778925572649833, "grad_norm": 0.12552450597286224, "learning_rate": 0.0005, "loss": 2.1087, "step": 178100 }, { "epoch": 0.677930619733106, "grad_norm": 0.13038252294063568, "learning_rate": 0.0005, "loss": 2.1015, "step": 178110 }, { "epoch": 0.6779686822012286, "grad_norm": 0.12474310398101807, "learning_rate": 0.0005, "loss": 2.1147, "step": 178120 }, { "epoch": 0.6780067446693513, "grad_norm": 0.1274314820766449, "learning_rate": 0.0005, "loss": 2.1193, "step": 178130 }, { "epoch": 0.6780448071374741, "grad_norm": 0.12522082030773163, "learning_rate": 0.0005, "loss": 2.1107, "step": 178140 }, { "epoch": 0.6780828696055967, "grad_norm": 0.11199326813220978, "learning_rate": 0.0005, "loss": 2.096, "step": 178150 }, { "epoch": 0.6781209320737194, "grad_norm": 0.11414346843957901, "learning_rate": 0.0005, "loss": 2.1118, "step": 178160 }, { "epoch": 0.678158994541842, "grad_norm": 0.1230161264538765, "learning_rate": 0.0005, "loss": 2.1026, "step": 178170 }, { "epoch": 0.6781970570099648, "grad_norm": 0.11851771920919418, "learning_rate": 0.0005, "loss": 2.1125, "step": 178180 }, { "epoch": 0.6782351194780875, "grad_norm": 0.12614396214485168, "learning_rate": 0.0005, "loss": 2.1067, "step": 178190 }, { "epoch": 0.6782731819462101, "grad_norm": 0.11604657024145126, "learning_rate": 0.0005, "loss": 2.1136, "step": 178200 }, { "epoch": 0.6783112444143328, "grad_norm": 0.11043967306613922, "learning_rate": 0.0005, "loss": 2.0992, "step": 178210 }, { "epoch": 0.6783493068824555, "grad_norm": 0.13001075387001038, "learning_rate": 0.0005, "loss": 2.1187, "step": 178220 }, { "epoch": 0.6783873693505782, "grad_norm": 0.1245742067694664, "learning_rate": 0.0005, "loss": 2.1112, "step": 178230 }, { "epoch": 0.6784254318187009, "grad_norm": 0.1329800933599472, "learning_rate": 0.0005, "loss": 2.1134, "step": 178240 }, { "epoch": 0.6784634942868235, "grad_norm": 0.12265625596046448, "learning_rate": 0.0005, "loss": 2.111, "step": 178250 }, { "epoch": 0.6785015567549462, "grad_norm": 0.1359381377696991, "learning_rate": 0.0005, "loss": 2.1161, "step": 178260 }, { "epoch": 0.6785396192230689, "grad_norm": 0.13058646023273468, "learning_rate": 0.0005, "loss": 2.1189, "step": 178270 }, { "epoch": 0.6785776816911916, "grad_norm": 0.12857820093631744, "learning_rate": 0.0005, "loss": 2.1104, "step": 178280 }, { "epoch": 0.6786157441593142, "grad_norm": 0.12578581273555756, "learning_rate": 0.0005, "loss": 2.1158, "step": 178290 }, { "epoch": 0.6786538066274369, "grad_norm": 0.136888325214386, "learning_rate": 0.0005, "loss": 2.0979, "step": 178300 }, { "epoch": 0.6786918690955597, "grad_norm": 0.12314784526824951, "learning_rate": 0.0005, "loss": 2.1012, "step": 178310 }, { "epoch": 0.6787299315636823, "grad_norm": 0.11737371981143951, "learning_rate": 0.0005, "loss": 2.111, "step": 178320 }, { "epoch": 0.678767994031805, "grad_norm": 0.1219782680273056, "learning_rate": 0.0005, "loss": 2.1122, "step": 178330 }, { "epoch": 0.6788060564999276, "grad_norm": 0.12800832092761993, "learning_rate": 0.0005, "loss": 2.1029, "step": 178340 }, { "epoch": 0.6788441189680504, "grad_norm": 0.12492241710424423, "learning_rate": 0.0005, "loss": 2.1006, "step": 178350 }, { "epoch": 0.6788821814361731, "grad_norm": 0.1212736964225769, "learning_rate": 0.0005, "loss": 2.118, "step": 178360 }, { "epoch": 0.6789202439042957, "grad_norm": 0.12892039120197296, "learning_rate": 0.0005, "loss": 2.1075, "step": 178370 }, { "epoch": 0.6789583063724184, "grad_norm": 0.13535402715206146, "learning_rate": 0.0005, "loss": 2.1064, "step": 178380 }, { "epoch": 0.678996368840541, "grad_norm": 0.12345527857542038, "learning_rate": 0.0005, "loss": 2.1294, "step": 178390 }, { "epoch": 0.6790344313086638, "grad_norm": 0.12075518816709518, "learning_rate": 0.0005, "loss": 2.1115, "step": 178400 }, { "epoch": 0.6790724937767865, "grad_norm": 0.11288055032491684, "learning_rate": 0.0005, "loss": 2.1128, "step": 178410 }, { "epoch": 0.6791105562449091, "grad_norm": 0.12587016820907593, "learning_rate": 0.0005, "loss": 2.1106, "step": 178420 }, { "epoch": 0.6791486187130318, "grad_norm": 0.13511881232261658, "learning_rate": 0.0005, "loss": 2.1034, "step": 178430 }, { "epoch": 0.6791866811811546, "grad_norm": 0.12678687274456024, "learning_rate": 0.0005, "loss": 2.1221, "step": 178440 }, { "epoch": 0.6792247436492772, "grad_norm": 0.11422378569841385, "learning_rate": 0.0005, "loss": 2.0985, "step": 178450 }, { "epoch": 0.6792628061173999, "grad_norm": 0.1303092986345291, "learning_rate": 0.0005, "loss": 2.0871, "step": 178460 }, { "epoch": 0.6793008685855225, "grad_norm": 0.11457906663417816, "learning_rate": 0.0005, "loss": 2.1181, "step": 178470 }, { "epoch": 0.6793389310536453, "grad_norm": 0.12561039626598358, "learning_rate": 0.0005, "loss": 2.1018, "step": 178480 }, { "epoch": 0.679376993521768, "grad_norm": 0.11544699966907501, "learning_rate": 0.0005, "loss": 2.1108, "step": 178490 }, { "epoch": 0.6794150559898906, "grad_norm": 0.13715773820877075, "learning_rate": 0.0005, "loss": 2.1211, "step": 178500 }, { "epoch": 0.6794531184580133, "grad_norm": 0.1227487102150917, "learning_rate": 0.0005, "loss": 2.1075, "step": 178510 }, { "epoch": 0.679491180926136, "grad_norm": 0.11637184768915176, "learning_rate": 0.0005, "loss": 2.125, "step": 178520 }, { "epoch": 0.6795292433942587, "grad_norm": 0.11330291628837585, "learning_rate": 0.0005, "loss": 2.1258, "step": 178530 }, { "epoch": 0.6795673058623813, "grad_norm": 0.12525829672813416, "learning_rate": 0.0005, "loss": 2.1087, "step": 178540 }, { "epoch": 0.679605368330504, "grad_norm": 0.11803308129310608, "learning_rate": 0.0005, "loss": 2.1117, "step": 178550 }, { "epoch": 0.6796434307986267, "grad_norm": 0.12072555720806122, "learning_rate": 0.0005, "loss": 2.1112, "step": 178560 }, { "epoch": 0.6796814932667494, "grad_norm": 0.12317853420972824, "learning_rate": 0.0005, "loss": 2.1206, "step": 178570 }, { "epoch": 0.6797195557348721, "grad_norm": 0.1288745105266571, "learning_rate": 0.0005, "loss": 2.1025, "step": 178580 }, { "epoch": 0.6797576182029947, "grad_norm": 0.13706010580062866, "learning_rate": 0.0005, "loss": 2.1083, "step": 178590 }, { "epoch": 0.6797956806711174, "grad_norm": 0.13190880417823792, "learning_rate": 0.0005, "loss": 2.1065, "step": 178600 }, { "epoch": 0.6798337431392402, "grad_norm": 0.13624754548072815, "learning_rate": 0.0005, "loss": 2.1156, "step": 178610 }, { "epoch": 0.6798718056073628, "grad_norm": 0.14656874537467957, "learning_rate": 0.0005, "loss": 2.1155, "step": 178620 }, { "epoch": 0.6799098680754855, "grad_norm": 0.11314431577920914, "learning_rate": 0.0005, "loss": 2.1036, "step": 178630 }, { "epoch": 0.6799479305436081, "grad_norm": 0.11898855865001678, "learning_rate": 0.0005, "loss": 2.106, "step": 178640 }, { "epoch": 0.6799859930117309, "grad_norm": 0.11646313220262527, "learning_rate": 0.0005, "loss": 2.1079, "step": 178650 }, { "epoch": 0.6800240554798536, "grad_norm": 0.12358859926462173, "learning_rate": 0.0005, "loss": 2.1056, "step": 178660 }, { "epoch": 0.6800621179479762, "grad_norm": 0.12150691449642181, "learning_rate": 0.0005, "loss": 2.1195, "step": 178670 }, { "epoch": 0.6801001804160989, "grad_norm": 0.15535567700862885, "learning_rate": 0.0005, "loss": 2.1095, "step": 178680 }, { "epoch": 0.6801382428842215, "grad_norm": 0.13010378181934357, "learning_rate": 0.0005, "loss": 2.0994, "step": 178690 }, { "epoch": 0.6801763053523443, "grad_norm": 0.1207583099603653, "learning_rate": 0.0005, "loss": 2.1066, "step": 178700 }, { "epoch": 0.680214367820467, "grad_norm": 0.12940700352191925, "learning_rate": 0.0005, "loss": 2.094, "step": 178710 }, { "epoch": 0.6802524302885896, "grad_norm": 0.11882823705673218, "learning_rate": 0.0005, "loss": 2.1092, "step": 178720 }, { "epoch": 0.6802904927567123, "grad_norm": 0.12433478981256485, "learning_rate": 0.0005, "loss": 2.1134, "step": 178730 }, { "epoch": 0.680328555224835, "grad_norm": 0.12496747821569443, "learning_rate": 0.0005, "loss": 2.0939, "step": 178740 }, { "epoch": 0.6803666176929577, "grad_norm": 0.12532317638397217, "learning_rate": 0.0005, "loss": 2.0965, "step": 178750 }, { "epoch": 0.6804046801610804, "grad_norm": 0.11943234503269196, "learning_rate": 0.0005, "loss": 2.0914, "step": 178760 }, { "epoch": 0.680442742629203, "grad_norm": 0.1301838904619217, "learning_rate": 0.0005, "loss": 2.0988, "step": 178770 }, { "epoch": 0.6804808050973258, "grad_norm": 0.11897633224725723, "learning_rate": 0.0005, "loss": 2.1044, "step": 178780 }, { "epoch": 0.6805188675654484, "grad_norm": 0.1269768923521042, "learning_rate": 0.0005, "loss": 2.106, "step": 178790 }, { "epoch": 0.6805569300335711, "grad_norm": 0.12773433327674866, "learning_rate": 0.0005, "loss": 2.0983, "step": 178800 }, { "epoch": 0.6805949925016938, "grad_norm": 0.1303984671831131, "learning_rate": 0.0005, "loss": 2.1164, "step": 178810 }, { "epoch": 0.6806330549698164, "grad_norm": 0.11902695149183273, "learning_rate": 0.0005, "loss": 2.103, "step": 178820 }, { "epoch": 0.6806711174379392, "grad_norm": 0.12053973972797394, "learning_rate": 0.0005, "loss": 2.1115, "step": 178830 }, { "epoch": 0.6807091799060618, "grad_norm": 0.12673945724964142, "learning_rate": 0.0005, "loss": 2.1114, "step": 178840 }, { "epoch": 0.6807472423741845, "grad_norm": 0.127527117729187, "learning_rate": 0.0005, "loss": 2.1099, "step": 178850 }, { "epoch": 0.6807853048423071, "grad_norm": 0.12394998222589493, "learning_rate": 0.0005, "loss": 2.1218, "step": 178860 }, { "epoch": 0.6808233673104299, "grad_norm": 0.1310361623764038, "learning_rate": 0.0005, "loss": 2.1168, "step": 178870 }, { "epoch": 0.6808614297785526, "grad_norm": 0.12271669507026672, "learning_rate": 0.0005, "loss": 2.1069, "step": 178880 }, { "epoch": 0.6808994922466752, "grad_norm": 0.11129756271839142, "learning_rate": 0.0005, "loss": 2.1129, "step": 178890 }, { "epoch": 0.6809375547147979, "grad_norm": 0.12341821938753128, "learning_rate": 0.0005, "loss": 2.1191, "step": 178900 }, { "epoch": 0.6809756171829207, "grad_norm": 0.11532428115606308, "learning_rate": 0.0005, "loss": 2.1058, "step": 178910 }, { "epoch": 0.6810136796510433, "grad_norm": 0.12454450130462646, "learning_rate": 0.0005, "loss": 2.1098, "step": 178920 }, { "epoch": 0.681051742119166, "grad_norm": 0.1244184821844101, "learning_rate": 0.0005, "loss": 2.1095, "step": 178930 }, { "epoch": 0.6810898045872886, "grad_norm": 0.11621390283107758, "learning_rate": 0.0005, "loss": 2.1092, "step": 178940 }, { "epoch": 0.6811278670554114, "grad_norm": 0.1157296672463417, "learning_rate": 0.0005, "loss": 2.1221, "step": 178950 }, { "epoch": 0.681165929523534, "grad_norm": 0.11904320865869522, "learning_rate": 0.0005, "loss": 2.1086, "step": 178960 }, { "epoch": 0.6812039919916567, "grad_norm": 0.1200244352221489, "learning_rate": 0.0005, "loss": 2.1176, "step": 178970 }, { "epoch": 0.6812420544597794, "grad_norm": 0.1374550759792328, "learning_rate": 0.0005, "loss": 2.1177, "step": 178980 }, { "epoch": 0.681280116927902, "grad_norm": 0.1327461302280426, "learning_rate": 0.0005, "loss": 2.1097, "step": 178990 }, { "epoch": 0.6813181793960248, "grad_norm": 0.12066030502319336, "learning_rate": 0.0005, "loss": 2.1116, "step": 179000 }, { "epoch": 0.6813562418641474, "grad_norm": 0.11714760959148407, "learning_rate": 0.0005, "loss": 2.1098, "step": 179010 }, { "epoch": 0.6813943043322701, "grad_norm": 0.1339813768863678, "learning_rate": 0.0005, "loss": 2.1064, "step": 179020 }, { "epoch": 0.6814323668003928, "grad_norm": 0.1168723851442337, "learning_rate": 0.0005, "loss": 2.1168, "step": 179030 }, { "epoch": 0.6814704292685155, "grad_norm": 0.12488270550966263, "learning_rate": 0.0005, "loss": 2.1094, "step": 179040 }, { "epoch": 0.6815084917366382, "grad_norm": 0.1275583803653717, "learning_rate": 0.0005, "loss": 2.1298, "step": 179050 }, { "epoch": 0.6815465542047608, "grad_norm": 0.11426876485347748, "learning_rate": 0.0005, "loss": 2.0997, "step": 179060 }, { "epoch": 0.6815846166728835, "grad_norm": 0.12208835780620575, "learning_rate": 0.0005, "loss": 2.105, "step": 179070 }, { "epoch": 0.6816226791410063, "grad_norm": 0.12177817523479462, "learning_rate": 0.0005, "loss": 2.1075, "step": 179080 }, { "epoch": 0.6816607416091289, "grad_norm": 0.1255825310945511, "learning_rate": 0.0005, "loss": 2.0969, "step": 179090 }, { "epoch": 0.6816988040772516, "grad_norm": 0.14138972759246826, "learning_rate": 0.0005, "loss": 2.1057, "step": 179100 }, { "epoch": 0.6817368665453742, "grad_norm": 0.12741991877555847, "learning_rate": 0.0005, "loss": 2.1061, "step": 179110 }, { "epoch": 0.6817749290134969, "grad_norm": 0.1334943175315857, "learning_rate": 0.0005, "loss": 2.1199, "step": 179120 }, { "epoch": 0.6818129914816197, "grad_norm": 0.12393660098314285, "learning_rate": 0.0005, "loss": 2.1206, "step": 179130 }, { "epoch": 0.6818510539497423, "grad_norm": 0.11239521205425262, "learning_rate": 0.0005, "loss": 2.1047, "step": 179140 }, { "epoch": 0.681889116417865, "grad_norm": 0.11759033799171448, "learning_rate": 0.0005, "loss": 2.1112, "step": 179150 }, { "epoch": 0.6819271788859876, "grad_norm": 0.11990151554346085, "learning_rate": 0.0005, "loss": 2.1117, "step": 179160 }, { "epoch": 0.6819652413541104, "grad_norm": 0.11784869432449341, "learning_rate": 0.0005, "loss": 2.1021, "step": 179170 }, { "epoch": 0.6820033038222331, "grad_norm": 0.12301430851221085, "learning_rate": 0.0005, "loss": 2.1081, "step": 179180 }, { "epoch": 0.6820413662903557, "grad_norm": 0.12320394814014435, "learning_rate": 0.0005, "loss": 2.1119, "step": 179190 }, { "epoch": 0.6820794287584784, "grad_norm": 0.13769547641277313, "learning_rate": 0.0005, "loss": 2.1037, "step": 179200 }, { "epoch": 0.6821174912266011, "grad_norm": 0.13465788960456848, "learning_rate": 0.0005, "loss": 2.1142, "step": 179210 }, { "epoch": 0.6821555536947238, "grad_norm": 0.1108383983373642, "learning_rate": 0.0005, "loss": 2.1139, "step": 179220 }, { "epoch": 0.6821936161628465, "grad_norm": 0.12412822991609573, "learning_rate": 0.0005, "loss": 2.0999, "step": 179230 }, { "epoch": 0.6822316786309691, "grad_norm": 0.12183883041143417, "learning_rate": 0.0005, "loss": 2.1131, "step": 179240 }, { "epoch": 0.6822697410990918, "grad_norm": 0.12129288166761398, "learning_rate": 0.0005, "loss": 2.1122, "step": 179250 }, { "epoch": 0.6823078035672145, "grad_norm": 0.12597280740737915, "learning_rate": 0.0005, "loss": 2.1156, "step": 179260 }, { "epoch": 0.6823458660353372, "grad_norm": 0.11562073230743408, "learning_rate": 0.0005, "loss": 2.1004, "step": 179270 }, { "epoch": 0.6823839285034599, "grad_norm": 0.1106618121266365, "learning_rate": 0.0005, "loss": 2.12, "step": 179280 }, { "epoch": 0.6824219909715825, "grad_norm": 0.128122478723526, "learning_rate": 0.0005, "loss": 2.1102, "step": 179290 }, { "epoch": 0.6824600534397053, "grad_norm": 0.14943507313728333, "learning_rate": 0.0005, "loss": 2.1203, "step": 179300 }, { "epoch": 0.6824981159078279, "grad_norm": 0.11820433288812637, "learning_rate": 0.0005, "loss": 2.095, "step": 179310 }, { "epoch": 0.6825361783759506, "grad_norm": 0.11343543976545334, "learning_rate": 0.0005, "loss": 2.1087, "step": 179320 }, { "epoch": 0.6825742408440733, "grad_norm": 0.9048527479171753, "learning_rate": 0.0005, "loss": 2.0994, "step": 179330 }, { "epoch": 0.682612303312196, "grad_norm": 0.12243762612342834, "learning_rate": 0.0005, "loss": 2.103, "step": 179340 }, { "epoch": 0.6826503657803187, "grad_norm": 0.11322756856679916, "learning_rate": 0.0005, "loss": 2.1181, "step": 179350 }, { "epoch": 0.6826884282484413, "grad_norm": 0.11842131614685059, "learning_rate": 0.0005, "loss": 2.1107, "step": 179360 }, { "epoch": 0.682726490716564, "grad_norm": 0.11658118665218353, "learning_rate": 0.0005, "loss": 2.1124, "step": 179370 }, { "epoch": 0.6827645531846868, "grad_norm": 0.11913055926561356, "learning_rate": 0.0005, "loss": 2.0982, "step": 179380 }, { "epoch": 0.6828026156528094, "grad_norm": 0.11381657421588898, "learning_rate": 0.0005, "loss": 2.1185, "step": 179390 }, { "epoch": 0.6828406781209321, "grad_norm": 0.11231932044029236, "learning_rate": 0.0005, "loss": 2.1183, "step": 179400 }, { "epoch": 0.6828787405890547, "grad_norm": 0.12924441695213318, "learning_rate": 0.0005, "loss": 2.109, "step": 179410 }, { "epoch": 0.6829168030571774, "grad_norm": 0.11670733988285065, "learning_rate": 0.0005, "loss": 2.0953, "step": 179420 }, { "epoch": 0.6829548655253002, "grad_norm": 0.11556648463010788, "learning_rate": 0.0005, "loss": 2.0968, "step": 179430 }, { "epoch": 0.6829929279934228, "grad_norm": 0.1284991353750229, "learning_rate": 0.0005, "loss": 2.1145, "step": 179440 }, { "epoch": 0.6830309904615455, "grad_norm": 0.12964291870594025, "learning_rate": 0.0005, "loss": 2.1032, "step": 179450 }, { "epoch": 0.6830690529296681, "grad_norm": 0.1216454803943634, "learning_rate": 0.0005, "loss": 2.1056, "step": 179460 }, { "epoch": 0.6831071153977909, "grad_norm": 0.14056138694286346, "learning_rate": 0.0005, "loss": 2.0957, "step": 179470 }, { "epoch": 0.6831451778659136, "grad_norm": 0.1397508829832077, "learning_rate": 0.0005, "loss": 2.0995, "step": 179480 }, { "epoch": 0.6831832403340362, "grad_norm": 0.12131225317716599, "learning_rate": 0.0005, "loss": 2.1145, "step": 179490 }, { "epoch": 0.6832213028021589, "grad_norm": 0.11590532958507538, "learning_rate": 0.0005, "loss": 2.1193, "step": 179500 }, { "epoch": 0.6832593652702816, "grad_norm": 0.12147286534309387, "learning_rate": 0.0005, "loss": 2.095, "step": 179510 }, { "epoch": 0.6832974277384043, "grad_norm": 0.1389768123626709, "learning_rate": 0.0005, "loss": 2.1009, "step": 179520 }, { "epoch": 0.683335490206527, "grad_norm": 0.11397550255060196, "learning_rate": 0.0005, "loss": 2.1015, "step": 179530 }, { "epoch": 0.6833735526746496, "grad_norm": 0.1191280260682106, "learning_rate": 0.0005, "loss": 2.1074, "step": 179540 }, { "epoch": 0.6834116151427723, "grad_norm": 0.1180817037820816, "learning_rate": 0.0005, "loss": 2.1032, "step": 179550 }, { "epoch": 0.683449677610895, "grad_norm": 0.12457938492298126, "learning_rate": 0.0005, "loss": 2.1069, "step": 179560 }, { "epoch": 0.6834877400790177, "grad_norm": 0.1202085092663765, "learning_rate": 0.0005, "loss": 2.1066, "step": 179570 }, { "epoch": 0.6835258025471403, "grad_norm": 0.1307467371225357, "learning_rate": 0.0005, "loss": 2.1002, "step": 179580 }, { "epoch": 0.683563865015263, "grad_norm": 0.13553386926651, "learning_rate": 0.0005, "loss": 2.1117, "step": 179590 }, { "epoch": 0.6836019274833858, "grad_norm": 0.131942480802536, "learning_rate": 0.0005, "loss": 2.1139, "step": 179600 }, { "epoch": 0.6836399899515084, "grad_norm": 0.11515048891305923, "learning_rate": 0.0005, "loss": 2.1081, "step": 179610 }, { "epoch": 0.6836780524196311, "grad_norm": 0.13275325298309326, "learning_rate": 0.0005, "loss": 2.1062, "step": 179620 }, { "epoch": 0.6837161148877537, "grad_norm": 0.11690928786993027, "learning_rate": 0.0005, "loss": 2.0996, "step": 179630 }, { "epoch": 0.6837541773558765, "grad_norm": 0.14062003791332245, "learning_rate": 0.0005, "loss": 2.1018, "step": 179640 }, { "epoch": 0.6837922398239992, "grad_norm": 0.11849799007177353, "learning_rate": 0.0005, "loss": 2.124, "step": 179650 }, { "epoch": 0.6838303022921218, "grad_norm": 0.12318059802055359, "learning_rate": 0.0005, "loss": 2.0956, "step": 179660 }, { "epoch": 0.6838683647602445, "grad_norm": 0.12393590807914734, "learning_rate": 0.0005, "loss": 2.1034, "step": 179670 }, { "epoch": 0.6839064272283671, "grad_norm": 0.12598690390586853, "learning_rate": 0.0005, "loss": 2.1162, "step": 179680 }, { "epoch": 0.6839444896964899, "grad_norm": 0.11892339587211609, "learning_rate": 0.0005, "loss": 2.1084, "step": 179690 }, { "epoch": 0.6839825521646126, "grad_norm": 0.12454485148191452, "learning_rate": 0.0005, "loss": 2.1054, "step": 179700 }, { "epoch": 0.6840206146327352, "grad_norm": 0.13515712320804596, "learning_rate": 0.0005, "loss": 2.1256, "step": 179710 }, { "epoch": 0.6840586771008579, "grad_norm": 0.12502405047416687, "learning_rate": 0.0005, "loss": 2.0984, "step": 179720 }, { "epoch": 0.6840967395689806, "grad_norm": 0.11308914422988892, "learning_rate": 0.0005, "loss": 2.0922, "step": 179730 }, { "epoch": 0.6841348020371033, "grad_norm": 0.13700667023658752, "learning_rate": 0.0005, "loss": 2.1071, "step": 179740 }, { "epoch": 0.684172864505226, "grad_norm": 0.13408491015434265, "learning_rate": 0.0005, "loss": 2.1106, "step": 179750 }, { "epoch": 0.6842109269733486, "grad_norm": 0.13319963216781616, "learning_rate": 0.0005, "loss": 2.0949, "step": 179760 }, { "epoch": 0.6842489894414714, "grad_norm": 0.12456398457288742, "learning_rate": 0.0005, "loss": 2.113, "step": 179770 }, { "epoch": 0.684287051909594, "grad_norm": 0.13097211718559265, "learning_rate": 0.0005, "loss": 2.0982, "step": 179780 }, { "epoch": 0.6843251143777167, "grad_norm": 0.11321916431188583, "learning_rate": 0.0005, "loss": 2.1135, "step": 179790 }, { "epoch": 0.6843631768458394, "grad_norm": 0.1175604984164238, "learning_rate": 0.0005, "loss": 2.0939, "step": 179800 }, { "epoch": 0.6844012393139621, "grad_norm": 0.12084577232599258, "learning_rate": 0.0005, "loss": 2.1025, "step": 179810 }, { "epoch": 0.6844393017820848, "grad_norm": 0.11561520397663116, "learning_rate": 0.0005, "loss": 2.1145, "step": 179820 }, { "epoch": 0.6844773642502074, "grad_norm": 0.11477413773536682, "learning_rate": 0.0005, "loss": 2.1214, "step": 179830 }, { "epoch": 0.6845154267183301, "grad_norm": 0.1235690787434578, "learning_rate": 0.0005, "loss": 2.1041, "step": 179840 }, { "epoch": 0.6845534891864528, "grad_norm": 0.12032691389322281, "learning_rate": 0.0005, "loss": 2.1281, "step": 179850 }, { "epoch": 0.6845915516545755, "grad_norm": 0.12753982841968536, "learning_rate": 0.0005, "loss": 2.1091, "step": 179860 }, { "epoch": 0.6846296141226982, "grad_norm": 0.1180667132139206, "learning_rate": 0.0005, "loss": 2.1099, "step": 179870 }, { "epoch": 0.6846676765908208, "grad_norm": 0.12818437814712524, "learning_rate": 0.0005, "loss": 2.0986, "step": 179880 }, { "epoch": 0.6847057390589435, "grad_norm": 0.12024499475955963, "learning_rate": 0.0005, "loss": 2.1081, "step": 179890 }, { "epoch": 0.6847438015270663, "grad_norm": 0.11732951551675797, "learning_rate": 0.0005, "loss": 2.1164, "step": 179900 }, { "epoch": 0.6847818639951889, "grad_norm": 0.1257062554359436, "learning_rate": 0.0005, "loss": 2.1191, "step": 179910 }, { "epoch": 0.6848199264633116, "grad_norm": 0.13282521069049835, "learning_rate": 0.0005, "loss": 2.1063, "step": 179920 }, { "epoch": 0.6848579889314342, "grad_norm": 0.12256745249032974, "learning_rate": 0.0005, "loss": 2.1051, "step": 179930 }, { "epoch": 0.684896051399557, "grad_norm": 0.1206756979227066, "learning_rate": 0.0005, "loss": 2.1081, "step": 179940 }, { "epoch": 0.6849341138676797, "grad_norm": 0.12020318955183029, "learning_rate": 0.0005, "loss": 2.0951, "step": 179950 }, { "epoch": 0.6849721763358023, "grad_norm": 0.13199806213378906, "learning_rate": 0.0005, "loss": 2.1162, "step": 179960 }, { "epoch": 0.685010238803925, "grad_norm": 0.12754608690738678, "learning_rate": 0.0005, "loss": 2.1056, "step": 179970 }, { "epoch": 0.6850483012720476, "grad_norm": 0.1329394429922104, "learning_rate": 0.0005, "loss": 2.0972, "step": 179980 }, { "epoch": 0.6850863637401704, "grad_norm": 0.1296091377735138, "learning_rate": 0.0005, "loss": 2.129, "step": 179990 }, { "epoch": 0.6851244262082931, "grad_norm": 0.1283130943775177, "learning_rate": 0.0005, "loss": 2.1058, "step": 180000 }, { "epoch": 0.6851624886764157, "grad_norm": 0.12421011179685593, "learning_rate": 0.0005, "loss": 2.0999, "step": 180010 }, { "epoch": 0.6852005511445384, "grad_norm": 0.13211941719055176, "learning_rate": 0.0005, "loss": 2.1049, "step": 180020 }, { "epoch": 0.6852386136126611, "grad_norm": 0.11720091104507446, "learning_rate": 0.0005, "loss": 2.1029, "step": 180030 }, { "epoch": 0.6852766760807838, "grad_norm": 0.1294281780719757, "learning_rate": 0.0005, "loss": 2.105, "step": 180040 }, { "epoch": 0.6853147385489065, "grad_norm": 0.12521712481975555, "learning_rate": 0.0005, "loss": 2.0966, "step": 180050 }, { "epoch": 0.6853528010170291, "grad_norm": 0.12627719342708588, "learning_rate": 0.0005, "loss": 2.1183, "step": 180060 }, { "epoch": 0.6853908634851519, "grad_norm": 0.13020716607570648, "learning_rate": 0.0005, "loss": 2.1282, "step": 180070 }, { "epoch": 0.6854289259532745, "grad_norm": 0.11846989393234253, "learning_rate": 0.0005, "loss": 2.114, "step": 180080 }, { "epoch": 0.6854669884213972, "grad_norm": 0.11798037588596344, "learning_rate": 0.0005, "loss": 2.12, "step": 180090 }, { "epoch": 0.6855050508895199, "grad_norm": 0.12672331929206848, "learning_rate": 0.0005, "loss": 2.1089, "step": 180100 }, { "epoch": 0.6855431133576425, "grad_norm": 0.12962199747562408, "learning_rate": 0.0005, "loss": 2.1127, "step": 180110 }, { "epoch": 0.6855811758257653, "grad_norm": 0.11662869900465012, "learning_rate": 0.0005, "loss": 2.1097, "step": 180120 }, { "epoch": 0.6856192382938879, "grad_norm": 0.1326063722372055, "learning_rate": 0.0005, "loss": 2.1072, "step": 180130 }, { "epoch": 0.6856573007620106, "grad_norm": 0.12435778975486755, "learning_rate": 0.0005, "loss": 2.1256, "step": 180140 }, { "epoch": 0.6856953632301332, "grad_norm": 0.12299150973558426, "learning_rate": 0.0005, "loss": 2.1132, "step": 180150 }, { "epoch": 0.685733425698256, "grad_norm": 0.13320887088775635, "learning_rate": 0.0005, "loss": 2.0997, "step": 180160 }, { "epoch": 0.6857714881663787, "grad_norm": 0.13181836903095245, "learning_rate": 0.0005, "loss": 2.1062, "step": 180170 }, { "epoch": 0.6858095506345013, "grad_norm": 0.12428215146064758, "learning_rate": 0.0005, "loss": 2.1069, "step": 180180 }, { "epoch": 0.685847613102624, "grad_norm": 0.12043698132038116, "learning_rate": 0.0005, "loss": 2.0954, "step": 180190 }, { "epoch": 0.6858856755707468, "grad_norm": 0.12302254885435104, "learning_rate": 0.0005, "loss": 2.1171, "step": 180200 }, { "epoch": 0.6859237380388694, "grad_norm": 0.24158915877342224, "learning_rate": 0.0005, "loss": 2.1121, "step": 180210 }, { "epoch": 0.6859618005069921, "grad_norm": 0.12980495393276215, "learning_rate": 0.0005, "loss": 2.1029, "step": 180220 }, { "epoch": 0.6859998629751147, "grad_norm": 0.12906219065189362, "learning_rate": 0.0005, "loss": 2.1263, "step": 180230 }, { "epoch": 0.6860379254432375, "grad_norm": 0.1250433474779129, "learning_rate": 0.0005, "loss": 2.116, "step": 180240 }, { "epoch": 0.6860759879113602, "grad_norm": 0.13827890157699585, "learning_rate": 0.0005, "loss": 2.1056, "step": 180250 }, { "epoch": 0.6861140503794828, "grad_norm": 0.12229827791452408, "learning_rate": 0.0005, "loss": 2.114, "step": 180260 }, { "epoch": 0.6861521128476055, "grad_norm": 0.11730405688285828, "learning_rate": 0.0005, "loss": 2.1145, "step": 180270 }, { "epoch": 0.6861901753157281, "grad_norm": 0.13208967447280884, "learning_rate": 0.0005, "loss": 2.1048, "step": 180280 }, { "epoch": 0.6862282377838509, "grad_norm": 0.12246529757976532, "learning_rate": 0.0005, "loss": 2.1245, "step": 180290 }, { "epoch": 0.6862663002519735, "grad_norm": 0.12544351816177368, "learning_rate": 0.0005, "loss": 2.1054, "step": 180300 }, { "epoch": 0.6863043627200962, "grad_norm": 0.1244068592786789, "learning_rate": 0.0005, "loss": 2.1137, "step": 180310 }, { "epoch": 0.6863424251882189, "grad_norm": 0.12453640252351761, "learning_rate": 0.0005, "loss": 2.1224, "step": 180320 }, { "epoch": 0.6863804876563416, "grad_norm": 0.11731000244617462, "learning_rate": 0.0005, "loss": 2.1091, "step": 180330 }, { "epoch": 0.6864185501244643, "grad_norm": 0.12416515499353409, "learning_rate": 0.0005, "loss": 2.1018, "step": 180340 }, { "epoch": 0.686456612592587, "grad_norm": 0.13353504240512848, "learning_rate": 0.0005, "loss": 2.1043, "step": 180350 }, { "epoch": 0.6864946750607096, "grad_norm": 0.11940930038690567, "learning_rate": 0.0005, "loss": 2.0994, "step": 180360 }, { "epoch": 0.6865327375288324, "grad_norm": 0.2674172818660736, "learning_rate": 0.0005, "loss": 2.11, "step": 180370 }, { "epoch": 0.686570799996955, "grad_norm": 0.1152404174208641, "learning_rate": 0.0005, "loss": 2.1082, "step": 180380 }, { "epoch": 0.6866088624650777, "grad_norm": 0.15712182223796844, "learning_rate": 0.0005, "loss": 2.1193, "step": 180390 }, { "epoch": 0.6866469249332003, "grad_norm": 0.13125132024288177, "learning_rate": 0.0005, "loss": 2.1054, "step": 180400 }, { "epoch": 0.686684987401323, "grad_norm": 0.12223441153764725, "learning_rate": 0.0005, "loss": 2.113, "step": 180410 }, { "epoch": 0.6867230498694458, "grad_norm": 0.12567825615406036, "learning_rate": 0.0005, "loss": 2.1111, "step": 180420 }, { "epoch": 0.6867611123375684, "grad_norm": 0.11764916777610779, "learning_rate": 0.0005, "loss": 2.1091, "step": 180430 }, { "epoch": 0.6867991748056911, "grad_norm": 0.1293945163488388, "learning_rate": 0.0005, "loss": 2.1001, "step": 180440 }, { "epoch": 0.6868372372738137, "grad_norm": 0.1217617318034172, "learning_rate": 0.0005, "loss": 2.1029, "step": 180450 }, { "epoch": 0.6868752997419365, "grad_norm": 0.12653307616710663, "learning_rate": 0.0005, "loss": 2.1002, "step": 180460 }, { "epoch": 0.6869133622100592, "grad_norm": 0.13232189416885376, "learning_rate": 0.0005, "loss": 2.1271, "step": 180470 }, { "epoch": 0.6869514246781818, "grad_norm": 0.12960053980350494, "learning_rate": 0.0005, "loss": 2.1117, "step": 180480 }, { "epoch": 0.6869894871463045, "grad_norm": 0.12225568294525146, "learning_rate": 0.0005, "loss": 2.1117, "step": 180490 }, { "epoch": 0.6870275496144272, "grad_norm": 0.1188991367816925, "learning_rate": 0.0005, "loss": 2.0998, "step": 180500 }, { "epoch": 0.6870656120825499, "grad_norm": 0.12143424898386002, "learning_rate": 0.0005, "loss": 2.1162, "step": 180510 }, { "epoch": 0.6871036745506726, "grad_norm": 0.13485486805438995, "learning_rate": 0.0005, "loss": 2.1056, "step": 180520 }, { "epoch": 0.6871417370187952, "grad_norm": 0.12453248351812363, "learning_rate": 0.0005, "loss": 2.1204, "step": 180530 }, { "epoch": 0.6871797994869179, "grad_norm": 0.13146209716796875, "learning_rate": 0.0005, "loss": 2.1048, "step": 180540 }, { "epoch": 0.6872178619550406, "grad_norm": 0.1289307028055191, "learning_rate": 0.0005, "loss": 2.0844, "step": 180550 }, { "epoch": 0.6872559244231633, "grad_norm": 0.1286131888628006, "learning_rate": 0.0005, "loss": 2.1214, "step": 180560 }, { "epoch": 0.687293986891286, "grad_norm": 0.29164138436317444, "learning_rate": 0.0005, "loss": 2.1029, "step": 180570 }, { "epoch": 0.6873320493594086, "grad_norm": 0.13932517170906067, "learning_rate": 0.0005, "loss": 2.1044, "step": 180580 }, { "epoch": 0.6873701118275314, "grad_norm": 0.12999944388866425, "learning_rate": 0.0005, "loss": 2.0993, "step": 180590 }, { "epoch": 0.687408174295654, "grad_norm": 0.12977859377861023, "learning_rate": 0.0005, "loss": 2.1091, "step": 180600 }, { "epoch": 0.6874462367637767, "grad_norm": 0.12538054585456848, "learning_rate": 0.0005, "loss": 2.0938, "step": 180610 }, { "epoch": 0.6874842992318994, "grad_norm": 0.1271113157272339, "learning_rate": 0.0005, "loss": 2.106, "step": 180620 }, { "epoch": 0.6875223617000221, "grad_norm": 0.11441612988710403, "learning_rate": 0.0005, "loss": 2.1115, "step": 180630 }, { "epoch": 0.6875604241681448, "grad_norm": 0.1191798597574234, "learning_rate": 0.0005, "loss": 2.1093, "step": 180640 }, { "epoch": 0.6875984866362674, "grad_norm": 0.12940026819705963, "learning_rate": 0.0005, "loss": 2.0984, "step": 180650 }, { "epoch": 0.6876365491043901, "grad_norm": 0.12178556621074677, "learning_rate": 0.0005, "loss": 2.1066, "step": 180660 }, { "epoch": 0.6876746115725129, "grad_norm": 0.1176435723900795, "learning_rate": 0.0005, "loss": 2.1078, "step": 180670 }, { "epoch": 0.6877126740406355, "grad_norm": 0.12224700301885605, "learning_rate": 0.0005, "loss": 2.1067, "step": 180680 }, { "epoch": 0.6877507365087582, "grad_norm": 0.11887124925851822, "learning_rate": 0.0005, "loss": 2.0955, "step": 180690 }, { "epoch": 0.6877887989768808, "grad_norm": 0.13752269744873047, "learning_rate": 0.0005, "loss": 2.1162, "step": 180700 }, { "epoch": 0.6878268614450035, "grad_norm": 0.11800897121429443, "learning_rate": 0.0005, "loss": 2.1004, "step": 180710 }, { "epoch": 0.6878649239131263, "grad_norm": 0.12719684839248657, "learning_rate": 0.0005, "loss": 2.1219, "step": 180720 }, { "epoch": 0.6879029863812489, "grad_norm": 0.1225336343050003, "learning_rate": 0.0005, "loss": 2.1092, "step": 180730 }, { "epoch": 0.6879410488493716, "grad_norm": 0.11735167354345322, "learning_rate": 0.0005, "loss": 2.0984, "step": 180740 }, { "epoch": 0.6879791113174942, "grad_norm": 0.1273951232433319, "learning_rate": 0.0005, "loss": 2.1317, "step": 180750 }, { "epoch": 0.688017173785617, "grad_norm": 0.1332283616065979, "learning_rate": 0.0005, "loss": 2.1131, "step": 180760 }, { "epoch": 0.6880552362537397, "grad_norm": 0.13235154747962952, "learning_rate": 0.0005, "loss": 2.1149, "step": 180770 }, { "epoch": 0.6880932987218623, "grad_norm": 0.12404754012823105, "learning_rate": 0.0005, "loss": 2.1028, "step": 180780 }, { "epoch": 0.688131361189985, "grad_norm": 0.12362740188837051, "learning_rate": 0.0005, "loss": 2.1173, "step": 180790 }, { "epoch": 0.6881694236581077, "grad_norm": 0.1286434531211853, "learning_rate": 0.0005, "loss": 2.1038, "step": 180800 }, { "epoch": 0.6882074861262304, "grad_norm": 0.12793101370334625, "learning_rate": 0.0005, "loss": 2.1154, "step": 180810 }, { "epoch": 0.688245548594353, "grad_norm": 0.11994019150733948, "learning_rate": 0.0005, "loss": 2.1033, "step": 180820 }, { "epoch": 0.6882836110624757, "grad_norm": 0.11454705148935318, "learning_rate": 0.0005, "loss": 2.0962, "step": 180830 }, { "epoch": 0.6883216735305984, "grad_norm": 0.11727307736873627, "learning_rate": 0.0005, "loss": 2.1075, "step": 180840 }, { "epoch": 0.6883597359987211, "grad_norm": 0.13075630366802216, "learning_rate": 0.0005, "loss": 2.1011, "step": 180850 }, { "epoch": 0.6883977984668438, "grad_norm": 0.13252484798431396, "learning_rate": 0.0005, "loss": 2.1113, "step": 180860 }, { "epoch": 0.6884358609349664, "grad_norm": 0.11895839869976044, "learning_rate": 0.0005, "loss": 2.1154, "step": 180870 }, { "epoch": 0.6884739234030891, "grad_norm": 0.13343650102615356, "learning_rate": 0.0005, "loss": 2.1013, "step": 180880 }, { "epoch": 0.6885119858712119, "grad_norm": 0.12214325368404388, "learning_rate": 0.0005, "loss": 2.1043, "step": 180890 }, { "epoch": 0.6885500483393345, "grad_norm": 0.13322407007217407, "learning_rate": 0.0005, "loss": 2.0943, "step": 180900 }, { "epoch": 0.6885881108074572, "grad_norm": 0.11768539994955063, "learning_rate": 0.0005, "loss": 2.1127, "step": 180910 }, { "epoch": 0.6886261732755798, "grad_norm": 0.1263519674539566, "learning_rate": 0.0005, "loss": 2.1097, "step": 180920 }, { "epoch": 0.6886642357437026, "grad_norm": 0.129350483417511, "learning_rate": 0.0005, "loss": 2.1132, "step": 180930 }, { "epoch": 0.6887022982118253, "grad_norm": 0.1228652149438858, "learning_rate": 0.0005, "loss": 2.0939, "step": 180940 }, { "epoch": 0.6887403606799479, "grad_norm": 0.11624860763549805, "learning_rate": 0.0005, "loss": 2.1023, "step": 180950 }, { "epoch": 0.6887784231480706, "grad_norm": 0.12083599716424942, "learning_rate": 0.0005, "loss": 2.1071, "step": 180960 }, { "epoch": 0.6888164856161932, "grad_norm": 0.12017053365707397, "learning_rate": 0.0005, "loss": 2.1153, "step": 180970 }, { "epoch": 0.688854548084316, "grad_norm": 0.12007162719964981, "learning_rate": 0.0005, "loss": 2.1218, "step": 180980 }, { "epoch": 0.6888926105524387, "grad_norm": 0.12415078282356262, "learning_rate": 0.0005, "loss": 2.1035, "step": 180990 }, { "epoch": 0.6889306730205613, "grad_norm": 0.11761855334043503, "learning_rate": 0.0005, "loss": 2.0996, "step": 181000 }, { "epoch": 0.688968735488684, "grad_norm": 0.13418373465538025, "learning_rate": 0.0005, "loss": 2.1129, "step": 181010 }, { "epoch": 0.6890067979568067, "grad_norm": 0.13213621079921722, "learning_rate": 0.0005, "loss": 2.1011, "step": 181020 }, { "epoch": 0.6890448604249294, "grad_norm": 0.1185673251748085, "learning_rate": 0.0005, "loss": 2.1145, "step": 181030 }, { "epoch": 0.6890829228930521, "grad_norm": 0.12293388694524765, "learning_rate": 0.0005, "loss": 2.106, "step": 181040 }, { "epoch": 0.6891209853611747, "grad_norm": 0.1463843137025833, "learning_rate": 0.0005, "loss": 2.1197, "step": 181050 }, { "epoch": 0.6891590478292975, "grad_norm": 0.1176767498254776, "learning_rate": 0.0005, "loss": 2.1069, "step": 181060 }, { "epoch": 0.6891971102974201, "grad_norm": 0.1293659210205078, "learning_rate": 0.0005, "loss": 2.1133, "step": 181070 }, { "epoch": 0.6892351727655428, "grad_norm": 0.12415621429681778, "learning_rate": 0.0005, "loss": 2.1066, "step": 181080 }, { "epoch": 0.6892732352336655, "grad_norm": 0.11896419525146484, "learning_rate": 0.0005, "loss": 2.1091, "step": 181090 }, { "epoch": 0.6893112977017882, "grad_norm": 0.13892972469329834, "learning_rate": 0.0005, "loss": 2.1059, "step": 181100 }, { "epoch": 0.6893493601699109, "grad_norm": 0.11878636479377747, "learning_rate": 0.0005, "loss": 2.1171, "step": 181110 }, { "epoch": 0.6893874226380335, "grad_norm": 0.12854772806167603, "learning_rate": 0.0005, "loss": 2.0964, "step": 181120 }, { "epoch": 0.6894254851061562, "grad_norm": 0.12110681086778641, "learning_rate": 0.0005, "loss": 2.1123, "step": 181130 }, { "epoch": 0.6894635475742789, "grad_norm": 0.12103510648012161, "learning_rate": 0.0005, "loss": 2.1052, "step": 181140 }, { "epoch": 0.6895016100424016, "grad_norm": 0.12146977335214615, "learning_rate": 0.0005, "loss": 2.1061, "step": 181150 }, { "epoch": 0.6895396725105243, "grad_norm": 0.13748902082443237, "learning_rate": 0.0005, "loss": 2.1088, "step": 181160 }, { "epoch": 0.6895777349786469, "grad_norm": 0.13430270552635193, "learning_rate": 0.0005, "loss": 2.0969, "step": 181170 }, { "epoch": 0.6896157974467696, "grad_norm": 0.12388263642787933, "learning_rate": 0.0005, "loss": 2.1114, "step": 181180 }, { "epoch": 0.6896538599148924, "grad_norm": 0.11775881797075272, "learning_rate": 0.0005, "loss": 2.1224, "step": 181190 }, { "epoch": 0.689691922383015, "grad_norm": 0.12764546275138855, "learning_rate": 0.0005, "loss": 2.1085, "step": 181200 }, { "epoch": 0.6897299848511377, "grad_norm": 0.11993419378995895, "learning_rate": 0.0005, "loss": 2.1088, "step": 181210 }, { "epoch": 0.6897680473192603, "grad_norm": 0.12366458773612976, "learning_rate": 0.0005, "loss": 2.1005, "step": 181220 }, { "epoch": 0.6898061097873831, "grad_norm": 0.12651975452899933, "learning_rate": 0.0005, "loss": 2.113, "step": 181230 }, { "epoch": 0.6898441722555058, "grad_norm": 0.12930312752723694, "learning_rate": 0.0005, "loss": 2.113, "step": 181240 }, { "epoch": 0.6898822347236284, "grad_norm": 0.11947444826364517, "learning_rate": 0.0005, "loss": 2.1194, "step": 181250 }, { "epoch": 0.6899202971917511, "grad_norm": 0.20199982821941376, "learning_rate": 0.0005, "loss": 2.1016, "step": 181260 }, { "epoch": 0.6899583596598737, "grad_norm": 0.11784765869379044, "learning_rate": 0.0005, "loss": 2.1132, "step": 181270 }, { "epoch": 0.6899964221279965, "grad_norm": 0.1350754201412201, "learning_rate": 0.0005, "loss": 2.0942, "step": 181280 }, { "epoch": 0.6900344845961192, "grad_norm": 0.13489319384098053, "learning_rate": 0.0005, "loss": 2.1078, "step": 181290 }, { "epoch": 0.6900725470642418, "grad_norm": 0.12121303379535675, "learning_rate": 0.0005, "loss": 2.1067, "step": 181300 }, { "epoch": 0.6901106095323645, "grad_norm": 0.12036201357841492, "learning_rate": 0.0005, "loss": 2.1196, "step": 181310 }, { "epoch": 0.6901486720004872, "grad_norm": 0.12152925133705139, "learning_rate": 0.0005, "loss": 2.1104, "step": 181320 }, { "epoch": 0.6901867344686099, "grad_norm": 0.13613882660865784, "learning_rate": 0.0005, "loss": 2.1193, "step": 181330 }, { "epoch": 0.6902247969367326, "grad_norm": 0.1253340244293213, "learning_rate": 0.0005, "loss": 2.1162, "step": 181340 }, { "epoch": 0.6902628594048552, "grad_norm": 0.12414707988500595, "learning_rate": 0.0005, "loss": 2.101, "step": 181350 }, { "epoch": 0.690300921872978, "grad_norm": 0.13022355735301971, "learning_rate": 0.0005, "loss": 2.1148, "step": 181360 }, { "epoch": 0.6903389843411006, "grad_norm": 0.12295828759670258, "learning_rate": 0.0005, "loss": 2.1111, "step": 181370 }, { "epoch": 0.6903770468092233, "grad_norm": 0.13136707246303558, "learning_rate": 0.0005, "loss": 2.1108, "step": 181380 }, { "epoch": 0.690415109277346, "grad_norm": 0.12850356101989746, "learning_rate": 0.0005, "loss": 2.1031, "step": 181390 }, { "epoch": 0.6904531717454686, "grad_norm": 0.12073330581188202, "learning_rate": 0.0005, "loss": 2.1016, "step": 181400 }, { "epoch": 0.6904912342135914, "grad_norm": 0.11530125886201859, "learning_rate": 0.0005, "loss": 2.1073, "step": 181410 }, { "epoch": 0.690529296681714, "grad_norm": 0.12165053933858871, "learning_rate": 0.0005, "loss": 2.1134, "step": 181420 }, { "epoch": 0.6905673591498367, "grad_norm": 0.11687213182449341, "learning_rate": 0.0005, "loss": 2.1049, "step": 181430 }, { "epoch": 0.6906054216179593, "grad_norm": 0.12140630930662155, "learning_rate": 0.0005, "loss": 2.1075, "step": 181440 }, { "epoch": 0.6906434840860821, "grad_norm": 0.12299919128417969, "learning_rate": 0.0005, "loss": 2.1047, "step": 181450 }, { "epoch": 0.6906815465542048, "grad_norm": 0.11274578422307968, "learning_rate": 0.0005, "loss": 2.1102, "step": 181460 }, { "epoch": 0.6907196090223274, "grad_norm": 0.11769766360521317, "learning_rate": 0.0005, "loss": 2.1015, "step": 181470 }, { "epoch": 0.6907576714904501, "grad_norm": 0.11614657938480377, "learning_rate": 0.0005, "loss": 2.1107, "step": 181480 }, { "epoch": 0.6907957339585729, "grad_norm": 0.13985876739025116, "learning_rate": 0.0005, "loss": 2.0987, "step": 181490 }, { "epoch": 0.6908337964266955, "grad_norm": 0.1485404074192047, "learning_rate": 0.0005, "loss": 2.0888, "step": 181500 }, { "epoch": 0.6908718588948182, "grad_norm": 0.12811745703220367, "learning_rate": 0.0005, "loss": 2.1169, "step": 181510 }, { "epoch": 0.6909099213629408, "grad_norm": 0.119151271879673, "learning_rate": 0.0005, "loss": 2.104, "step": 181520 }, { "epoch": 0.6909479838310636, "grad_norm": 0.12118816375732422, "learning_rate": 0.0005, "loss": 2.104, "step": 181530 }, { "epoch": 0.6909860462991863, "grad_norm": 0.11851918697357178, "learning_rate": 0.0005, "loss": 2.117, "step": 181540 }, { "epoch": 0.6910241087673089, "grad_norm": 0.12571333348751068, "learning_rate": 0.0005, "loss": 2.0872, "step": 181550 }, { "epoch": 0.6910621712354316, "grad_norm": 0.1240975484251976, "learning_rate": 0.0005, "loss": 2.1022, "step": 181560 }, { "epoch": 0.6911002337035542, "grad_norm": 0.11628197133541107, "learning_rate": 0.0005, "loss": 2.1027, "step": 181570 }, { "epoch": 0.691138296171677, "grad_norm": 0.11772032827138901, "learning_rate": 0.0005, "loss": 2.1061, "step": 181580 }, { "epoch": 0.6911763586397996, "grad_norm": 0.12051352113485336, "learning_rate": 0.0005, "loss": 2.107, "step": 181590 }, { "epoch": 0.6912144211079223, "grad_norm": 0.13565829396247864, "learning_rate": 0.0005, "loss": 2.1189, "step": 181600 }, { "epoch": 0.691252483576045, "grad_norm": 0.1340634673833847, "learning_rate": 0.0005, "loss": 2.1079, "step": 181610 }, { "epoch": 0.6912905460441677, "grad_norm": 0.11844746023416519, "learning_rate": 0.0005, "loss": 2.099, "step": 181620 }, { "epoch": 0.6913286085122904, "grad_norm": 0.13710397481918335, "learning_rate": 0.0005, "loss": 2.0924, "step": 181630 }, { "epoch": 0.691366670980413, "grad_norm": 0.11849800497293472, "learning_rate": 0.0005, "loss": 2.1032, "step": 181640 }, { "epoch": 0.6914047334485357, "grad_norm": 0.12317143380641937, "learning_rate": 0.0005, "loss": 2.116, "step": 181650 }, { "epoch": 0.6914427959166585, "grad_norm": 0.1267412304878235, "learning_rate": 0.0005, "loss": 2.1249, "step": 181660 }, { "epoch": 0.6914808583847811, "grad_norm": 0.12529774010181427, "learning_rate": 0.0005, "loss": 2.1177, "step": 181670 }, { "epoch": 0.6915189208529038, "grad_norm": 0.130142942070961, "learning_rate": 0.0005, "loss": 2.1051, "step": 181680 }, { "epoch": 0.6915569833210264, "grad_norm": 0.12222882360219955, "learning_rate": 0.0005, "loss": 2.1077, "step": 181690 }, { "epoch": 0.6915950457891491, "grad_norm": 0.11752060800790787, "learning_rate": 0.0005, "loss": 2.1103, "step": 181700 }, { "epoch": 0.6916331082572719, "grad_norm": 0.13059893250465393, "learning_rate": 0.0005, "loss": 2.0989, "step": 181710 }, { "epoch": 0.6916711707253945, "grad_norm": 0.13170769810676575, "learning_rate": 0.0005, "loss": 2.109, "step": 181720 }, { "epoch": 0.6917092331935172, "grad_norm": 0.11935019493103027, "learning_rate": 0.0005, "loss": 2.1111, "step": 181730 }, { "epoch": 0.6917472956616398, "grad_norm": 0.1218312606215477, "learning_rate": 0.0005, "loss": 2.1303, "step": 181740 }, { "epoch": 0.6917853581297626, "grad_norm": 0.11730033159255981, "learning_rate": 0.0005, "loss": 2.1206, "step": 181750 }, { "epoch": 0.6918234205978853, "grad_norm": 0.1214584931731224, "learning_rate": 0.0005, "loss": 2.1049, "step": 181760 }, { "epoch": 0.6918614830660079, "grad_norm": 0.13505078852176666, "learning_rate": 0.0005, "loss": 2.1097, "step": 181770 }, { "epoch": 0.6918995455341306, "grad_norm": 0.13040438294410706, "learning_rate": 0.0005, "loss": 2.109, "step": 181780 }, { "epoch": 0.6919376080022533, "grad_norm": 0.12045073509216309, "learning_rate": 0.0005, "loss": 2.1173, "step": 181790 }, { "epoch": 0.691975670470376, "grad_norm": 0.12367752939462662, "learning_rate": 0.0005, "loss": 2.1202, "step": 181800 }, { "epoch": 0.6920137329384987, "grad_norm": 0.13401754200458527, "learning_rate": 0.0005, "loss": 2.108, "step": 181810 }, { "epoch": 0.6920517954066213, "grad_norm": 0.12990473210811615, "learning_rate": 0.0005, "loss": 2.0946, "step": 181820 }, { "epoch": 0.692089857874744, "grad_norm": 0.11714720726013184, "learning_rate": 0.0005, "loss": 2.0953, "step": 181830 }, { "epoch": 0.6921279203428667, "grad_norm": 0.12829618155956268, "learning_rate": 0.0005, "loss": 2.1086, "step": 181840 }, { "epoch": 0.6921659828109894, "grad_norm": 0.12493853271007538, "learning_rate": 0.0005, "loss": 2.1289, "step": 181850 }, { "epoch": 0.6922040452791121, "grad_norm": 0.12608863413333893, "learning_rate": 0.0005, "loss": 2.1177, "step": 181860 }, { "epoch": 0.6922421077472347, "grad_norm": 0.13609422743320465, "learning_rate": 0.0005, "loss": 2.1091, "step": 181870 }, { "epoch": 0.6922801702153575, "grad_norm": 0.14166349172592163, "learning_rate": 0.0005, "loss": 2.1226, "step": 181880 }, { "epoch": 0.6923182326834801, "grad_norm": 0.13140493631362915, "learning_rate": 0.0005, "loss": 2.1014, "step": 181890 }, { "epoch": 0.6923562951516028, "grad_norm": 0.12775324285030365, "learning_rate": 0.0005, "loss": 2.1038, "step": 181900 }, { "epoch": 0.6923943576197255, "grad_norm": 0.1270611435174942, "learning_rate": 0.0005, "loss": 2.1124, "step": 181910 }, { "epoch": 0.6924324200878482, "grad_norm": 0.13412043452262878, "learning_rate": 0.0005, "loss": 2.1006, "step": 181920 }, { "epoch": 0.6924704825559709, "grad_norm": 0.12404550611972809, "learning_rate": 0.0005, "loss": 2.1052, "step": 181930 }, { "epoch": 0.6925085450240935, "grad_norm": 0.1306399554014206, "learning_rate": 0.0005, "loss": 2.1187, "step": 181940 }, { "epoch": 0.6925466074922162, "grad_norm": 0.1323079913854599, "learning_rate": 0.0005, "loss": 2.1009, "step": 181950 }, { "epoch": 0.692584669960339, "grad_norm": 0.12677091360092163, "learning_rate": 0.0005, "loss": 2.1195, "step": 181960 }, { "epoch": 0.6926227324284616, "grad_norm": 0.12194015830755234, "learning_rate": 0.0005, "loss": 2.131, "step": 181970 }, { "epoch": 0.6926607948965843, "grad_norm": 0.12330708652734756, "learning_rate": 0.0005, "loss": 2.0952, "step": 181980 }, { "epoch": 0.6926988573647069, "grad_norm": 0.12560437619686127, "learning_rate": 0.0005, "loss": 2.0936, "step": 181990 }, { "epoch": 0.6927369198328296, "grad_norm": 0.13029389083385468, "learning_rate": 0.0005, "loss": 2.085, "step": 182000 }, { "epoch": 0.6927749823009524, "grad_norm": 0.12004786729812622, "learning_rate": 0.0005, "loss": 2.1238, "step": 182010 }, { "epoch": 0.692813044769075, "grad_norm": 0.1352328658103943, "learning_rate": 0.0005, "loss": 2.1127, "step": 182020 }, { "epoch": 0.6928511072371977, "grad_norm": 0.1298789083957672, "learning_rate": 0.0005, "loss": 2.0988, "step": 182030 }, { "epoch": 0.6928891697053203, "grad_norm": 0.12174946814775467, "learning_rate": 0.0005, "loss": 2.1222, "step": 182040 }, { "epoch": 0.6929272321734431, "grad_norm": 0.12317008525133133, "learning_rate": 0.0005, "loss": 2.1043, "step": 182050 }, { "epoch": 0.6929652946415658, "grad_norm": 0.1217416450381279, "learning_rate": 0.0005, "loss": 2.0969, "step": 182060 }, { "epoch": 0.6930033571096884, "grad_norm": 0.13692374527454376, "learning_rate": 0.0005, "loss": 2.108, "step": 182070 }, { "epoch": 0.6930414195778111, "grad_norm": 0.1314442753791809, "learning_rate": 0.0005, "loss": 2.1177, "step": 182080 }, { "epoch": 0.6930794820459338, "grad_norm": 0.12198949605226517, "learning_rate": 0.0005, "loss": 2.1002, "step": 182090 }, { "epoch": 0.6931175445140565, "grad_norm": 0.1149495393037796, "learning_rate": 0.0005, "loss": 2.1222, "step": 182100 }, { "epoch": 0.6931556069821792, "grad_norm": 0.12052424252033234, "learning_rate": 0.0005, "loss": 2.1127, "step": 182110 }, { "epoch": 0.6931936694503018, "grad_norm": 0.11988913267850876, "learning_rate": 0.0005, "loss": 2.111, "step": 182120 }, { "epoch": 0.6932317319184245, "grad_norm": 0.11893389374017715, "learning_rate": 0.0005, "loss": 2.1204, "step": 182130 }, { "epoch": 0.6932697943865472, "grad_norm": 0.12340518832206726, "learning_rate": 0.0005, "loss": 2.1043, "step": 182140 }, { "epoch": 0.6933078568546699, "grad_norm": 0.12666188180446625, "learning_rate": 0.0005, "loss": 2.1085, "step": 182150 }, { "epoch": 0.6933459193227925, "grad_norm": 0.11834313720464706, "learning_rate": 0.0005, "loss": 2.1153, "step": 182160 }, { "epoch": 0.6933839817909152, "grad_norm": 0.13510651886463165, "learning_rate": 0.0005, "loss": 2.1035, "step": 182170 }, { "epoch": 0.693422044259038, "grad_norm": 0.12976713478565216, "learning_rate": 0.0005, "loss": 2.0916, "step": 182180 }, { "epoch": 0.6934601067271606, "grad_norm": 0.12408886104822159, "learning_rate": 0.0005, "loss": 2.1166, "step": 182190 }, { "epoch": 0.6934981691952833, "grad_norm": 0.13030008971691132, "learning_rate": 0.0005, "loss": 2.1087, "step": 182200 }, { "epoch": 0.6935362316634059, "grad_norm": 0.13181696832180023, "learning_rate": 0.0005, "loss": 2.0985, "step": 182210 }, { "epoch": 0.6935742941315287, "grad_norm": 0.12257903814315796, "learning_rate": 0.0005, "loss": 2.1184, "step": 182220 }, { "epoch": 0.6936123565996514, "grad_norm": 0.11532396078109741, "learning_rate": 0.0005, "loss": 2.1168, "step": 182230 }, { "epoch": 0.693650419067774, "grad_norm": 0.12889619171619415, "learning_rate": 0.0005, "loss": 2.1131, "step": 182240 }, { "epoch": 0.6936884815358967, "grad_norm": 0.12298876792192459, "learning_rate": 0.0005, "loss": 2.1109, "step": 182250 }, { "epoch": 0.6937265440040195, "grad_norm": 0.1157648116350174, "learning_rate": 0.0005, "loss": 2.1057, "step": 182260 }, { "epoch": 0.6937646064721421, "grad_norm": 0.14459484815597534, "learning_rate": 0.0005, "loss": 2.1026, "step": 182270 }, { "epoch": 0.6938026689402648, "grad_norm": 0.13314592838287354, "learning_rate": 0.0005, "loss": 2.095, "step": 182280 }, { "epoch": 0.6938407314083874, "grad_norm": 0.1288757175207138, "learning_rate": 0.0005, "loss": 2.0946, "step": 182290 }, { "epoch": 0.6938787938765101, "grad_norm": 0.12310304492712021, "learning_rate": 0.0005, "loss": 2.1375, "step": 182300 }, { "epoch": 0.6939168563446328, "grad_norm": 0.1330040991306305, "learning_rate": 0.0005, "loss": 2.1014, "step": 182310 }, { "epoch": 0.6939549188127555, "grad_norm": 0.1187869980931282, "learning_rate": 0.0005, "loss": 2.1057, "step": 182320 }, { "epoch": 0.6939929812808782, "grad_norm": 0.11585734784603119, "learning_rate": 0.0005, "loss": 2.099, "step": 182330 }, { "epoch": 0.6940310437490008, "grad_norm": 0.1329014152288437, "learning_rate": 0.0005, "loss": 2.0977, "step": 182340 }, { "epoch": 0.6940691062171236, "grad_norm": 0.13332955539226532, "learning_rate": 0.0005, "loss": 2.1073, "step": 182350 }, { "epoch": 0.6941071686852462, "grad_norm": 0.1157715693116188, "learning_rate": 0.0005, "loss": 2.1047, "step": 182360 }, { "epoch": 0.6941452311533689, "grad_norm": 0.11897668242454529, "learning_rate": 0.0005, "loss": 2.1109, "step": 182370 }, { "epoch": 0.6941832936214916, "grad_norm": 0.12142232060432434, "learning_rate": 0.0005, "loss": 2.0807, "step": 182380 }, { "epoch": 0.6942213560896143, "grad_norm": 0.1747933030128479, "learning_rate": 0.0005, "loss": 2.1199, "step": 182390 }, { "epoch": 0.694259418557737, "grad_norm": 0.1174633651971817, "learning_rate": 0.0005, "loss": 2.0975, "step": 182400 }, { "epoch": 0.6942974810258596, "grad_norm": 0.12898437678813934, "learning_rate": 0.0005, "loss": 2.1134, "step": 182410 }, { "epoch": 0.6943355434939823, "grad_norm": 0.12418783456087112, "learning_rate": 0.0005, "loss": 2.0961, "step": 182420 }, { "epoch": 0.694373605962105, "grad_norm": 0.12284686416387558, "learning_rate": 0.0005, "loss": 2.1052, "step": 182430 }, { "epoch": 0.6944116684302277, "grad_norm": 0.12123236805200577, "learning_rate": 0.0005, "loss": 2.1179, "step": 182440 }, { "epoch": 0.6944497308983504, "grad_norm": 0.11644534766674042, "learning_rate": 0.0005, "loss": 2.1029, "step": 182450 }, { "epoch": 0.694487793366473, "grad_norm": 0.12517879903316498, "learning_rate": 0.0005, "loss": 2.0976, "step": 182460 }, { "epoch": 0.6945258558345957, "grad_norm": 0.11970049887895584, "learning_rate": 0.0005, "loss": 2.1156, "step": 182470 }, { "epoch": 0.6945639183027185, "grad_norm": 0.12386345863342285, "learning_rate": 0.0005, "loss": 2.1123, "step": 182480 }, { "epoch": 0.6946019807708411, "grad_norm": 0.12449586391448975, "learning_rate": 0.0005, "loss": 2.1106, "step": 182490 }, { "epoch": 0.6946400432389638, "grad_norm": 0.12283190339803696, "learning_rate": 0.0005, "loss": 2.1156, "step": 182500 }, { "epoch": 0.6946781057070864, "grad_norm": 0.12632504105567932, "learning_rate": 0.0005, "loss": 2.1033, "step": 182510 }, { "epoch": 0.6947161681752092, "grad_norm": 0.12029515206813812, "learning_rate": 0.0005, "loss": 2.1049, "step": 182520 }, { "epoch": 0.6947542306433319, "grad_norm": 0.1184384822845459, "learning_rate": 0.0005, "loss": 2.1154, "step": 182530 }, { "epoch": 0.6947922931114545, "grad_norm": 0.11830782145261765, "learning_rate": 0.0005, "loss": 2.1001, "step": 182540 }, { "epoch": 0.6948303555795772, "grad_norm": 0.1292402148246765, "learning_rate": 0.0005, "loss": 2.1205, "step": 182550 }, { "epoch": 0.6948684180476998, "grad_norm": 0.13244417309761047, "learning_rate": 0.0005, "loss": 2.1048, "step": 182560 }, { "epoch": 0.6949064805158226, "grad_norm": 0.13277667760849, "learning_rate": 0.0005, "loss": 2.1049, "step": 182570 }, { "epoch": 0.6949445429839453, "grad_norm": 0.1484387069940567, "learning_rate": 0.0005, "loss": 2.1145, "step": 182580 }, { "epoch": 0.6949826054520679, "grad_norm": 0.11615041643381119, "learning_rate": 0.0005, "loss": 2.1156, "step": 182590 }, { "epoch": 0.6950206679201906, "grad_norm": 0.12297375500202179, "learning_rate": 0.0005, "loss": 2.1104, "step": 182600 }, { "epoch": 0.6950587303883133, "grad_norm": 0.12898223102092743, "learning_rate": 0.0005, "loss": 2.0987, "step": 182610 }, { "epoch": 0.695096792856436, "grad_norm": 0.12044510990381241, "learning_rate": 0.0005, "loss": 2.1114, "step": 182620 }, { "epoch": 0.6951348553245587, "grad_norm": 0.13075432181358337, "learning_rate": 0.0005, "loss": 2.1169, "step": 182630 }, { "epoch": 0.6951729177926813, "grad_norm": 0.129336416721344, "learning_rate": 0.0005, "loss": 2.1025, "step": 182640 }, { "epoch": 0.6952109802608041, "grad_norm": 0.13075217604637146, "learning_rate": 0.0005, "loss": 2.1019, "step": 182650 }, { "epoch": 0.6952490427289267, "grad_norm": 0.12655818462371826, "learning_rate": 0.0005, "loss": 2.1181, "step": 182660 }, { "epoch": 0.6952871051970494, "grad_norm": 0.11474862694740295, "learning_rate": 0.0005, "loss": 2.1047, "step": 182670 }, { "epoch": 0.695325167665172, "grad_norm": 0.17931579053401947, "learning_rate": 0.0005, "loss": 2.0888, "step": 182680 }, { "epoch": 0.6953632301332948, "grad_norm": 0.13210253417491913, "learning_rate": 0.0005, "loss": 2.0936, "step": 182690 }, { "epoch": 0.6954012926014175, "grad_norm": 0.12530747056007385, "learning_rate": 0.0005, "loss": 2.1119, "step": 182700 }, { "epoch": 0.6954393550695401, "grad_norm": 0.11945699900388718, "learning_rate": 0.0005, "loss": 2.1244, "step": 182710 }, { "epoch": 0.6954774175376628, "grad_norm": 0.11607809364795685, "learning_rate": 0.0005, "loss": 2.1065, "step": 182720 }, { "epoch": 0.6955154800057854, "grad_norm": 0.13010531663894653, "learning_rate": 0.0005, "loss": 2.1053, "step": 182730 }, { "epoch": 0.6955535424739082, "grad_norm": 0.1291985809803009, "learning_rate": 0.0005, "loss": 2.1069, "step": 182740 }, { "epoch": 0.6955916049420309, "grad_norm": 0.11874186247587204, "learning_rate": 0.0005, "loss": 2.1146, "step": 182750 }, { "epoch": 0.6956296674101535, "grad_norm": 0.13069820404052734, "learning_rate": 0.0005, "loss": 2.1132, "step": 182760 }, { "epoch": 0.6956677298782762, "grad_norm": 0.12255138903856277, "learning_rate": 0.0005, "loss": 2.1011, "step": 182770 }, { "epoch": 0.695705792346399, "grad_norm": 0.12217668443918228, "learning_rate": 0.0005, "loss": 2.0963, "step": 182780 }, { "epoch": 0.6957438548145216, "grad_norm": 0.11657962948083878, "learning_rate": 0.0005, "loss": 2.0881, "step": 182790 }, { "epoch": 0.6957819172826443, "grad_norm": 0.11431416869163513, "learning_rate": 0.0005, "loss": 2.1016, "step": 182800 }, { "epoch": 0.6958199797507669, "grad_norm": 0.12846305966377258, "learning_rate": 0.0005, "loss": 2.1267, "step": 182810 }, { "epoch": 0.6958580422188897, "grad_norm": 0.12828603386878967, "learning_rate": 0.0005, "loss": 2.1184, "step": 182820 }, { "epoch": 0.6958961046870124, "grad_norm": 0.13457556068897247, "learning_rate": 0.0005, "loss": 2.1035, "step": 182830 }, { "epoch": 0.695934167155135, "grad_norm": 0.12460450083017349, "learning_rate": 0.0005, "loss": 2.1158, "step": 182840 }, { "epoch": 0.6959722296232577, "grad_norm": 0.13973352313041687, "learning_rate": 0.0005, "loss": 2.1145, "step": 182850 }, { "epoch": 0.6960102920913803, "grad_norm": 0.1241278126835823, "learning_rate": 0.0005, "loss": 2.112, "step": 182860 }, { "epoch": 0.6960483545595031, "grad_norm": 0.11662398278713226, "learning_rate": 0.0005, "loss": 2.1062, "step": 182870 }, { "epoch": 0.6960864170276257, "grad_norm": 0.12885436415672302, "learning_rate": 0.0005, "loss": 2.1061, "step": 182880 }, { "epoch": 0.6961244794957484, "grad_norm": 0.11967397481203079, "learning_rate": 0.0005, "loss": 2.1044, "step": 182890 }, { "epoch": 0.6961625419638711, "grad_norm": 0.11201397329568863, "learning_rate": 0.0005, "loss": 2.1113, "step": 182900 }, { "epoch": 0.6962006044319938, "grad_norm": 0.11858703196048737, "learning_rate": 0.0005, "loss": 2.1074, "step": 182910 }, { "epoch": 0.6962386669001165, "grad_norm": 0.13523299992084503, "learning_rate": 0.0005, "loss": 2.131, "step": 182920 }, { "epoch": 0.6962767293682391, "grad_norm": 0.12153832614421844, "learning_rate": 0.0005, "loss": 2.1046, "step": 182930 }, { "epoch": 0.6963147918363618, "grad_norm": 0.12720975279808044, "learning_rate": 0.0005, "loss": 2.1129, "step": 182940 }, { "epoch": 0.6963528543044846, "grad_norm": 0.12351260334253311, "learning_rate": 0.0005, "loss": 2.1127, "step": 182950 }, { "epoch": 0.6963909167726072, "grad_norm": 0.12928800284862518, "learning_rate": 0.0005, "loss": 2.1003, "step": 182960 }, { "epoch": 0.6964289792407299, "grad_norm": 0.13228236138820648, "learning_rate": 0.0005, "loss": 2.1107, "step": 182970 }, { "epoch": 0.6964670417088525, "grad_norm": 0.11482942849397659, "learning_rate": 0.0005, "loss": 2.1027, "step": 182980 }, { "epoch": 0.6965051041769752, "grad_norm": 0.14297699928283691, "learning_rate": 0.0005, "loss": 2.1087, "step": 182990 }, { "epoch": 0.696543166645098, "grad_norm": 0.12494589388370514, "learning_rate": 0.0005, "loss": 2.1138, "step": 183000 }, { "epoch": 0.6965812291132206, "grad_norm": 0.11370964348316193, "learning_rate": 0.0005, "loss": 2.1133, "step": 183010 }, { "epoch": 0.6966192915813433, "grad_norm": 0.14288152754306793, "learning_rate": 0.0005, "loss": 2.1071, "step": 183020 }, { "epoch": 0.6966573540494659, "grad_norm": 0.12203198671340942, "learning_rate": 0.0005, "loss": 2.099, "step": 183030 }, { "epoch": 0.6966954165175887, "grad_norm": 0.12365303933620453, "learning_rate": 0.0005, "loss": 2.1203, "step": 183040 }, { "epoch": 0.6967334789857114, "grad_norm": 0.12327518314123154, "learning_rate": 0.0005, "loss": 2.1188, "step": 183050 }, { "epoch": 0.696771541453834, "grad_norm": 0.13546282052993774, "learning_rate": 0.0005, "loss": 2.11, "step": 183060 }, { "epoch": 0.6968096039219567, "grad_norm": 0.1293046772480011, "learning_rate": 0.0005, "loss": 2.11, "step": 183070 }, { "epoch": 0.6968476663900794, "grad_norm": 0.12784546613693237, "learning_rate": 0.0005, "loss": 2.0981, "step": 183080 }, { "epoch": 0.6968857288582021, "grad_norm": 0.13157708942890167, "learning_rate": 0.0005, "loss": 2.1219, "step": 183090 }, { "epoch": 0.6969237913263248, "grad_norm": 0.11840756237506866, "learning_rate": 0.0005, "loss": 2.1182, "step": 183100 }, { "epoch": 0.6969618537944474, "grad_norm": 0.12368209660053253, "learning_rate": 0.0005, "loss": 2.1265, "step": 183110 }, { "epoch": 0.6969999162625702, "grad_norm": 0.12720529735088348, "learning_rate": 0.0005, "loss": 2.1001, "step": 183120 }, { "epoch": 0.6970379787306928, "grad_norm": 0.14050424098968506, "learning_rate": 0.0005, "loss": 2.1079, "step": 183130 }, { "epoch": 0.6970760411988155, "grad_norm": 0.12502345442771912, "learning_rate": 0.0005, "loss": 2.1204, "step": 183140 }, { "epoch": 0.6971141036669382, "grad_norm": 0.12937872111797333, "learning_rate": 0.0005, "loss": 2.109, "step": 183150 }, { "epoch": 0.6971521661350608, "grad_norm": 0.1396130472421646, "learning_rate": 0.0005, "loss": 2.106, "step": 183160 }, { "epoch": 0.6971902286031836, "grad_norm": 0.12844252586364746, "learning_rate": 0.0005, "loss": 2.1017, "step": 183170 }, { "epoch": 0.6972282910713062, "grad_norm": 0.12445782124996185, "learning_rate": 0.0005, "loss": 2.1038, "step": 183180 }, { "epoch": 0.6972663535394289, "grad_norm": 0.11656850576400757, "learning_rate": 0.0005, "loss": 2.1135, "step": 183190 }, { "epoch": 0.6973044160075516, "grad_norm": 0.11850359290838242, "learning_rate": 0.0005, "loss": 2.1229, "step": 183200 }, { "epoch": 0.6973424784756743, "grad_norm": 0.11828984320163727, "learning_rate": 0.0005, "loss": 2.1171, "step": 183210 }, { "epoch": 0.697380540943797, "grad_norm": 0.12916888296604156, "learning_rate": 0.0005, "loss": 2.1018, "step": 183220 }, { "epoch": 0.6974186034119196, "grad_norm": 0.13033920526504517, "learning_rate": 0.0005, "loss": 2.123, "step": 183230 }, { "epoch": 0.6974566658800423, "grad_norm": 0.12288140505552292, "learning_rate": 0.0005, "loss": 2.1098, "step": 183240 }, { "epoch": 0.6974947283481651, "grad_norm": 0.12239166349172592, "learning_rate": 0.0005, "loss": 2.1061, "step": 183250 }, { "epoch": 0.6975327908162877, "grad_norm": 0.1316891759634018, "learning_rate": 0.0005, "loss": 2.1114, "step": 183260 }, { "epoch": 0.6975708532844104, "grad_norm": 0.1223904937505722, "learning_rate": 0.0005, "loss": 2.1205, "step": 183270 }, { "epoch": 0.697608915752533, "grad_norm": 0.13343580067157745, "learning_rate": 0.0005, "loss": 2.1146, "step": 183280 }, { "epoch": 0.6976469782206557, "grad_norm": 0.11461261659860611, "learning_rate": 0.0005, "loss": 2.1177, "step": 183290 }, { "epoch": 0.6976850406887785, "grad_norm": 0.1275361180305481, "learning_rate": 0.0005, "loss": 2.0911, "step": 183300 }, { "epoch": 0.6977231031569011, "grad_norm": 0.11810991168022156, "learning_rate": 0.0005, "loss": 2.1028, "step": 183310 }, { "epoch": 0.6977611656250238, "grad_norm": 0.12467637658119202, "learning_rate": 0.0005, "loss": 2.128, "step": 183320 }, { "epoch": 0.6977992280931464, "grad_norm": 0.13004258275032043, "learning_rate": 0.0005, "loss": 2.1198, "step": 183330 }, { "epoch": 0.6978372905612692, "grad_norm": 0.14000114798545837, "learning_rate": 0.0005, "loss": 2.1135, "step": 183340 }, { "epoch": 0.6978753530293919, "grad_norm": 0.12561576068401337, "learning_rate": 0.0005, "loss": 2.1114, "step": 183350 }, { "epoch": 0.6979134154975145, "grad_norm": 0.13117700815200806, "learning_rate": 0.0005, "loss": 2.1027, "step": 183360 }, { "epoch": 0.6979514779656372, "grad_norm": 0.13505379855632782, "learning_rate": 0.0005, "loss": 2.1028, "step": 183370 }, { "epoch": 0.6979895404337599, "grad_norm": 0.13054326176643372, "learning_rate": 0.0005, "loss": 2.1098, "step": 183380 }, { "epoch": 0.6980276029018826, "grad_norm": 0.13579963147640228, "learning_rate": 0.0005, "loss": 2.1064, "step": 183390 }, { "epoch": 0.6980656653700053, "grad_norm": 0.13260318338871002, "learning_rate": 0.0005, "loss": 2.1183, "step": 183400 }, { "epoch": 0.6981037278381279, "grad_norm": 0.12206083536148071, "learning_rate": 0.0005, "loss": 2.0904, "step": 183410 }, { "epoch": 0.6981417903062506, "grad_norm": 0.12467200309038162, "learning_rate": 0.0005, "loss": 2.1096, "step": 183420 }, { "epoch": 0.6981798527743733, "grad_norm": 0.12051711976528168, "learning_rate": 0.0005, "loss": 2.109, "step": 183430 }, { "epoch": 0.698217915242496, "grad_norm": 0.1251114457845688, "learning_rate": 0.0005, "loss": 2.1113, "step": 183440 }, { "epoch": 0.6982559777106186, "grad_norm": 0.12093877792358398, "learning_rate": 0.0005, "loss": 2.1114, "step": 183450 }, { "epoch": 0.6982940401787413, "grad_norm": 0.12126054614782333, "learning_rate": 0.0005, "loss": 2.1228, "step": 183460 }, { "epoch": 0.6983321026468641, "grad_norm": 0.13498644530773163, "learning_rate": 0.0005, "loss": 2.0933, "step": 183470 }, { "epoch": 0.6983701651149867, "grad_norm": 0.12364812940359116, "learning_rate": 0.0005, "loss": 2.1117, "step": 183480 }, { "epoch": 0.6984082275831094, "grad_norm": 0.119442880153656, "learning_rate": 0.0005, "loss": 2.1057, "step": 183490 }, { "epoch": 0.698446290051232, "grad_norm": 0.11596567928791046, "learning_rate": 0.0005, "loss": 2.1115, "step": 183500 }, { "epoch": 0.6984843525193548, "grad_norm": 0.14025621116161346, "learning_rate": 0.0005, "loss": 2.109, "step": 183510 }, { "epoch": 0.6985224149874775, "grad_norm": 0.1185678243637085, "learning_rate": 0.0005, "loss": 2.1194, "step": 183520 }, { "epoch": 0.6985604774556001, "grad_norm": 0.3852109909057617, "learning_rate": 0.0005, "loss": 2.1254, "step": 183530 }, { "epoch": 0.6985985399237228, "grad_norm": 0.14016196131706238, "learning_rate": 0.0005, "loss": 2.1051, "step": 183540 }, { "epoch": 0.6986366023918456, "grad_norm": 0.12113747000694275, "learning_rate": 0.0005, "loss": 2.1138, "step": 183550 }, { "epoch": 0.6986746648599682, "grad_norm": 0.11765763908624649, "learning_rate": 0.0005, "loss": 2.1117, "step": 183560 }, { "epoch": 0.6987127273280909, "grad_norm": 0.1222396045923233, "learning_rate": 0.0005, "loss": 2.1125, "step": 183570 }, { "epoch": 0.6987507897962135, "grad_norm": 0.11592081934213638, "learning_rate": 0.0005, "loss": 2.107, "step": 183580 }, { "epoch": 0.6987888522643362, "grad_norm": 0.1372542381286621, "learning_rate": 0.0005, "loss": 2.1076, "step": 183590 }, { "epoch": 0.698826914732459, "grad_norm": 0.12375343590974808, "learning_rate": 0.0005, "loss": 2.0973, "step": 183600 }, { "epoch": 0.6988649772005816, "grad_norm": 0.13878773152828217, "learning_rate": 0.0005, "loss": 2.0969, "step": 183610 }, { "epoch": 0.6989030396687043, "grad_norm": 0.1390574872493744, "learning_rate": 0.0005, "loss": 2.0971, "step": 183620 }, { "epoch": 0.6989411021368269, "grad_norm": 0.131068155169487, "learning_rate": 0.0005, "loss": 2.1029, "step": 183630 }, { "epoch": 0.6989791646049497, "grad_norm": 0.11932392418384552, "learning_rate": 0.0005, "loss": 2.1019, "step": 183640 }, { "epoch": 0.6990172270730723, "grad_norm": 0.11907713860273361, "learning_rate": 0.0005, "loss": 2.1154, "step": 183650 }, { "epoch": 0.699055289541195, "grad_norm": 0.11536707729101181, "learning_rate": 0.0005, "loss": 2.093, "step": 183660 }, { "epoch": 0.6990933520093177, "grad_norm": 0.11935848742723465, "learning_rate": 0.0005, "loss": 2.1096, "step": 183670 }, { "epoch": 0.6991314144774404, "grad_norm": 0.12416180223226547, "learning_rate": 0.0005, "loss": 2.1027, "step": 183680 }, { "epoch": 0.6991694769455631, "grad_norm": 0.1163659617304802, "learning_rate": 0.0005, "loss": 2.1061, "step": 183690 }, { "epoch": 0.6992075394136857, "grad_norm": 0.13339783251285553, "learning_rate": 0.0005, "loss": 2.1087, "step": 183700 }, { "epoch": 0.6992456018818084, "grad_norm": 0.12422681599855423, "learning_rate": 0.0005, "loss": 2.1029, "step": 183710 }, { "epoch": 0.6992836643499311, "grad_norm": 0.13619937002658844, "learning_rate": 0.0005, "loss": 2.1, "step": 183720 }, { "epoch": 0.6993217268180538, "grad_norm": 0.12961874902248383, "learning_rate": 0.0005, "loss": 2.1055, "step": 183730 }, { "epoch": 0.6993597892861765, "grad_norm": 0.13386094570159912, "learning_rate": 0.0005, "loss": 2.1104, "step": 183740 }, { "epoch": 0.6993978517542991, "grad_norm": 0.1292843520641327, "learning_rate": 0.0005, "loss": 2.1069, "step": 183750 }, { "epoch": 0.6994359142224218, "grad_norm": 0.12814553081989288, "learning_rate": 0.0005, "loss": 2.1105, "step": 183760 }, { "epoch": 0.6994739766905446, "grad_norm": 0.11046000570058823, "learning_rate": 0.0005, "loss": 2.1106, "step": 183770 }, { "epoch": 0.6995120391586672, "grad_norm": 0.1339779794216156, "learning_rate": 0.0005, "loss": 2.1045, "step": 183780 }, { "epoch": 0.6995501016267899, "grad_norm": 0.12307266891002655, "learning_rate": 0.0005, "loss": 2.1028, "step": 183790 }, { "epoch": 0.6995881640949125, "grad_norm": 0.12837962806224823, "learning_rate": 0.0005, "loss": 2.0996, "step": 183800 }, { "epoch": 0.6996262265630353, "grad_norm": 0.11693299561738968, "learning_rate": 0.0005, "loss": 2.1123, "step": 183810 }, { "epoch": 0.699664289031158, "grad_norm": 0.1293923407793045, "learning_rate": 0.0005, "loss": 2.1036, "step": 183820 }, { "epoch": 0.6997023514992806, "grad_norm": 0.12665726244449615, "learning_rate": 0.0005, "loss": 2.1153, "step": 183830 }, { "epoch": 0.6997404139674033, "grad_norm": 0.12343131005764008, "learning_rate": 0.0005, "loss": 2.1068, "step": 183840 }, { "epoch": 0.6997784764355259, "grad_norm": 0.12809494137763977, "learning_rate": 0.0005, "loss": 2.1018, "step": 183850 }, { "epoch": 0.6998165389036487, "grad_norm": 0.1415323168039322, "learning_rate": 0.0005, "loss": 2.0934, "step": 183860 }, { "epoch": 0.6998546013717714, "grad_norm": 0.13057930767536163, "learning_rate": 0.0005, "loss": 2.1004, "step": 183870 }, { "epoch": 0.699892663839894, "grad_norm": 0.12605410814285278, "learning_rate": 0.0005, "loss": 2.113, "step": 183880 }, { "epoch": 0.6999307263080167, "grad_norm": 0.12751656770706177, "learning_rate": 0.0005, "loss": 2.1057, "step": 183890 }, { "epoch": 0.6999687887761394, "grad_norm": 0.12429352104663849, "learning_rate": 0.0005, "loss": 2.0976, "step": 183900 }, { "epoch": 0.7000068512442621, "grad_norm": 0.11484511941671371, "learning_rate": 0.0005, "loss": 2.1054, "step": 183910 }, { "epoch": 0.7000449137123848, "grad_norm": 0.12453338503837585, "learning_rate": 0.0005, "loss": 2.116, "step": 183920 }, { "epoch": 0.7000829761805074, "grad_norm": 0.12155555188655853, "learning_rate": 0.0005, "loss": 2.1047, "step": 183930 }, { "epoch": 0.7001210386486302, "grad_norm": 0.12803073227405548, "learning_rate": 0.0005, "loss": 2.1196, "step": 183940 }, { "epoch": 0.7001591011167528, "grad_norm": 0.14072082936763763, "learning_rate": 0.0005, "loss": 2.1058, "step": 183950 }, { "epoch": 0.7001971635848755, "grad_norm": 0.11939458549022675, "learning_rate": 0.0005, "loss": 2.1174, "step": 183960 }, { "epoch": 0.7002352260529981, "grad_norm": 0.12321234494447708, "learning_rate": 0.0005, "loss": 2.1029, "step": 183970 }, { "epoch": 0.7002732885211209, "grad_norm": 0.11370708048343658, "learning_rate": 0.0005, "loss": 2.0981, "step": 183980 }, { "epoch": 0.7003113509892436, "grad_norm": 0.13109511137008667, "learning_rate": 0.0005, "loss": 2.1169, "step": 183990 }, { "epoch": 0.7003494134573662, "grad_norm": 0.11811353266239166, "learning_rate": 0.0005, "loss": 2.1147, "step": 184000 }, { "epoch": 0.7003874759254889, "grad_norm": 0.12360669672489166, "learning_rate": 0.0005, "loss": 2.1195, "step": 184010 }, { "epoch": 0.7004255383936115, "grad_norm": 0.15090149641036987, "learning_rate": 0.0005, "loss": 2.1195, "step": 184020 }, { "epoch": 0.7004636008617343, "grad_norm": 0.12501868605613708, "learning_rate": 0.0005, "loss": 2.0962, "step": 184030 }, { "epoch": 0.700501663329857, "grad_norm": 0.12880779802799225, "learning_rate": 0.0005, "loss": 2.1228, "step": 184040 }, { "epoch": 0.7005397257979796, "grad_norm": 0.11294928938150406, "learning_rate": 0.0005, "loss": 2.0966, "step": 184050 }, { "epoch": 0.7005777882661023, "grad_norm": 0.12811915576457977, "learning_rate": 0.0005, "loss": 2.1273, "step": 184060 }, { "epoch": 0.700615850734225, "grad_norm": 0.12252122163772583, "learning_rate": 0.0005, "loss": 2.0963, "step": 184070 }, { "epoch": 0.7006539132023477, "grad_norm": 0.12165088206529617, "learning_rate": 0.0005, "loss": 2.1046, "step": 184080 }, { "epoch": 0.7006919756704704, "grad_norm": 0.1230388954281807, "learning_rate": 0.0005, "loss": 2.1051, "step": 184090 }, { "epoch": 0.700730038138593, "grad_norm": 0.13142719864845276, "learning_rate": 0.0005, "loss": 2.1135, "step": 184100 }, { "epoch": 0.7007681006067158, "grad_norm": 0.12288113683462143, "learning_rate": 0.0005, "loss": 2.1218, "step": 184110 }, { "epoch": 0.7008061630748385, "grad_norm": 0.13205543160438538, "learning_rate": 0.0005, "loss": 2.1183, "step": 184120 }, { "epoch": 0.7008442255429611, "grad_norm": 0.13189668953418732, "learning_rate": 0.0005, "loss": 2.1188, "step": 184130 }, { "epoch": 0.7008822880110838, "grad_norm": 0.10998144000768661, "learning_rate": 0.0005, "loss": 2.1015, "step": 184140 }, { "epoch": 0.7009203504792064, "grad_norm": 0.12300020456314087, "learning_rate": 0.0005, "loss": 2.0962, "step": 184150 }, { "epoch": 0.7009584129473292, "grad_norm": 0.11949418485164642, "learning_rate": 0.0005, "loss": 2.1217, "step": 184160 }, { "epoch": 0.7009964754154518, "grad_norm": 0.1179998368024826, "learning_rate": 0.0005, "loss": 2.1059, "step": 184170 }, { "epoch": 0.7010345378835745, "grad_norm": 0.11798691749572754, "learning_rate": 0.0005, "loss": 2.1181, "step": 184180 }, { "epoch": 0.7010726003516972, "grad_norm": 0.13212557137012482, "learning_rate": 0.0005, "loss": 2.0912, "step": 184190 }, { "epoch": 0.7011106628198199, "grad_norm": 0.1313924491405487, "learning_rate": 0.0005, "loss": 2.1054, "step": 184200 }, { "epoch": 0.7011487252879426, "grad_norm": 0.12167239934206009, "learning_rate": 0.0005, "loss": 2.0977, "step": 184210 }, { "epoch": 0.7011867877560652, "grad_norm": 0.11549846827983856, "learning_rate": 0.0005, "loss": 2.1136, "step": 184220 }, { "epoch": 0.7012248502241879, "grad_norm": 0.11892012506723404, "learning_rate": 0.0005, "loss": 2.103, "step": 184230 }, { "epoch": 0.7012629126923107, "grad_norm": 0.11951800435781479, "learning_rate": 0.0005, "loss": 2.1143, "step": 184240 }, { "epoch": 0.7013009751604333, "grad_norm": 0.12169340997934341, "learning_rate": 0.0005, "loss": 2.1007, "step": 184250 }, { "epoch": 0.701339037628556, "grad_norm": 0.1290595978498459, "learning_rate": 0.0005, "loss": 2.0975, "step": 184260 }, { "epoch": 0.7013771000966786, "grad_norm": 0.13304656744003296, "learning_rate": 0.0005, "loss": 2.1231, "step": 184270 }, { "epoch": 0.7014151625648013, "grad_norm": 0.12380509823560715, "learning_rate": 0.0005, "loss": 2.1013, "step": 184280 }, { "epoch": 0.7014532250329241, "grad_norm": 0.12488371133804321, "learning_rate": 0.0005, "loss": 2.1084, "step": 184290 }, { "epoch": 0.7014912875010467, "grad_norm": 0.11350318789482117, "learning_rate": 0.0005, "loss": 2.1068, "step": 184300 }, { "epoch": 0.7015293499691694, "grad_norm": 0.12473493069410324, "learning_rate": 0.0005, "loss": 2.1008, "step": 184310 }, { "epoch": 0.701567412437292, "grad_norm": 0.1370915323495865, "learning_rate": 0.0005, "loss": 2.0993, "step": 184320 }, { "epoch": 0.7016054749054148, "grad_norm": 0.11229779571294785, "learning_rate": 0.0005, "loss": 2.1191, "step": 184330 }, { "epoch": 0.7016435373735375, "grad_norm": 0.1268649399280548, "learning_rate": 0.0005, "loss": 2.1118, "step": 184340 }, { "epoch": 0.7016815998416601, "grad_norm": 0.12467587739229202, "learning_rate": 0.0005, "loss": 2.0866, "step": 184350 }, { "epoch": 0.7017196623097828, "grad_norm": 0.13000306487083435, "learning_rate": 0.0005, "loss": 2.0873, "step": 184360 }, { "epoch": 0.7017577247779055, "grad_norm": 0.11177881062030792, "learning_rate": 0.0005, "loss": 2.1053, "step": 184370 }, { "epoch": 0.7017957872460282, "grad_norm": 0.12280798703432083, "learning_rate": 0.0005, "loss": 2.1004, "step": 184380 }, { "epoch": 0.7018338497141509, "grad_norm": 0.13626615703105927, "learning_rate": 0.0005, "loss": 2.1065, "step": 184390 }, { "epoch": 0.7018719121822735, "grad_norm": 0.12476800382137299, "learning_rate": 0.0005, "loss": 2.1027, "step": 184400 }, { "epoch": 0.7019099746503963, "grad_norm": 0.12631286680698395, "learning_rate": 0.0005, "loss": 2.0994, "step": 184410 }, { "epoch": 0.7019480371185189, "grad_norm": 0.12857434153556824, "learning_rate": 0.0005, "loss": 2.1074, "step": 184420 }, { "epoch": 0.7019860995866416, "grad_norm": 0.11980973184108734, "learning_rate": 0.0005, "loss": 2.0938, "step": 184430 }, { "epoch": 0.7020241620547643, "grad_norm": 0.1299014389514923, "learning_rate": 0.0005, "loss": 2.1297, "step": 184440 }, { "epoch": 0.7020622245228869, "grad_norm": 0.11516579985618591, "learning_rate": 0.0005, "loss": 2.1053, "step": 184450 }, { "epoch": 0.7021002869910097, "grad_norm": 0.13994434475898743, "learning_rate": 0.0005, "loss": 2.1124, "step": 184460 }, { "epoch": 0.7021383494591323, "grad_norm": 0.1350252479314804, "learning_rate": 0.0005, "loss": 2.0993, "step": 184470 }, { "epoch": 0.702176411927255, "grad_norm": 0.12053694576025009, "learning_rate": 0.0005, "loss": 2.1243, "step": 184480 }, { "epoch": 0.7022144743953777, "grad_norm": 0.1263304352760315, "learning_rate": 0.0005, "loss": 2.1263, "step": 184490 }, { "epoch": 0.7022525368635004, "grad_norm": 0.12201272696256638, "learning_rate": 0.0005, "loss": 2.1032, "step": 184500 }, { "epoch": 0.7022905993316231, "grad_norm": 0.1335000991821289, "learning_rate": 0.0005, "loss": 2.1124, "step": 184510 }, { "epoch": 0.7023286617997457, "grad_norm": 0.11891698092222214, "learning_rate": 0.0005, "loss": 2.1142, "step": 184520 }, { "epoch": 0.7023667242678684, "grad_norm": 0.11630988866090775, "learning_rate": 0.0005, "loss": 2.1202, "step": 184530 }, { "epoch": 0.7024047867359912, "grad_norm": 0.11909238249063492, "learning_rate": 0.0005, "loss": 2.1008, "step": 184540 }, { "epoch": 0.7024428492041138, "grad_norm": 0.11476001888513565, "learning_rate": 0.0005, "loss": 2.1003, "step": 184550 }, { "epoch": 0.7024809116722365, "grad_norm": 0.1404361128807068, "learning_rate": 0.0005, "loss": 2.1092, "step": 184560 }, { "epoch": 0.7025189741403591, "grad_norm": 0.12600143253803253, "learning_rate": 0.0005, "loss": 2.1179, "step": 184570 }, { "epoch": 0.7025570366084818, "grad_norm": 0.15968388319015503, "learning_rate": 0.0005, "loss": 2.1126, "step": 184580 }, { "epoch": 0.7025950990766046, "grad_norm": 0.13121315836906433, "learning_rate": 0.0005, "loss": 2.1242, "step": 184590 }, { "epoch": 0.7026331615447272, "grad_norm": 0.12117509543895721, "learning_rate": 0.0005, "loss": 2.1056, "step": 184600 }, { "epoch": 0.7026712240128499, "grad_norm": 0.1206469014286995, "learning_rate": 0.0005, "loss": 2.1233, "step": 184610 }, { "epoch": 0.7027092864809725, "grad_norm": 0.1204044297337532, "learning_rate": 0.0005, "loss": 2.1117, "step": 184620 }, { "epoch": 0.7027473489490953, "grad_norm": 0.11982487142086029, "learning_rate": 0.0005, "loss": 2.0926, "step": 184630 }, { "epoch": 0.702785411417218, "grad_norm": 0.1303478628396988, "learning_rate": 0.0005, "loss": 2.0976, "step": 184640 }, { "epoch": 0.7028234738853406, "grad_norm": 0.12789596617221832, "learning_rate": 0.0005, "loss": 2.1127, "step": 184650 }, { "epoch": 0.7028615363534633, "grad_norm": 0.12563934922218323, "learning_rate": 0.0005, "loss": 2.1124, "step": 184660 }, { "epoch": 0.702899598821586, "grad_norm": 0.12067053467035294, "learning_rate": 0.0005, "loss": 2.1013, "step": 184670 }, { "epoch": 0.7029376612897087, "grad_norm": 0.12684741616249084, "learning_rate": 0.0005, "loss": 2.1195, "step": 184680 }, { "epoch": 0.7029757237578314, "grad_norm": 0.12234799563884735, "learning_rate": 0.0005, "loss": 2.0969, "step": 184690 }, { "epoch": 0.703013786225954, "grad_norm": 0.1267378032207489, "learning_rate": 0.0005, "loss": 2.1145, "step": 184700 }, { "epoch": 0.7030518486940767, "grad_norm": 0.12101038545370102, "learning_rate": 0.0005, "loss": 2.1191, "step": 184710 }, { "epoch": 0.7030899111621994, "grad_norm": 0.1352284848690033, "learning_rate": 0.0005, "loss": 2.0959, "step": 184720 }, { "epoch": 0.7031279736303221, "grad_norm": 0.11621130257844925, "learning_rate": 0.0005, "loss": 2.1017, "step": 184730 }, { "epoch": 0.7031660360984447, "grad_norm": 0.11834180355072021, "learning_rate": 0.0005, "loss": 2.1128, "step": 184740 }, { "epoch": 0.7032040985665674, "grad_norm": 0.12049825489521027, "learning_rate": 0.0005, "loss": 2.1285, "step": 184750 }, { "epoch": 0.7032421610346902, "grad_norm": 0.12099867314100266, "learning_rate": 0.0005, "loss": 2.1181, "step": 184760 }, { "epoch": 0.7032802235028128, "grad_norm": 0.12538106739521027, "learning_rate": 0.0005, "loss": 2.0961, "step": 184770 }, { "epoch": 0.7033182859709355, "grad_norm": 0.1355898529291153, "learning_rate": 0.0005, "loss": 2.104, "step": 184780 }, { "epoch": 0.7033563484390581, "grad_norm": 0.12255386263132095, "learning_rate": 0.0005, "loss": 2.1086, "step": 184790 }, { "epoch": 0.7033944109071809, "grad_norm": 0.11250531673431396, "learning_rate": 0.0005, "loss": 2.1188, "step": 184800 }, { "epoch": 0.7034324733753036, "grad_norm": 0.11034489423036575, "learning_rate": 0.0005, "loss": 2.1099, "step": 184810 }, { "epoch": 0.7034705358434262, "grad_norm": 0.12313147634267807, "learning_rate": 0.0005, "loss": 2.0969, "step": 184820 }, { "epoch": 0.7035085983115489, "grad_norm": 0.13078320026397705, "learning_rate": 0.0005, "loss": 2.1041, "step": 184830 }, { "epoch": 0.7035466607796717, "grad_norm": 0.12762703001499176, "learning_rate": 0.0005, "loss": 2.1021, "step": 184840 }, { "epoch": 0.7035847232477943, "grad_norm": 0.13302679359912872, "learning_rate": 0.0005, "loss": 2.1015, "step": 184850 }, { "epoch": 0.703622785715917, "grad_norm": 0.12136547267436981, "learning_rate": 0.0005, "loss": 2.1057, "step": 184860 }, { "epoch": 0.7036608481840396, "grad_norm": 0.17401854693889618, "learning_rate": 0.0005, "loss": 2.0976, "step": 184870 }, { "epoch": 0.7036989106521623, "grad_norm": 0.12410420179367065, "learning_rate": 0.0005, "loss": 2.1135, "step": 184880 }, { "epoch": 0.703736973120285, "grad_norm": 0.11731221526861191, "learning_rate": 0.0005, "loss": 2.1112, "step": 184890 }, { "epoch": 0.7037750355884077, "grad_norm": 0.11649060994386673, "learning_rate": 0.0005, "loss": 2.1086, "step": 184900 }, { "epoch": 0.7038130980565304, "grad_norm": 0.11642348766326904, "learning_rate": 0.0005, "loss": 2.1011, "step": 184910 }, { "epoch": 0.703851160524653, "grad_norm": 0.11932951956987381, "learning_rate": 0.0005, "loss": 2.1041, "step": 184920 }, { "epoch": 0.7038892229927758, "grad_norm": 0.11806853115558624, "learning_rate": 0.0005, "loss": 2.103, "step": 184930 }, { "epoch": 0.7039272854608984, "grad_norm": 0.12727023661136627, "learning_rate": 0.0005, "loss": 2.1294, "step": 184940 }, { "epoch": 0.7039653479290211, "grad_norm": 0.1231415644288063, "learning_rate": 0.0005, "loss": 2.0955, "step": 184950 }, { "epoch": 0.7040034103971438, "grad_norm": 0.12632031738758087, "learning_rate": 0.0005, "loss": 2.1043, "step": 184960 }, { "epoch": 0.7040414728652665, "grad_norm": 0.1354237049818039, "learning_rate": 0.0005, "loss": 2.0975, "step": 184970 }, { "epoch": 0.7040795353333892, "grad_norm": 0.11664775758981705, "learning_rate": 0.0005, "loss": 2.1125, "step": 184980 }, { "epoch": 0.7041175978015118, "grad_norm": 0.11538016051054001, "learning_rate": 0.0005, "loss": 2.1019, "step": 184990 }, { "epoch": 0.7041556602696345, "grad_norm": 0.11794717609882355, "learning_rate": 0.0005, "loss": 2.099, "step": 185000 }, { "epoch": 0.7041937227377572, "grad_norm": 0.12959380447864532, "learning_rate": 0.0005, "loss": 2.1094, "step": 185010 }, { "epoch": 0.7042317852058799, "grad_norm": 0.12816959619522095, "learning_rate": 0.0005, "loss": 2.1147, "step": 185020 }, { "epoch": 0.7042698476740026, "grad_norm": 0.1298322081565857, "learning_rate": 0.0005, "loss": 2.1095, "step": 185030 }, { "epoch": 0.7043079101421252, "grad_norm": 0.12537501752376556, "learning_rate": 0.0005, "loss": 2.11, "step": 185040 }, { "epoch": 0.7043459726102479, "grad_norm": 0.14048700034618378, "learning_rate": 0.0005, "loss": 2.1137, "step": 185050 }, { "epoch": 0.7043840350783707, "grad_norm": 0.12887980043888092, "learning_rate": 0.0005, "loss": 2.102, "step": 185060 }, { "epoch": 0.7044220975464933, "grad_norm": 0.15135058760643005, "learning_rate": 0.0005, "loss": 2.1205, "step": 185070 }, { "epoch": 0.704460160014616, "grad_norm": 0.12217877805233002, "learning_rate": 0.0005, "loss": 2.1234, "step": 185080 }, { "epoch": 0.7044982224827386, "grad_norm": 0.11798762530088425, "learning_rate": 0.0005, "loss": 2.1144, "step": 185090 }, { "epoch": 0.7045362849508614, "grad_norm": 0.12822438776493073, "learning_rate": 0.0005, "loss": 2.1146, "step": 185100 }, { "epoch": 0.7045743474189841, "grad_norm": 0.11703921109437943, "learning_rate": 0.0005, "loss": 2.106, "step": 185110 }, { "epoch": 0.7046124098871067, "grad_norm": 0.1345566064119339, "learning_rate": 0.0005, "loss": 2.1202, "step": 185120 }, { "epoch": 0.7046504723552294, "grad_norm": 0.13655199110507965, "learning_rate": 0.0005, "loss": 2.108, "step": 185130 }, { "epoch": 0.704688534823352, "grad_norm": 0.12039312720298767, "learning_rate": 0.0005, "loss": 2.1098, "step": 185140 }, { "epoch": 0.7047265972914748, "grad_norm": 0.12556329369544983, "learning_rate": 0.0005, "loss": 2.0966, "step": 185150 }, { "epoch": 0.7047646597595975, "grad_norm": 0.12472794204950333, "learning_rate": 0.0005, "loss": 2.1079, "step": 185160 }, { "epoch": 0.7048027222277201, "grad_norm": 0.1353958398103714, "learning_rate": 0.0005, "loss": 2.1174, "step": 185170 }, { "epoch": 0.7048407846958428, "grad_norm": 0.1271139532327652, "learning_rate": 0.0005, "loss": 2.1132, "step": 185180 }, { "epoch": 0.7048788471639655, "grad_norm": 0.1349204033613205, "learning_rate": 0.0005, "loss": 2.1071, "step": 185190 }, { "epoch": 0.7049169096320882, "grad_norm": 0.13143670558929443, "learning_rate": 0.0005, "loss": 2.1176, "step": 185200 }, { "epoch": 0.7049549721002109, "grad_norm": 0.11317627131938934, "learning_rate": 0.0005, "loss": 2.1002, "step": 185210 }, { "epoch": 0.7049930345683335, "grad_norm": 0.14271529018878937, "learning_rate": 0.0005, "loss": 2.1191, "step": 185220 }, { "epoch": 0.7050310970364563, "grad_norm": 0.12950241565704346, "learning_rate": 0.0005, "loss": 2.1046, "step": 185230 }, { "epoch": 0.7050691595045789, "grad_norm": 0.12387185543775558, "learning_rate": 0.0005, "loss": 2.0931, "step": 185240 }, { "epoch": 0.7051072219727016, "grad_norm": 0.14337509870529175, "learning_rate": 0.0005, "loss": 2.1013, "step": 185250 }, { "epoch": 0.7051452844408242, "grad_norm": 0.12338105589151382, "learning_rate": 0.0005, "loss": 2.1187, "step": 185260 }, { "epoch": 0.705183346908947, "grad_norm": 0.1267063170671463, "learning_rate": 0.0005, "loss": 2.1155, "step": 185270 }, { "epoch": 0.7052214093770697, "grad_norm": 0.12914665043354034, "learning_rate": 0.0005, "loss": 2.1178, "step": 185280 }, { "epoch": 0.7052594718451923, "grad_norm": 0.1194671168923378, "learning_rate": 0.0005, "loss": 2.1168, "step": 185290 }, { "epoch": 0.705297534313315, "grad_norm": 0.12188367545604706, "learning_rate": 0.0005, "loss": 2.1084, "step": 185300 }, { "epoch": 0.7053355967814376, "grad_norm": 0.12353203445672989, "learning_rate": 0.0005, "loss": 2.1, "step": 185310 }, { "epoch": 0.7053736592495604, "grad_norm": 0.11360117048025131, "learning_rate": 0.0005, "loss": 2.1112, "step": 185320 }, { "epoch": 0.7054117217176831, "grad_norm": 0.11561376601457596, "learning_rate": 0.0005, "loss": 2.1113, "step": 185330 }, { "epoch": 0.7054497841858057, "grad_norm": 0.1353146731853485, "learning_rate": 0.0005, "loss": 2.1044, "step": 185340 }, { "epoch": 0.7054878466539284, "grad_norm": 0.13493327796459198, "learning_rate": 0.0005, "loss": 2.1165, "step": 185350 }, { "epoch": 0.7055259091220512, "grad_norm": 0.1353655904531479, "learning_rate": 0.0005, "loss": 2.1267, "step": 185360 }, { "epoch": 0.7055639715901738, "grad_norm": 0.12144365161657333, "learning_rate": 0.0005, "loss": 2.1085, "step": 185370 }, { "epoch": 0.7056020340582965, "grad_norm": 0.15383411943912506, "learning_rate": 0.0005, "loss": 2.1046, "step": 185380 }, { "epoch": 0.7056400965264191, "grad_norm": 0.12681280076503754, "learning_rate": 0.0005, "loss": 2.1125, "step": 185390 }, { "epoch": 0.7056781589945419, "grad_norm": 0.11766770482063293, "learning_rate": 0.0005, "loss": 2.1162, "step": 185400 }, { "epoch": 0.7057162214626646, "grad_norm": 0.11990108340978622, "learning_rate": 0.0005, "loss": 2.0879, "step": 185410 }, { "epoch": 0.7057542839307872, "grad_norm": 0.11549576371908188, "learning_rate": 0.0005, "loss": 2.1067, "step": 185420 }, { "epoch": 0.7057923463989099, "grad_norm": 0.1381005048751831, "learning_rate": 0.0005, "loss": 2.1164, "step": 185430 }, { "epoch": 0.7058304088670325, "grad_norm": 0.1284915655851364, "learning_rate": 0.0005, "loss": 2.1054, "step": 185440 }, { "epoch": 0.7058684713351553, "grad_norm": 0.12681995332241058, "learning_rate": 0.0005, "loss": 2.1159, "step": 185450 }, { "epoch": 0.705906533803278, "grad_norm": 0.12076663970947266, "learning_rate": 0.0005, "loss": 2.0996, "step": 185460 }, { "epoch": 0.7059445962714006, "grad_norm": 0.12343986332416534, "learning_rate": 0.0005, "loss": 2.0981, "step": 185470 }, { "epoch": 0.7059826587395233, "grad_norm": 0.13354164361953735, "learning_rate": 0.0005, "loss": 2.1272, "step": 185480 }, { "epoch": 0.706020721207646, "grad_norm": 0.12271565198898315, "learning_rate": 0.0005, "loss": 2.1165, "step": 185490 }, { "epoch": 0.7060587836757687, "grad_norm": 0.12300974130630493, "learning_rate": 0.0005, "loss": 2.1127, "step": 185500 }, { "epoch": 0.7060968461438913, "grad_norm": 0.11424795538187027, "learning_rate": 0.0005, "loss": 2.1088, "step": 185510 }, { "epoch": 0.706134908612014, "grad_norm": 0.11418292671442032, "learning_rate": 0.0005, "loss": 2.0861, "step": 185520 }, { "epoch": 0.7061729710801368, "grad_norm": 0.12428473681211472, "learning_rate": 0.0005, "loss": 2.105, "step": 185530 }, { "epoch": 0.7062110335482594, "grad_norm": 0.11890202015638351, "learning_rate": 0.0005, "loss": 2.103, "step": 185540 }, { "epoch": 0.7062490960163821, "grad_norm": 0.12819798290729523, "learning_rate": 0.0005, "loss": 2.1087, "step": 185550 }, { "epoch": 0.7062871584845047, "grad_norm": 0.12449389696121216, "learning_rate": 0.0005, "loss": 2.1107, "step": 185560 }, { "epoch": 0.7063252209526274, "grad_norm": 0.1176564022898674, "learning_rate": 0.0005, "loss": 2.1088, "step": 185570 }, { "epoch": 0.7063632834207502, "grad_norm": 0.1193840354681015, "learning_rate": 0.0005, "loss": 2.1153, "step": 185580 }, { "epoch": 0.7064013458888728, "grad_norm": 0.11895597726106644, "learning_rate": 0.0005, "loss": 2.0847, "step": 185590 }, { "epoch": 0.7064394083569955, "grad_norm": 0.11255518347024918, "learning_rate": 0.0005, "loss": 2.0921, "step": 185600 }, { "epoch": 0.7064774708251181, "grad_norm": 0.11871010065078735, "learning_rate": 0.0005, "loss": 2.0982, "step": 185610 }, { "epoch": 0.7065155332932409, "grad_norm": 0.11335593461990356, "learning_rate": 0.0005, "loss": 2.1114, "step": 185620 }, { "epoch": 0.7065535957613636, "grad_norm": 0.12085136771202087, "learning_rate": 0.0005, "loss": 2.1156, "step": 185630 }, { "epoch": 0.7065916582294862, "grad_norm": 0.12406984716653824, "learning_rate": 0.0005, "loss": 2.1039, "step": 185640 }, { "epoch": 0.7066297206976089, "grad_norm": 0.13687089085578918, "learning_rate": 0.0005, "loss": 2.0998, "step": 185650 }, { "epoch": 0.7066677831657316, "grad_norm": 0.1142050176858902, "learning_rate": 0.0005, "loss": 2.1127, "step": 185660 }, { "epoch": 0.7067058456338543, "grad_norm": 0.11903434246778488, "learning_rate": 0.0005, "loss": 2.1026, "step": 185670 }, { "epoch": 0.706743908101977, "grad_norm": 0.12342733144760132, "learning_rate": 0.0005, "loss": 2.1192, "step": 185680 }, { "epoch": 0.7067819705700996, "grad_norm": 0.11694970726966858, "learning_rate": 0.0005, "loss": 2.1159, "step": 185690 }, { "epoch": 0.7068200330382224, "grad_norm": 0.12412623316049576, "learning_rate": 0.0005, "loss": 2.1007, "step": 185700 }, { "epoch": 0.706858095506345, "grad_norm": 0.13319043815135956, "learning_rate": 0.0005, "loss": 2.1299, "step": 185710 }, { "epoch": 0.7068961579744677, "grad_norm": 0.13207711279392242, "learning_rate": 0.0005, "loss": 2.1101, "step": 185720 }, { "epoch": 0.7069342204425904, "grad_norm": 0.4719063639640808, "learning_rate": 0.0005, "loss": 2.1014, "step": 185730 }, { "epoch": 0.706972282910713, "grad_norm": 0.1343603879213333, "learning_rate": 0.0005, "loss": 2.1042, "step": 185740 }, { "epoch": 0.7070103453788358, "grad_norm": 0.11707521229982376, "learning_rate": 0.0005, "loss": 2.1168, "step": 185750 }, { "epoch": 0.7070484078469584, "grad_norm": 0.11734578758478165, "learning_rate": 0.0005, "loss": 2.1129, "step": 185760 }, { "epoch": 0.7070864703150811, "grad_norm": 0.12925496697425842, "learning_rate": 0.0005, "loss": 2.1011, "step": 185770 }, { "epoch": 0.7071245327832038, "grad_norm": 0.11399725079536438, "learning_rate": 0.0005, "loss": 2.0981, "step": 185780 }, { "epoch": 0.7071625952513265, "grad_norm": 0.12147218734025955, "learning_rate": 0.0005, "loss": 2.1022, "step": 185790 }, { "epoch": 0.7072006577194492, "grad_norm": 0.11649216711521149, "learning_rate": 0.0005, "loss": 2.1095, "step": 185800 }, { "epoch": 0.7072387201875718, "grad_norm": 0.12788520753383636, "learning_rate": 0.0005, "loss": 2.1143, "step": 185810 }, { "epoch": 0.7072767826556945, "grad_norm": 0.12423229217529297, "learning_rate": 0.0005, "loss": 2.099, "step": 185820 }, { "epoch": 0.7073148451238173, "grad_norm": 0.12157479673624039, "learning_rate": 0.0005, "loss": 2.1251, "step": 185830 }, { "epoch": 0.7073529075919399, "grad_norm": 0.13278579711914062, "learning_rate": 0.0005, "loss": 2.0984, "step": 185840 }, { "epoch": 0.7073909700600626, "grad_norm": 0.13672243058681488, "learning_rate": 0.0005, "loss": 2.1128, "step": 185850 }, { "epoch": 0.7074290325281852, "grad_norm": 0.11652804166078568, "learning_rate": 0.0005, "loss": 2.1266, "step": 185860 }, { "epoch": 0.7074670949963079, "grad_norm": 0.11444343626499176, "learning_rate": 0.0005, "loss": 2.1051, "step": 185870 }, { "epoch": 0.7075051574644307, "grad_norm": 0.12347528338432312, "learning_rate": 0.0005, "loss": 2.1168, "step": 185880 }, { "epoch": 0.7075432199325533, "grad_norm": 0.10334424674510956, "learning_rate": 0.0005, "loss": 2.1158, "step": 185890 }, { "epoch": 0.707581282400676, "grad_norm": 0.17353834211826324, "learning_rate": 0.0005, "loss": 2.1085, "step": 185900 }, { "epoch": 0.7076193448687986, "grad_norm": 0.12602335214614868, "learning_rate": 0.0005, "loss": 2.108, "step": 185910 }, { "epoch": 0.7076574073369214, "grad_norm": 0.12529000639915466, "learning_rate": 0.0005, "loss": 2.1098, "step": 185920 }, { "epoch": 0.707695469805044, "grad_norm": 0.1246362179517746, "learning_rate": 0.0005, "loss": 2.1062, "step": 185930 }, { "epoch": 0.7077335322731667, "grad_norm": 0.125050887465477, "learning_rate": 0.0005, "loss": 2.1056, "step": 185940 }, { "epoch": 0.7077715947412894, "grad_norm": 0.12313340604305267, "learning_rate": 0.0005, "loss": 2.1213, "step": 185950 }, { "epoch": 0.7078096572094121, "grad_norm": 0.12455099076032639, "learning_rate": 0.0005, "loss": 2.1028, "step": 185960 }, { "epoch": 0.7078477196775348, "grad_norm": 0.11638380587100983, "learning_rate": 0.0005, "loss": 2.1016, "step": 185970 }, { "epoch": 0.7078857821456574, "grad_norm": 0.12521560490131378, "learning_rate": 0.0005, "loss": 2.1063, "step": 185980 }, { "epoch": 0.7079238446137801, "grad_norm": 0.11603990197181702, "learning_rate": 0.0005, "loss": 2.1009, "step": 185990 }, { "epoch": 0.7079619070819028, "grad_norm": 0.11898902803659439, "learning_rate": 0.0005, "loss": 2.1061, "step": 186000 }, { "epoch": 0.7079999695500255, "grad_norm": 0.13573262095451355, "learning_rate": 0.0005, "loss": 2.1197, "step": 186010 }, { "epoch": 0.7080380320181482, "grad_norm": 0.13460132479667664, "learning_rate": 0.0005, "loss": 2.1087, "step": 186020 }, { "epoch": 0.7080760944862708, "grad_norm": 0.12528298795223236, "learning_rate": 0.0005, "loss": 2.1127, "step": 186030 }, { "epoch": 0.7081141569543935, "grad_norm": 0.12539632618427277, "learning_rate": 0.0005, "loss": 2.1028, "step": 186040 }, { "epoch": 0.7081522194225163, "grad_norm": 0.13004456460475922, "learning_rate": 0.0005, "loss": 2.0981, "step": 186050 }, { "epoch": 0.7081902818906389, "grad_norm": 0.11649925261735916, "learning_rate": 0.0005, "loss": 2.1092, "step": 186060 }, { "epoch": 0.7082283443587616, "grad_norm": 0.12443257868289948, "learning_rate": 0.0005, "loss": 2.0926, "step": 186070 }, { "epoch": 0.7082664068268842, "grad_norm": 0.11880318075418472, "learning_rate": 0.0005, "loss": 2.1129, "step": 186080 }, { "epoch": 0.708304469295007, "grad_norm": 0.12145640701055527, "learning_rate": 0.0005, "loss": 2.1083, "step": 186090 }, { "epoch": 0.7083425317631297, "grad_norm": 0.11560018360614777, "learning_rate": 0.0005, "loss": 2.1135, "step": 186100 }, { "epoch": 0.7083805942312523, "grad_norm": 0.12145461142063141, "learning_rate": 0.0005, "loss": 2.1084, "step": 186110 }, { "epoch": 0.708418656699375, "grad_norm": 0.126446932554245, "learning_rate": 0.0005, "loss": 2.1053, "step": 186120 }, { "epoch": 0.7084567191674978, "grad_norm": 0.10951827466487885, "learning_rate": 0.0005, "loss": 2.1117, "step": 186130 }, { "epoch": 0.7084947816356204, "grad_norm": 0.12064436078071594, "learning_rate": 0.0005, "loss": 2.1076, "step": 186140 }, { "epoch": 0.7085328441037431, "grad_norm": 0.12185485661029816, "learning_rate": 0.0005, "loss": 2.1102, "step": 186150 }, { "epoch": 0.7085709065718657, "grad_norm": 0.12862376868724823, "learning_rate": 0.0005, "loss": 2.112, "step": 186160 }, { "epoch": 0.7086089690399884, "grad_norm": 0.12142283469438553, "learning_rate": 0.0005, "loss": 2.0963, "step": 186170 }, { "epoch": 0.7086470315081111, "grad_norm": 0.12142530828714371, "learning_rate": 0.0005, "loss": 2.1225, "step": 186180 }, { "epoch": 0.7086850939762338, "grad_norm": 0.12393418699502945, "learning_rate": 0.0005, "loss": 2.0839, "step": 186190 }, { "epoch": 0.7087231564443565, "grad_norm": 0.11970370262861252, "learning_rate": 0.0005, "loss": 2.116, "step": 186200 }, { "epoch": 0.7087612189124791, "grad_norm": 0.1194053664803505, "learning_rate": 0.0005, "loss": 2.1064, "step": 186210 }, { "epoch": 0.7087992813806019, "grad_norm": 0.13109378516674042, "learning_rate": 0.0005, "loss": 2.1142, "step": 186220 }, { "epoch": 0.7088373438487245, "grad_norm": 0.14906781911849976, "learning_rate": 0.0005, "loss": 2.1132, "step": 186230 }, { "epoch": 0.7088754063168472, "grad_norm": 0.12335547059774399, "learning_rate": 0.0005, "loss": 2.1044, "step": 186240 }, { "epoch": 0.7089134687849699, "grad_norm": 0.12918975949287415, "learning_rate": 0.0005, "loss": 2.1033, "step": 186250 }, { "epoch": 0.7089515312530926, "grad_norm": 0.11700023710727692, "learning_rate": 0.0005, "loss": 2.1167, "step": 186260 }, { "epoch": 0.7089895937212153, "grad_norm": 0.12088143825531006, "learning_rate": 0.0005, "loss": 2.1041, "step": 186270 }, { "epoch": 0.7090276561893379, "grad_norm": 0.14303064346313477, "learning_rate": 0.0005, "loss": 2.0962, "step": 186280 }, { "epoch": 0.7090657186574606, "grad_norm": 0.12583290040493011, "learning_rate": 0.0005, "loss": 2.1212, "step": 186290 }, { "epoch": 0.7091037811255833, "grad_norm": 0.13083286583423615, "learning_rate": 0.0005, "loss": 2.1059, "step": 186300 }, { "epoch": 0.709141843593706, "grad_norm": 0.11844097822904587, "learning_rate": 0.0005, "loss": 2.119, "step": 186310 }, { "epoch": 0.7091799060618287, "grad_norm": 0.1358223557472229, "learning_rate": 0.0005, "loss": 2.107, "step": 186320 }, { "epoch": 0.7092179685299513, "grad_norm": 0.12576615810394287, "learning_rate": 0.0005, "loss": 2.0985, "step": 186330 }, { "epoch": 0.709256030998074, "grad_norm": 0.1307367980480194, "learning_rate": 0.0005, "loss": 2.1019, "step": 186340 }, { "epoch": 0.7092940934661968, "grad_norm": 0.12038248777389526, "learning_rate": 0.0005, "loss": 2.1183, "step": 186350 }, { "epoch": 0.7093321559343194, "grad_norm": 0.12328119575977325, "learning_rate": 0.0005, "loss": 2.1142, "step": 186360 }, { "epoch": 0.7093702184024421, "grad_norm": 0.12589795887470245, "learning_rate": 0.0005, "loss": 2.0998, "step": 186370 }, { "epoch": 0.7094082808705647, "grad_norm": 0.11715169250965118, "learning_rate": 0.0005, "loss": 2.1036, "step": 186380 }, { "epoch": 0.7094463433386875, "grad_norm": 0.12813791632652283, "learning_rate": 0.0005, "loss": 2.1174, "step": 186390 }, { "epoch": 0.7094844058068102, "grad_norm": 0.17527632415294647, "learning_rate": 0.0005, "loss": 2.1177, "step": 186400 }, { "epoch": 0.7095224682749328, "grad_norm": 0.1268410086631775, "learning_rate": 0.0005, "loss": 2.1064, "step": 186410 }, { "epoch": 0.7095605307430555, "grad_norm": 0.13726426661014557, "learning_rate": 0.0005, "loss": 2.1037, "step": 186420 }, { "epoch": 0.7095985932111781, "grad_norm": 0.1140046939253807, "learning_rate": 0.0005, "loss": 2.1205, "step": 186430 }, { "epoch": 0.7096366556793009, "grad_norm": 0.11753685027360916, "learning_rate": 0.0005, "loss": 2.1255, "step": 186440 }, { "epoch": 0.7096747181474236, "grad_norm": 0.12714707851409912, "learning_rate": 0.0005, "loss": 2.0882, "step": 186450 }, { "epoch": 0.7097127806155462, "grad_norm": 0.12958048284053802, "learning_rate": 0.0005, "loss": 2.1087, "step": 186460 }, { "epoch": 0.7097508430836689, "grad_norm": 0.13289125263690948, "learning_rate": 0.0005, "loss": 2.1, "step": 186470 }, { "epoch": 0.7097889055517916, "grad_norm": 0.1325511336326599, "learning_rate": 0.0005, "loss": 2.0969, "step": 186480 }, { "epoch": 0.7098269680199143, "grad_norm": 0.11752146482467651, "learning_rate": 0.0005, "loss": 2.1278, "step": 186490 }, { "epoch": 0.709865030488037, "grad_norm": 0.11792637407779694, "learning_rate": 0.0005, "loss": 2.1034, "step": 186500 }, { "epoch": 0.7099030929561596, "grad_norm": 0.12621088325977325, "learning_rate": 0.0005, "loss": 2.1125, "step": 186510 }, { "epoch": 0.7099411554242824, "grad_norm": 0.12432099878787994, "learning_rate": 0.0005, "loss": 2.089, "step": 186520 }, { "epoch": 0.709979217892405, "grad_norm": 0.13783694803714752, "learning_rate": 0.0005, "loss": 2.1012, "step": 186530 }, { "epoch": 0.7100172803605277, "grad_norm": 0.12313029170036316, "learning_rate": 0.0005, "loss": 2.1047, "step": 186540 }, { "epoch": 0.7100553428286503, "grad_norm": 0.15528278052806854, "learning_rate": 0.0005, "loss": 2.1091, "step": 186550 }, { "epoch": 0.7100934052967731, "grad_norm": 0.1190701350569725, "learning_rate": 0.0005, "loss": 2.1035, "step": 186560 }, { "epoch": 0.7101314677648958, "grad_norm": 0.12318447232246399, "learning_rate": 0.0005, "loss": 2.104, "step": 186570 }, { "epoch": 0.7101695302330184, "grad_norm": 0.12099643796682358, "learning_rate": 0.0005, "loss": 2.1056, "step": 186580 }, { "epoch": 0.7102075927011411, "grad_norm": 0.12311971932649612, "learning_rate": 0.0005, "loss": 2.096, "step": 186590 }, { "epoch": 0.7102456551692637, "grad_norm": 0.12411779165267944, "learning_rate": 0.0005, "loss": 2.1147, "step": 186600 }, { "epoch": 0.7102837176373865, "grad_norm": 0.12775003910064697, "learning_rate": 0.0005, "loss": 2.1279, "step": 186610 }, { "epoch": 0.7103217801055092, "grad_norm": 0.11959369480609894, "learning_rate": 0.0005, "loss": 2.1155, "step": 186620 }, { "epoch": 0.7103598425736318, "grad_norm": 0.13204076886177063, "learning_rate": 0.0005, "loss": 2.1142, "step": 186630 }, { "epoch": 0.7103979050417545, "grad_norm": 0.12158135324716568, "learning_rate": 0.0005, "loss": 2.1158, "step": 186640 }, { "epoch": 0.7104359675098773, "grad_norm": 0.11938668042421341, "learning_rate": 0.0005, "loss": 2.1186, "step": 186650 }, { "epoch": 0.7104740299779999, "grad_norm": 0.11448933929204941, "learning_rate": 0.0005, "loss": 2.0976, "step": 186660 }, { "epoch": 0.7105120924461226, "grad_norm": 0.11393057554960251, "learning_rate": 0.0005, "loss": 2.1033, "step": 186670 }, { "epoch": 0.7105501549142452, "grad_norm": 0.12316378206014633, "learning_rate": 0.0005, "loss": 2.1103, "step": 186680 }, { "epoch": 0.710588217382368, "grad_norm": 0.11819449812173843, "learning_rate": 0.0005, "loss": 2.1167, "step": 186690 }, { "epoch": 0.7106262798504906, "grad_norm": 0.12064626067876816, "learning_rate": 0.0005, "loss": 2.0913, "step": 186700 }, { "epoch": 0.7106643423186133, "grad_norm": 0.1182859018445015, "learning_rate": 0.0005, "loss": 2.1155, "step": 186710 }, { "epoch": 0.710702404786736, "grad_norm": 0.12516747415065765, "learning_rate": 0.0005, "loss": 2.1133, "step": 186720 }, { "epoch": 0.7107404672548586, "grad_norm": 0.13327202200889587, "learning_rate": 0.0005, "loss": 2.114, "step": 186730 }, { "epoch": 0.7107785297229814, "grad_norm": 0.12220057845115662, "learning_rate": 0.0005, "loss": 2.1101, "step": 186740 }, { "epoch": 0.710816592191104, "grad_norm": 0.12841612100601196, "learning_rate": 0.0005, "loss": 2.0913, "step": 186750 }, { "epoch": 0.7108546546592267, "grad_norm": 0.11224877089262009, "learning_rate": 0.0005, "loss": 2.1178, "step": 186760 }, { "epoch": 0.7108927171273494, "grad_norm": 0.12116874754428864, "learning_rate": 0.0005, "loss": 2.1134, "step": 186770 }, { "epoch": 0.7109307795954721, "grad_norm": 0.1130293607711792, "learning_rate": 0.0005, "loss": 2.1147, "step": 186780 }, { "epoch": 0.7109688420635948, "grad_norm": 0.12867222726345062, "learning_rate": 0.0005, "loss": 2.1228, "step": 186790 }, { "epoch": 0.7110069045317174, "grad_norm": 0.12083183974027634, "learning_rate": 0.0005, "loss": 2.1056, "step": 186800 }, { "epoch": 0.7110449669998401, "grad_norm": 0.11976433545351028, "learning_rate": 0.0005, "loss": 2.0977, "step": 186810 }, { "epoch": 0.7110830294679629, "grad_norm": 0.1157735213637352, "learning_rate": 0.0005, "loss": 2.1048, "step": 186820 }, { "epoch": 0.7111210919360855, "grad_norm": 0.12680009007453918, "learning_rate": 0.0005, "loss": 2.1024, "step": 186830 }, { "epoch": 0.7111591544042082, "grad_norm": 0.13108272850513458, "learning_rate": 0.0005, "loss": 2.0836, "step": 186840 }, { "epoch": 0.7111972168723308, "grad_norm": 0.11942317336797714, "learning_rate": 0.0005, "loss": 2.1133, "step": 186850 }, { "epoch": 0.7112352793404536, "grad_norm": 0.12562249600887299, "learning_rate": 0.0005, "loss": 2.0997, "step": 186860 }, { "epoch": 0.7112733418085763, "grad_norm": 0.13094764947891235, "learning_rate": 0.0005, "loss": 2.1109, "step": 186870 }, { "epoch": 0.7113114042766989, "grad_norm": 0.14853011071681976, "learning_rate": 0.0005, "loss": 2.1094, "step": 186880 }, { "epoch": 0.7113494667448216, "grad_norm": 0.21083134412765503, "learning_rate": 0.0005, "loss": 2.1096, "step": 186890 }, { "epoch": 0.7113875292129442, "grad_norm": 0.12516815960407257, "learning_rate": 0.0005, "loss": 2.115, "step": 186900 }, { "epoch": 0.711425591681067, "grad_norm": 0.11651566624641418, "learning_rate": 0.0005, "loss": 2.0968, "step": 186910 }, { "epoch": 0.7114636541491897, "grad_norm": 0.11329130083322525, "learning_rate": 0.0005, "loss": 2.1024, "step": 186920 }, { "epoch": 0.7115017166173123, "grad_norm": 0.12068556994199753, "learning_rate": 0.0005, "loss": 2.1144, "step": 186930 }, { "epoch": 0.711539779085435, "grad_norm": 0.12598134577274323, "learning_rate": 0.0005, "loss": 2.1085, "step": 186940 }, { "epoch": 0.7115778415535577, "grad_norm": 0.11477117985486984, "learning_rate": 0.0005, "loss": 2.1046, "step": 186950 }, { "epoch": 0.7116159040216804, "grad_norm": 0.13626538217067719, "learning_rate": 0.0005, "loss": 2.1226, "step": 186960 }, { "epoch": 0.7116539664898031, "grad_norm": 0.13596713542938232, "learning_rate": 0.0005, "loss": 2.1061, "step": 186970 }, { "epoch": 0.7116920289579257, "grad_norm": 0.12076553702354431, "learning_rate": 0.0005, "loss": 2.0876, "step": 186980 }, { "epoch": 0.7117300914260485, "grad_norm": 0.12414637953042984, "learning_rate": 0.0005, "loss": 2.1091, "step": 186990 }, { "epoch": 0.7117681538941711, "grad_norm": 0.11907530575990677, "learning_rate": 0.0005, "loss": 2.0956, "step": 187000 }, { "epoch": 0.7118062163622938, "grad_norm": 0.11883730441331863, "learning_rate": 0.0005, "loss": 2.1027, "step": 187010 }, { "epoch": 0.7118442788304165, "grad_norm": 0.11856499314308167, "learning_rate": 0.0005, "loss": 2.1053, "step": 187020 }, { "epoch": 0.7118823412985391, "grad_norm": 0.1154351681470871, "learning_rate": 0.0005, "loss": 2.1077, "step": 187030 }, { "epoch": 0.7119204037666619, "grad_norm": 0.11441994458436966, "learning_rate": 0.0005, "loss": 2.1076, "step": 187040 }, { "epoch": 0.7119584662347845, "grad_norm": 0.12784643471240997, "learning_rate": 0.0005, "loss": 2.1091, "step": 187050 }, { "epoch": 0.7119965287029072, "grad_norm": 0.138327956199646, "learning_rate": 0.0005, "loss": 2.1189, "step": 187060 }, { "epoch": 0.7120345911710299, "grad_norm": 0.13243740797042847, "learning_rate": 0.0005, "loss": 2.1048, "step": 187070 }, { "epoch": 0.7120726536391526, "grad_norm": 0.11836738139390945, "learning_rate": 0.0005, "loss": 2.1127, "step": 187080 }, { "epoch": 0.7121107161072753, "grad_norm": 0.12716001272201538, "learning_rate": 0.0005, "loss": 2.0925, "step": 187090 }, { "epoch": 0.7121487785753979, "grad_norm": 0.14492060244083405, "learning_rate": 0.0005, "loss": 2.1133, "step": 187100 }, { "epoch": 0.7121868410435206, "grad_norm": 0.1620989590883255, "learning_rate": 0.0005, "loss": 2.1048, "step": 187110 }, { "epoch": 0.7122249035116434, "grad_norm": 0.11282233148813248, "learning_rate": 0.0005, "loss": 2.1154, "step": 187120 }, { "epoch": 0.712262965979766, "grad_norm": 0.1469227373600006, "learning_rate": 0.0005, "loss": 2.0917, "step": 187130 }, { "epoch": 0.7123010284478887, "grad_norm": 0.13612151145935059, "learning_rate": 0.0005, "loss": 2.1031, "step": 187140 }, { "epoch": 0.7123390909160113, "grad_norm": 0.12967805564403534, "learning_rate": 0.0005, "loss": 2.1001, "step": 187150 }, { "epoch": 0.712377153384134, "grad_norm": 0.14889055490493774, "learning_rate": 0.0005, "loss": 2.1018, "step": 187160 }, { "epoch": 0.7124152158522568, "grad_norm": 0.12575414776802063, "learning_rate": 0.0005, "loss": 2.117, "step": 187170 }, { "epoch": 0.7124532783203794, "grad_norm": 0.12359946966171265, "learning_rate": 0.0005, "loss": 2.1039, "step": 187180 }, { "epoch": 0.7124913407885021, "grad_norm": 0.12194182723760605, "learning_rate": 0.0005, "loss": 2.1173, "step": 187190 }, { "epoch": 0.7125294032566247, "grad_norm": 0.12410972267389297, "learning_rate": 0.0005, "loss": 2.1078, "step": 187200 }, { "epoch": 0.7125674657247475, "grad_norm": 0.11400230973958969, "learning_rate": 0.0005, "loss": 2.1172, "step": 187210 }, { "epoch": 0.7126055281928702, "grad_norm": 0.11606621742248535, "learning_rate": 0.0005, "loss": 2.115, "step": 187220 }, { "epoch": 0.7126435906609928, "grad_norm": 0.12606237828731537, "learning_rate": 0.0005, "loss": 2.1196, "step": 187230 }, { "epoch": 0.7126816531291155, "grad_norm": 0.1324600726366043, "learning_rate": 0.0005, "loss": 2.1124, "step": 187240 }, { "epoch": 0.7127197155972382, "grad_norm": 0.7168033719062805, "learning_rate": 0.0005, "loss": 2.1118, "step": 187250 }, { "epoch": 0.7127577780653609, "grad_norm": 0.11666694283485413, "learning_rate": 0.0005, "loss": 2.0817, "step": 187260 }, { "epoch": 0.7127958405334835, "grad_norm": 0.1246514767408371, "learning_rate": 0.0005, "loss": 2.1082, "step": 187270 }, { "epoch": 0.7128339030016062, "grad_norm": 0.11888870596885681, "learning_rate": 0.0005, "loss": 2.0972, "step": 187280 }, { "epoch": 0.712871965469729, "grad_norm": 0.13312821090221405, "learning_rate": 0.0005, "loss": 2.1198, "step": 187290 }, { "epoch": 0.7129100279378516, "grad_norm": 0.12242951989173889, "learning_rate": 0.0005, "loss": 2.1129, "step": 187300 }, { "epoch": 0.7129480904059743, "grad_norm": 0.1219000369310379, "learning_rate": 0.0005, "loss": 2.0949, "step": 187310 }, { "epoch": 0.712986152874097, "grad_norm": 0.1179623156785965, "learning_rate": 0.0005, "loss": 2.1363, "step": 187320 }, { "epoch": 0.7130242153422196, "grad_norm": 0.12434601038694382, "learning_rate": 0.0005, "loss": 2.111, "step": 187330 }, { "epoch": 0.7130622778103424, "grad_norm": 0.11475856602191925, "learning_rate": 0.0005, "loss": 2.1059, "step": 187340 }, { "epoch": 0.713100340278465, "grad_norm": 0.11931030452251434, "learning_rate": 0.0005, "loss": 2.1088, "step": 187350 }, { "epoch": 0.7131384027465877, "grad_norm": 0.13195136189460754, "learning_rate": 0.0005, "loss": 2.1098, "step": 187360 }, { "epoch": 0.7131764652147103, "grad_norm": 0.11491978168487549, "learning_rate": 0.0005, "loss": 2.1141, "step": 187370 }, { "epoch": 0.7132145276828331, "grad_norm": 0.13336238265037537, "learning_rate": 0.0005, "loss": 2.1129, "step": 187380 }, { "epoch": 0.7132525901509558, "grad_norm": 0.11675713211297989, "learning_rate": 0.0005, "loss": 2.1097, "step": 187390 }, { "epoch": 0.7132906526190784, "grad_norm": 0.11502215266227722, "learning_rate": 0.0005, "loss": 2.1082, "step": 187400 }, { "epoch": 0.7133287150872011, "grad_norm": 0.12776829302310944, "learning_rate": 0.0005, "loss": 2.1253, "step": 187410 }, { "epoch": 0.7133667775553238, "grad_norm": 0.12221261858940125, "learning_rate": 0.0005, "loss": 2.0999, "step": 187420 }, { "epoch": 0.7134048400234465, "grad_norm": 0.12141124904155731, "learning_rate": 0.0005, "loss": 2.1101, "step": 187430 }, { "epoch": 0.7134429024915692, "grad_norm": 0.12151667475700378, "learning_rate": 0.0005, "loss": 2.1211, "step": 187440 }, { "epoch": 0.7134809649596918, "grad_norm": 0.12741494178771973, "learning_rate": 0.0005, "loss": 2.1108, "step": 187450 }, { "epoch": 0.7135190274278145, "grad_norm": 0.13373515009880066, "learning_rate": 0.0005, "loss": 2.1037, "step": 187460 }, { "epoch": 0.7135570898959372, "grad_norm": 0.12541289627552032, "learning_rate": 0.0005, "loss": 2.0949, "step": 187470 }, { "epoch": 0.7135951523640599, "grad_norm": 0.13134725391864777, "learning_rate": 0.0005, "loss": 2.1071, "step": 187480 }, { "epoch": 0.7136332148321826, "grad_norm": 0.12061105668544769, "learning_rate": 0.0005, "loss": 2.0963, "step": 187490 }, { "epoch": 0.7136712773003052, "grad_norm": 0.12639255821704865, "learning_rate": 0.0005, "loss": 2.1294, "step": 187500 }, { "epoch": 0.713709339768428, "grad_norm": 0.12902987003326416, "learning_rate": 0.0005, "loss": 2.1038, "step": 187510 }, { "epoch": 0.7137474022365506, "grad_norm": 0.12578243017196655, "learning_rate": 0.0005, "loss": 2.127, "step": 187520 }, { "epoch": 0.7137854647046733, "grad_norm": 0.11344828456640244, "learning_rate": 0.0005, "loss": 2.092, "step": 187530 }, { "epoch": 0.713823527172796, "grad_norm": 0.13546188175678253, "learning_rate": 0.0005, "loss": 2.1258, "step": 187540 }, { "epoch": 0.7138615896409187, "grad_norm": 0.12519478797912598, "learning_rate": 0.0005, "loss": 2.0963, "step": 187550 }, { "epoch": 0.7138996521090414, "grad_norm": 0.12113405764102936, "learning_rate": 0.0005, "loss": 2.0966, "step": 187560 }, { "epoch": 0.713937714577164, "grad_norm": 0.13216333091259003, "learning_rate": 0.0005, "loss": 2.1151, "step": 187570 }, { "epoch": 0.7139757770452867, "grad_norm": 0.13803592324256897, "learning_rate": 0.0005, "loss": 2.1305, "step": 187580 }, { "epoch": 0.7140138395134094, "grad_norm": 0.12262871116399765, "learning_rate": 0.0005, "loss": 2.1133, "step": 187590 }, { "epoch": 0.7140519019815321, "grad_norm": 0.12475186586380005, "learning_rate": 0.0005, "loss": 2.1217, "step": 187600 }, { "epoch": 0.7140899644496548, "grad_norm": 0.1186951994895935, "learning_rate": 0.0005, "loss": 2.1087, "step": 187610 }, { "epoch": 0.7141280269177774, "grad_norm": 0.12116733193397522, "learning_rate": 0.0005, "loss": 2.0968, "step": 187620 }, { "epoch": 0.7141660893859001, "grad_norm": 0.12256205081939697, "learning_rate": 0.0005, "loss": 2.0939, "step": 187630 }, { "epoch": 0.7142041518540229, "grad_norm": 0.122563935816288, "learning_rate": 0.0005, "loss": 2.1067, "step": 187640 }, { "epoch": 0.7142422143221455, "grad_norm": 0.1201736181974411, "learning_rate": 0.0005, "loss": 2.094, "step": 187650 }, { "epoch": 0.7142802767902682, "grad_norm": 0.11947773396968842, "learning_rate": 0.0005, "loss": 2.0989, "step": 187660 }, { "epoch": 0.7143183392583908, "grad_norm": 0.11251599341630936, "learning_rate": 0.0005, "loss": 2.1137, "step": 187670 }, { "epoch": 0.7143564017265136, "grad_norm": 0.11396802216768265, "learning_rate": 0.0005, "loss": 2.1235, "step": 187680 }, { "epoch": 0.7143944641946363, "grad_norm": 0.1160028800368309, "learning_rate": 0.0005, "loss": 2.1057, "step": 187690 }, { "epoch": 0.7144325266627589, "grad_norm": 0.11407670378684998, "learning_rate": 0.0005, "loss": 2.0946, "step": 187700 }, { "epoch": 0.7144705891308816, "grad_norm": 0.11758105456829071, "learning_rate": 0.0005, "loss": 2.1183, "step": 187710 }, { "epoch": 0.7145086515990043, "grad_norm": 0.12069651484489441, "learning_rate": 0.0005, "loss": 2.1104, "step": 187720 }, { "epoch": 0.714546714067127, "grad_norm": 0.12910427153110504, "learning_rate": 0.0005, "loss": 2.1225, "step": 187730 }, { "epoch": 0.7145847765352497, "grad_norm": 0.12105337530374527, "learning_rate": 0.0005, "loss": 2.1057, "step": 187740 }, { "epoch": 0.7146228390033723, "grad_norm": 0.1282321959733963, "learning_rate": 0.0005, "loss": 2.1297, "step": 187750 }, { "epoch": 0.714660901471495, "grad_norm": 0.1256188601255417, "learning_rate": 0.0005, "loss": 2.1092, "step": 187760 }, { "epoch": 0.7146989639396177, "grad_norm": 0.12741422653198242, "learning_rate": 0.0005, "loss": 2.1029, "step": 187770 }, { "epoch": 0.7147370264077404, "grad_norm": 0.12263234704732895, "learning_rate": 0.0005, "loss": 2.1051, "step": 187780 }, { "epoch": 0.714775088875863, "grad_norm": 0.12101583927869797, "learning_rate": 0.0005, "loss": 2.1042, "step": 187790 }, { "epoch": 0.7148131513439857, "grad_norm": 0.13574911653995514, "learning_rate": 0.0005, "loss": 2.1128, "step": 187800 }, { "epoch": 0.7148512138121085, "grad_norm": 0.12082146853208542, "learning_rate": 0.0005, "loss": 2.125, "step": 187810 }, { "epoch": 0.7148892762802311, "grad_norm": 0.12578290700912476, "learning_rate": 0.0005, "loss": 2.125, "step": 187820 }, { "epoch": 0.7149273387483538, "grad_norm": 0.12050847709178925, "learning_rate": 0.0005, "loss": 2.1241, "step": 187830 }, { "epoch": 0.7149654012164764, "grad_norm": 0.122773677110672, "learning_rate": 0.0005, "loss": 2.0985, "step": 187840 }, { "epoch": 0.7150034636845992, "grad_norm": 0.11656002700328827, "learning_rate": 0.0005, "loss": 2.113, "step": 187850 }, { "epoch": 0.7150415261527219, "grad_norm": 0.1179434210062027, "learning_rate": 0.0005, "loss": 2.1011, "step": 187860 }, { "epoch": 0.7150795886208445, "grad_norm": 0.11811520904302597, "learning_rate": 0.0005, "loss": 2.0987, "step": 187870 }, { "epoch": 0.7151176510889672, "grad_norm": 0.12294347584247589, "learning_rate": 0.0005, "loss": 2.1087, "step": 187880 }, { "epoch": 0.7151557135570898, "grad_norm": 0.13415418565273285, "learning_rate": 0.0005, "loss": 2.1092, "step": 187890 }, { "epoch": 0.7151937760252126, "grad_norm": 0.11592666804790497, "learning_rate": 0.0005, "loss": 2.1088, "step": 187900 }, { "epoch": 0.7152318384933353, "grad_norm": 0.1220850721001625, "learning_rate": 0.0005, "loss": 2.1105, "step": 187910 }, { "epoch": 0.7152699009614579, "grad_norm": 0.1307125687599182, "learning_rate": 0.0005, "loss": 2.1262, "step": 187920 }, { "epoch": 0.7153079634295806, "grad_norm": 0.13731436431407928, "learning_rate": 0.0005, "loss": 2.1086, "step": 187930 }, { "epoch": 0.7153460258977034, "grad_norm": 0.1312839537858963, "learning_rate": 0.0005, "loss": 2.1128, "step": 187940 }, { "epoch": 0.715384088365826, "grad_norm": 0.13992613554000854, "learning_rate": 0.0005, "loss": 2.1182, "step": 187950 }, { "epoch": 0.7154221508339487, "grad_norm": 0.11659088730812073, "learning_rate": 0.0005, "loss": 2.1121, "step": 187960 }, { "epoch": 0.7154602133020713, "grad_norm": 0.12068942189216614, "learning_rate": 0.0005, "loss": 2.1134, "step": 187970 }, { "epoch": 0.7154982757701941, "grad_norm": 0.1227799654006958, "learning_rate": 0.0005, "loss": 2.0905, "step": 187980 }, { "epoch": 0.7155363382383167, "grad_norm": 0.12143576890230179, "learning_rate": 0.0005, "loss": 2.1065, "step": 187990 }, { "epoch": 0.7155744007064394, "grad_norm": 0.11767726391553879, "learning_rate": 0.0005, "loss": 2.0974, "step": 188000 }, { "epoch": 0.7156124631745621, "grad_norm": 0.12173023074865341, "learning_rate": 0.0005, "loss": 2.1232, "step": 188010 }, { "epoch": 0.7156505256426847, "grad_norm": 0.1280447244644165, "learning_rate": 0.0005, "loss": 2.1019, "step": 188020 }, { "epoch": 0.7156885881108075, "grad_norm": 0.13191145658493042, "learning_rate": 0.0005, "loss": 2.1146, "step": 188030 }, { "epoch": 0.7157266505789301, "grad_norm": 0.13750119507312775, "learning_rate": 0.0005, "loss": 2.1001, "step": 188040 }, { "epoch": 0.7157647130470528, "grad_norm": 0.12749455869197845, "learning_rate": 0.0005, "loss": 2.0974, "step": 188050 }, { "epoch": 0.7158027755151755, "grad_norm": 0.1899019479751587, "learning_rate": 0.0005, "loss": 2.1092, "step": 188060 }, { "epoch": 0.7158408379832982, "grad_norm": 0.1267443299293518, "learning_rate": 0.0005, "loss": 2.113, "step": 188070 }, { "epoch": 0.7158789004514209, "grad_norm": 0.12289052456617355, "learning_rate": 0.0005, "loss": 2.1, "step": 188080 }, { "epoch": 0.7159169629195435, "grad_norm": 0.1474115550518036, "learning_rate": 0.0005, "loss": 2.1079, "step": 188090 }, { "epoch": 0.7159550253876662, "grad_norm": 0.12440325319766998, "learning_rate": 0.0005, "loss": 2.1153, "step": 188100 }, { "epoch": 0.715993087855789, "grad_norm": 0.13077470660209656, "learning_rate": 0.0005, "loss": 2.1163, "step": 188110 }, { "epoch": 0.7160311503239116, "grad_norm": 0.11480049043893814, "learning_rate": 0.0005, "loss": 2.103, "step": 188120 }, { "epoch": 0.7160692127920343, "grad_norm": 0.11923867464065552, "learning_rate": 0.0005, "loss": 2.1107, "step": 188130 }, { "epoch": 0.7161072752601569, "grad_norm": 0.1369302123785019, "learning_rate": 0.0005, "loss": 2.1112, "step": 188140 }, { "epoch": 0.7161453377282797, "grad_norm": 0.13814698159694672, "learning_rate": 0.0005, "loss": 2.109, "step": 188150 }, { "epoch": 0.7161834001964024, "grad_norm": 0.13006384670734406, "learning_rate": 0.0005, "loss": 2.1022, "step": 188160 }, { "epoch": 0.716221462664525, "grad_norm": 0.12163983285427094, "learning_rate": 0.0005, "loss": 2.1007, "step": 188170 }, { "epoch": 0.7162595251326477, "grad_norm": 0.12530353665351868, "learning_rate": 0.0005, "loss": 2.1081, "step": 188180 }, { "epoch": 0.7162975876007703, "grad_norm": 0.12386982887983322, "learning_rate": 0.0005, "loss": 2.1069, "step": 188190 }, { "epoch": 0.7163356500688931, "grad_norm": 0.12685070931911469, "learning_rate": 0.0005, "loss": 2.1175, "step": 188200 }, { "epoch": 0.7163737125370158, "grad_norm": 0.4660695791244507, "learning_rate": 0.0005, "loss": 2.1041, "step": 188210 }, { "epoch": 0.7164117750051384, "grad_norm": 0.1478555053472519, "learning_rate": 0.0005, "loss": 2.1114, "step": 188220 }, { "epoch": 0.7164498374732611, "grad_norm": 0.12359131127595901, "learning_rate": 0.0005, "loss": 2.105, "step": 188230 }, { "epoch": 0.7164878999413838, "grad_norm": 0.12482161074876785, "learning_rate": 0.0005, "loss": 2.1088, "step": 188240 }, { "epoch": 0.7165259624095065, "grad_norm": 0.14276772737503052, "learning_rate": 0.0005, "loss": 2.1033, "step": 188250 }, { "epoch": 0.7165640248776292, "grad_norm": 0.12220495939254761, "learning_rate": 0.0005, "loss": 2.1124, "step": 188260 }, { "epoch": 0.7166020873457518, "grad_norm": 0.12394607067108154, "learning_rate": 0.0005, "loss": 2.1086, "step": 188270 }, { "epoch": 0.7166401498138746, "grad_norm": 0.13645556569099426, "learning_rate": 0.0005, "loss": 2.1235, "step": 188280 }, { "epoch": 0.7166782122819972, "grad_norm": 0.12253500521183014, "learning_rate": 0.0005, "loss": 2.0978, "step": 188290 }, { "epoch": 0.7167162747501199, "grad_norm": 0.12891119718551636, "learning_rate": 0.0005, "loss": 2.1055, "step": 188300 }, { "epoch": 0.7167543372182426, "grad_norm": 0.1140003502368927, "learning_rate": 0.0005, "loss": 2.1146, "step": 188310 }, { "epoch": 0.7167923996863652, "grad_norm": 0.11547480523586273, "learning_rate": 0.0005, "loss": 2.0956, "step": 188320 }, { "epoch": 0.716830462154488, "grad_norm": 0.10986243188381195, "learning_rate": 0.0005, "loss": 2.091, "step": 188330 }, { "epoch": 0.7168685246226106, "grad_norm": 0.14526185393333435, "learning_rate": 0.0005, "loss": 2.1105, "step": 188340 }, { "epoch": 0.7169065870907333, "grad_norm": 0.23593851923942566, "learning_rate": 0.0005, "loss": 2.1092, "step": 188350 }, { "epoch": 0.716944649558856, "grad_norm": 0.1532217115163803, "learning_rate": 0.0005, "loss": 2.1143, "step": 188360 }, { "epoch": 0.7169827120269787, "grad_norm": 0.12344641238451004, "learning_rate": 0.0005, "loss": 2.0996, "step": 188370 }, { "epoch": 0.7170207744951014, "grad_norm": 0.12098667770624161, "learning_rate": 0.0005, "loss": 2.1105, "step": 188380 }, { "epoch": 0.717058836963224, "grad_norm": 0.1281360387802124, "learning_rate": 0.0005, "loss": 2.1069, "step": 188390 }, { "epoch": 0.7170968994313467, "grad_norm": 0.10796024650335312, "learning_rate": 0.0005, "loss": 2.1093, "step": 188400 }, { "epoch": 0.7171349618994695, "grad_norm": 0.12044819444417953, "learning_rate": 0.0005, "loss": 2.0977, "step": 188410 }, { "epoch": 0.7171730243675921, "grad_norm": 0.11701665073633194, "learning_rate": 0.0005, "loss": 2.1112, "step": 188420 }, { "epoch": 0.7172110868357148, "grad_norm": 0.12460478395223618, "learning_rate": 0.0005, "loss": 2.1092, "step": 188430 }, { "epoch": 0.7172491493038374, "grad_norm": 0.13054977357387543, "learning_rate": 0.0005, "loss": 2.112, "step": 188440 }, { "epoch": 0.7172872117719601, "grad_norm": 0.11797840893268585, "learning_rate": 0.0005, "loss": 2.1034, "step": 188450 }, { "epoch": 0.7173252742400829, "grad_norm": 0.1306247115135193, "learning_rate": 0.0005, "loss": 2.114, "step": 188460 }, { "epoch": 0.7173633367082055, "grad_norm": 0.13063837587833405, "learning_rate": 0.0005, "loss": 2.095, "step": 188470 }, { "epoch": 0.7174013991763282, "grad_norm": 0.12942704558372498, "learning_rate": 0.0005, "loss": 2.111, "step": 188480 }, { "epoch": 0.7174394616444508, "grad_norm": 0.11970430612564087, "learning_rate": 0.0005, "loss": 2.1041, "step": 188490 }, { "epoch": 0.7174775241125736, "grad_norm": 0.13363367319107056, "learning_rate": 0.0005, "loss": 2.0975, "step": 188500 }, { "epoch": 0.7175155865806963, "grad_norm": 0.13914057612419128, "learning_rate": 0.0005, "loss": 2.1268, "step": 188510 }, { "epoch": 0.7175536490488189, "grad_norm": 0.12554246187210083, "learning_rate": 0.0005, "loss": 2.1223, "step": 188520 }, { "epoch": 0.7175917115169416, "grad_norm": 0.12300821393728256, "learning_rate": 0.0005, "loss": 2.1073, "step": 188530 }, { "epoch": 0.7176297739850643, "grad_norm": 0.11722347140312195, "learning_rate": 0.0005, "loss": 2.0974, "step": 188540 }, { "epoch": 0.717667836453187, "grad_norm": 0.11022867262363434, "learning_rate": 0.0005, "loss": 2.1007, "step": 188550 }, { "epoch": 0.7177058989213096, "grad_norm": 0.13012823462486267, "learning_rate": 0.0005, "loss": 2.1027, "step": 188560 }, { "epoch": 0.7177439613894323, "grad_norm": 0.12405557930469513, "learning_rate": 0.0005, "loss": 2.1086, "step": 188570 }, { "epoch": 0.7177820238575551, "grad_norm": 0.11842475086450577, "learning_rate": 0.0005, "loss": 2.1186, "step": 188580 }, { "epoch": 0.7178200863256777, "grad_norm": 0.11545991897583008, "learning_rate": 0.0005, "loss": 2.1074, "step": 188590 }, { "epoch": 0.7178581487938004, "grad_norm": 0.11615921556949615, "learning_rate": 0.0005, "loss": 2.0959, "step": 188600 }, { "epoch": 0.717896211261923, "grad_norm": 0.11555454134941101, "learning_rate": 0.0005, "loss": 2.1119, "step": 188610 }, { "epoch": 0.7179342737300457, "grad_norm": 0.13142219185829163, "learning_rate": 0.0005, "loss": 2.1106, "step": 188620 }, { "epoch": 0.7179723361981685, "grad_norm": 0.13296760618686676, "learning_rate": 0.0005, "loss": 2.112, "step": 188630 }, { "epoch": 0.7180103986662911, "grad_norm": 0.12703141570091248, "learning_rate": 0.0005, "loss": 2.1084, "step": 188640 }, { "epoch": 0.7180484611344138, "grad_norm": 0.12670379877090454, "learning_rate": 0.0005, "loss": 2.11, "step": 188650 }, { "epoch": 0.7180865236025364, "grad_norm": 0.12041231989860535, "learning_rate": 0.0005, "loss": 2.1124, "step": 188660 }, { "epoch": 0.7181245860706592, "grad_norm": 0.13363684713840485, "learning_rate": 0.0005, "loss": 2.1047, "step": 188670 }, { "epoch": 0.7181626485387819, "grad_norm": 0.12533117830753326, "learning_rate": 0.0005, "loss": 2.1174, "step": 188680 }, { "epoch": 0.7182007110069045, "grad_norm": 0.12696968019008636, "learning_rate": 0.0005, "loss": 2.1084, "step": 188690 }, { "epoch": 0.7182387734750272, "grad_norm": 0.11952722817659378, "learning_rate": 0.0005, "loss": 2.1165, "step": 188700 }, { "epoch": 0.71827683594315, "grad_norm": 0.11573243141174316, "learning_rate": 0.0005, "loss": 2.0972, "step": 188710 }, { "epoch": 0.7183148984112726, "grad_norm": 0.11628570407629013, "learning_rate": 0.0005, "loss": 2.1227, "step": 188720 }, { "epoch": 0.7183529608793953, "grad_norm": 0.12123718857765198, "learning_rate": 0.0005, "loss": 2.1224, "step": 188730 }, { "epoch": 0.7183910233475179, "grad_norm": 0.14781494438648224, "learning_rate": 0.0005, "loss": 2.0823, "step": 188740 }, { "epoch": 0.7184290858156406, "grad_norm": 0.1224733367562294, "learning_rate": 0.0005, "loss": 2.1017, "step": 188750 }, { "epoch": 0.7184671482837633, "grad_norm": 0.13327038288116455, "learning_rate": 0.0005, "loss": 2.1007, "step": 188760 }, { "epoch": 0.718505210751886, "grad_norm": 0.13257130980491638, "learning_rate": 0.0005, "loss": 2.1116, "step": 188770 }, { "epoch": 0.7185432732200087, "grad_norm": 0.11904368549585342, "learning_rate": 0.0005, "loss": 2.0859, "step": 188780 }, { "epoch": 0.7185813356881313, "grad_norm": 0.1520932912826538, "learning_rate": 0.0005, "loss": 2.1083, "step": 188790 }, { "epoch": 0.7186193981562541, "grad_norm": 0.11274637281894684, "learning_rate": 0.0005, "loss": 2.1243, "step": 188800 }, { "epoch": 0.7186574606243767, "grad_norm": 0.11699338257312775, "learning_rate": 0.0005, "loss": 2.1134, "step": 188810 }, { "epoch": 0.7186955230924994, "grad_norm": 0.1301192194223404, "learning_rate": 0.0005, "loss": 2.1031, "step": 188820 }, { "epoch": 0.7187335855606221, "grad_norm": 0.12862426042556763, "learning_rate": 0.0005, "loss": 2.1097, "step": 188830 }, { "epoch": 0.7187716480287448, "grad_norm": 0.1355823129415512, "learning_rate": 0.0005, "loss": 2.0997, "step": 188840 }, { "epoch": 0.7188097104968675, "grad_norm": 0.1313876509666443, "learning_rate": 0.0005, "loss": 2.0989, "step": 188850 }, { "epoch": 0.7188477729649901, "grad_norm": 0.14267843961715698, "learning_rate": 0.0005, "loss": 2.1023, "step": 188860 }, { "epoch": 0.7188858354331128, "grad_norm": 0.12998761236667633, "learning_rate": 0.0005, "loss": 2.089, "step": 188870 }, { "epoch": 0.7189238979012355, "grad_norm": 0.1305166333913803, "learning_rate": 0.0005, "loss": 2.106, "step": 188880 }, { "epoch": 0.7189619603693582, "grad_norm": 0.11812318861484528, "learning_rate": 0.0005, "loss": 2.1033, "step": 188890 }, { "epoch": 0.7190000228374809, "grad_norm": 0.13159221410751343, "learning_rate": 0.0005, "loss": 2.0968, "step": 188900 }, { "epoch": 0.7190380853056035, "grad_norm": 0.11888105422258377, "learning_rate": 0.0005, "loss": 2.1062, "step": 188910 }, { "epoch": 0.7190761477737262, "grad_norm": 0.12733519077301025, "learning_rate": 0.0005, "loss": 2.1233, "step": 188920 }, { "epoch": 0.719114210241849, "grad_norm": 0.13240741193294525, "learning_rate": 0.0005, "loss": 2.1164, "step": 188930 }, { "epoch": 0.7191522727099716, "grad_norm": 0.11451985687017441, "learning_rate": 0.0005, "loss": 2.1115, "step": 188940 }, { "epoch": 0.7191903351780943, "grad_norm": 0.12244334071874619, "learning_rate": 0.0005, "loss": 2.0975, "step": 188950 }, { "epoch": 0.7192283976462169, "grad_norm": 0.12746724486351013, "learning_rate": 0.0005, "loss": 2.1123, "step": 188960 }, { "epoch": 0.7192664601143397, "grad_norm": 0.1367591917514801, "learning_rate": 0.0005, "loss": 2.1069, "step": 188970 }, { "epoch": 0.7193045225824624, "grad_norm": 0.12524889409542084, "learning_rate": 0.0005, "loss": 2.0968, "step": 188980 }, { "epoch": 0.719342585050585, "grad_norm": 0.12255341559648514, "learning_rate": 0.0005, "loss": 2.1232, "step": 188990 }, { "epoch": 0.7193806475187077, "grad_norm": 0.12615957856178284, "learning_rate": 0.0005, "loss": 2.1, "step": 189000 }, { "epoch": 0.7194187099868304, "grad_norm": 0.13030847907066345, "learning_rate": 0.0005, "loss": 2.1182, "step": 189010 }, { "epoch": 0.7194567724549531, "grad_norm": 0.12409580498933792, "learning_rate": 0.0005, "loss": 2.1075, "step": 189020 }, { "epoch": 0.7194948349230758, "grad_norm": 0.12422270327806473, "learning_rate": 0.0005, "loss": 2.096, "step": 189030 }, { "epoch": 0.7195328973911984, "grad_norm": 0.11482467502355576, "learning_rate": 0.0005, "loss": 2.1074, "step": 189040 }, { "epoch": 0.7195709598593211, "grad_norm": 0.1306055337190628, "learning_rate": 0.0005, "loss": 2.1052, "step": 189050 }, { "epoch": 0.7196090223274438, "grad_norm": 0.11800495535135269, "learning_rate": 0.0005, "loss": 2.1124, "step": 189060 }, { "epoch": 0.7196470847955665, "grad_norm": 0.12688249349594116, "learning_rate": 0.0005, "loss": 2.1012, "step": 189070 }, { "epoch": 0.7196851472636892, "grad_norm": 0.1269036829471588, "learning_rate": 0.0005, "loss": 2.1075, "step": 189080 }, { "epoch": 0.7197232097318118, "grad_norm": 0.12588876485824585, "learning_rate": 0.0005, "loss": 2.1056, "step": 189090 }, { "epoch": 0.7197612721999346, "grad_norm": 0.12352360039949417, "learning_rate": 0.0005, "loss": 2.1135, "step": 189100 }, { "epoch": 0.7197993346680572, "grad_norm": 0.11827840656042099, "learning_rate": 0.0005, "loss": 2.1083, "step": 189110 }, { "epoch": 0.7198373971361799, "grad_norm": 0.11889263242483139, "learning_rate": 0.0005, "loss": 2.1193, "step": 189120 }, { "epoch": 0.7198754596043025, "grad_norm": 0.11715391278266907, "learning_rate": 0.0005, "loss": 2.1165, "step": 189130 }, { "epoch": 0.7199135220724253, "grad_norm": 0.11334909498691559, "learning_rate": 0.0005, "loss": 2.1002, "step": 189140 }, { "epoch": 0.719951584540548, "grad_norm": 0.12664197385311127, "learning_rate": 0.0005, "loss": 2.0927, "step": 189150 }, { "epoch": 0.7199896470086706, "grad_norm": 0.11824624985456467, "learning_rate": 0.0005, "loss": 2.1008, "step": 189160 }, { "epoch": 0.7200277094767933, "grad_norm": 0.12083036452531815, "learning_rate": 0.0005, "loss": 2.1039, "step": 189170 }, { "epoch": 0.7200657719449159, "grad_norm": 0.13151566684246063, "learning_rate": 0.0005, "loss": 2.1113, "step": 189180 }, { "epoch": 0.7201038344130387, "grad_norm": 0.12442060559988022, "learning_rate": 0.0005, "loss": 2.1048, "step": 189190 }, { "epoch": 0.7201418968811614, "grad_norm": 0.1193741112947464, "learning_rate": 0.0005, "loss": 2.1217, "step": 189200 }, { "epoch": 0.720179959349284, "grad_norm": 0.1237751692533493, "learning_rate": 0.0005, "loss": 2.1064, "step": 189210 }, { "epoch": 0.7202180218174067, "grad_norm": 0.1314356029033661, "learning_rate": 0.0005, "loss": 2.0949, "step": 189220 }, { "epoch": 0.7202560842855295, "grad_norm": 0.1298057585954666, "learning_rate": 0.0005, "loss": 2.1294, "step": 189230 }, { "epoch": 0.7202941467536521, "grad_norm": 0.13816192746162415, "learning_rate": 0.0005, "loss": 2.1071, "step": 189240 }, { "epoch": 0.7203322092217748, "grad_norm": 0.11258254200220108, "learning_rate": 0.0005, "loss": 2.0919, "step": 189250 }, { "epoch": 0.7203702716898974, "grad_norm": 0.12321577966213226, "learning_rate": 0.0005, "loss": 2.1147, "step": 189260 }, { "epoch": 0.7204083341580202, "grad_norm": 0.12666542828083038, "learning_rate": 0.0005, "loss": 2.0995, "step": 189270 }, { "epoch": 0.7204463966261428, "grad_norm": 0.12397710978984833, "learning_rate": 0.0005, "loss": 2.1055, "step": 189280 }, { "epoch": 0.7204844590942655, "grad_norm": 0.12449897825717926, "learning_rate": 0.0005, "loss": 2.1033, "step": 189290 }, { "epoch": 0.7205225215623882, "grad_norm": 0.11226168274879456, "learning_rate": 0.0005, "loss": 2.1013, "step": 189300 }, { "epoch": 0.7205605840305108, "grad_norm": 0.12319658696651459, "learning_rate": 0.0005, "loss": 2.1117, "step": 189310 }, { "epoch": 0.7205986464986336, "grad_norm": 0.12847982347011566, "learning_rate": 0.0005, "loss": 2.0973, "step": 189320 }, { "epoch": 0.7206367089667562, "grad_norm": 0.11298822611570358, "learning_rate": 0.0005, "loss": 2.1152, "step": 189330 }, { "epoch": 0.7206747714348789, "grad_norm": 0.12321088463068008, "learning_rate": 0.0005, "loss": 2.1243, "step": 189340 }, { "epoch": 0.7207128339030016, "grad_norm": 0.12383893132209778, "learning_rate": 0.0005, "loss": 2.0976, "step": 189350 }, { "epoch": 0.7207508963711243, "grad_norm": 0.12227057665586472, "learning_rate": 0.0005, "loss": 2.103, "step": 189360 }, { "epoch": 0.720788958839247, "grad_norm": 0.12617835402488708, "learning_rate": 0.0005, "loss": 2.099, "step": 189370 }, { "epoch": 0.7208270213073696, "grad_norm": 0.1312699317932129, "learning_rate": 0.0005, "loss": 2.1136, "step": 189380 }, { "epoch": 0.7208650837754923, "grad_norm": 0.12388839572668076, "learning_rate": 0.0005, "loss": 2.1009, "step": 189390 }, { "epoch": 0.7209031462436151, "grad_norm": 0.14507871866226196, "learning_rate": 0.0005, "loss": 2.1242, "step": 189400 }, { "epoch": 0.7209412087117377, "grad_norm": 0.11744916439056396, "learning_rate": 0.0005, "loss": 2.101, "step": 189410 }, { "epoch": 0.7209792711798604, "grad_norm": 0.12581637501716614, "learning_rate": 0.0005, "loss": 2.0967, "step": 189420 }, { "epoch": 0.721017333647983, "grad_norm": 0.1203906461596489, "learning_rate": 0.0005, "loss": 2.1119, "step": 189430 }, { "epoch": 0.7210553961161058, "grad_norm": 0.12365449219942093, "learning_rate": 0.0005, "loss": 2.1167, "step": 189440 }, { "epoch": 0.7210934585842285, "grad_norm": 0.11525920778512955, "learning_rate": 0.0005, "loss": 2.1057, "step": 189450 }, { "epoch": 0.7211315210523511, "grad_norm": 0.11574247479438782, "learning_rate": 0.0005, "loss": 2.1278, "step": 189460 }, { "epoch": 0.7211695835204738, "grad_norm": 0.1328197568655014, "learning_rate": 0.0005, "loss": 2.1119, "step": 189470 }, { "epoch": 0.7212076459885964, "grad_norm": 0.14724650979042053, "learning_rate": 0.0005, "loss": 2.1091, "step": 189480 }, { "epoch": 0.7212457084567192, "grad_norm": 0.12958109378814697, "learning_rate": 0.0005, "loss": 2.1235, "step": 189490 }, { "epoch": 0.7212837709248419, "grad_norm": 0.13805188238620758, "learning_rate": 0.0005, "loss": 2.0946, "step": 189500 }, { "epoch": 0.7213218333929645, "grad_norm": 0.1344052255153656, "learning_rate": 0.0005, "loss": 2.1021, "step": 189510 }, { "epoch": 0.7213598958610872, "grad_norm": 0.12882310152053833, "learning_rate": 0.0005, "loss": 2.1015, "step": 189520 }, { "epoch": 0.7213979583292099, "grad_norm": 0.13333013653755188, "learning_rate": 0.0005, "loss": 2.1031, "step": 189530 }, { "epoch": 0.7214360207973326, "grad_norm": 0.11692256480455399, "learning_rate": 0.0005, "loss": 2.1094, "step": 189540 }, { "epoch": 0.7214740832654553, "grad_norm": 0.12592947483062744, "learning_rate": 0.0005, "loss": 2.1138, "step": 189550 }, { "epoch": 0.7215121457335779, "grad_norm": 0.13801392912864685, "learning_rate": 0.0005, "loss": 2.111, "step": 189560 }, { "epoch": 0.7215502082017007, "grad_norm": 0.12887591123580933, "learning_rate": 0.0005, "loss": 2.1019, "step": 189570 }, { "epoch": 0.7215882706698233, "grad_norm": 0.14320510625839233, "learning_rate": 0.0005, "loss": 2.0955, "step": 189580 }, { "epoch": 0.721626333137946, "grad_norm": 0.12209837138652802, "learning_rate": 0.0005, "loss": 2.0975, "step": 189590 }, { "epoch": 0.7216643956060687, "grad_norm": 0.12583830952644348, "learning_rate": 0.0005, "loss": 2.1117, "step": 189600 }, { "epoch": 0.7217024580741913, "grad_norm": 0.12144199758768082, "learning_rate": 0.0005, "loss": 2.1119, "step": 189610 }, { "epoch": 0.7217405205423141, "grad_norm": 0.11988362669944763, "learning_rate": 0.0005, "loss": 2.1095, "step": 189620 }, { "epoch": 0.7217785830104367, "grad_norm": 0.14672856032848358, "learning_rate": 0.0005, "loss": 2.0907, "step": 189630 }, { "epoch": 0.7218166454785594, "grad_norm": 0.12737524509429932, "learning_rate": 0.0005, "loss": 2.1046, "step": 189640 }, { "epoch": 0.721854707946682, "grad_norm": 0.11321786791086197, "learning_rate": 0.0005, "loss": 2.1118, "step": 189650 }, { "epoch": 0.7218927704148048, "grad_norm": 0.11814183741807938, "learning_rate": 0.0005, "loss": 2.1138, "step": 189660 }, { "epoch": 0.7219308328829275, "grad_norm": 0.11926648765802383, "learning_rate": 0.0005, "loss": 2.0989, "step": 189670 }, { "epoch": 0.7219688953510501, "grad_norm": 0.1263626515865326, "learning_rate": 0.0005, "loss": 2.1089, "step": 189680 }, { "epoch": 0.7220069578191728, "grad_norm": 0.12267031520605087, "learning_rate": 0.0005, "loss": 2.1075, "step": 189690 }, { "epoch": 0.7220450202872956, "grad_norm": 0.13086408376693726, "learning_rate": 0.0005, "loss": 2.1186, "step": 189700 }, { "epoch": 0.7220830827554182, "grad_norm": 0.13127847015857697, "learning_rate": 0.0005, "loss": 2.1096, "step": 189710 }, { "epoch": 0.7221211452235409, "grad_norm": 0.10789002478122711, "learning_rate": 0.0005, "loss": 2.1023, "step": 189720 }, { "epoch": 0.7221592076916635, "grad_norm": 0.12119221687316895, "learning_rate": 0.0005, "loss": 2.1205, "step": 189730 }, { "epoch": 0.7221972701597862, "grad_norm": 0.12627781927585602, "learning_rate": 0.0005, "loss": 2.1072, "step": 189740 }, { "epoch": 0.722235332627909, "grad_norm": 0.1297757923603058, "learning_rate": 0.0005, "loss": 2.1004, "step": 189750 }, { "epoch": 0.7222733950960316, "grad_norm": 0.12928855419158936, "learning_rate": 0.0005, "loss": 2.0983, "step": 189760 }, { "epoch": 0.7223114575641543, "grad_norm": 0.12331338226795197, "learning_rate": 0.0005, "loss": 2.1163, "step": 189770 }, { "epoch": 0.7223495200322769, "grad_norm": 0.13063183426856995, "learning_rate": 0.0005, "loss": 2.1061, "step": 189780 }, { "epoch": 0.7223875825003997, "grad_norm": 0.1343780905008316, "learning_rate": 0.0005, "loss": 2.1221, "step": 189790 }, { "epoch": 0.7224256449685224, "grad_norm": 0.13402073085308075, "learning_rate": 0.0005, "loss": 2.1102, "step": 189800 }, { "epoch": 0.722463707436645, "grad_norm": 0.1176358014345169, "learning_rate": 0.0005, "loss": 2.0964, "step": 189810 }, { "epoch": 0.7225017699047677, "grad_norm": 0.11849499493837357, "learning_rate": 0.0005, "loss": 2.1127, "step": 189820 }, { "epoch": 0.7225398323728904, "grad_norm": 0.12364055961370468, "learning_rate": 0.0005, "loss": 2.1115, "step": 189830 }, { "epoch": 0.7225778948410131, "grad_norm": 0.11501021683216095, "learning_rate": 0.0005, "loss": 2.1105, "step": 189840 }, { "epoch": 0.7226159573091357, "grad_norm": 0.13123121857643127, "learning_rate": 0.0005, "loss": 2.1004, "step": 189850 }, { "epoch": 0.7226540197772584, "grad_norm": 0.11838650703430176, "learning_rate": 0.0005, "loss": 2.1118, "step": 189860 }, { "epoch": 0.7226920822453812, "grad_norm": 0.11834557354450226, "learning_rate": 0.0005, "loss": 2.1044, "step": 189870 }, { "epoch": 0.7227301447135038, "grad_norm": 0.1256466507911682, "learning_rate": 0.0005, "loss": 2.1199, "step": 189880 }, { "epoch": 0.7227682071816265, "grad_norm": 0.12792137265205383, "learning_rate": 0.0005, "loss": 2.1014, "step": 189890 }, { "epoch": 0.7228062696497491, "grad_norm": 0.12776750326156616, "learning_rate": 0.0005, "loss": 2.1022, "step": 189900 }, { "epoch": 0.7228443321178718, "grad_norm": 0.12447094172239304, "learning_rate": 0.0005, "loss": 2.1096, "step": 189910 }, { "epoch": 0.7228823945859946, "grad_norm": 0.12594708800315857, "learning_rate": 0.0005, "loss": 2.105, "step": 189920 }, { "epoch": 0.7229204570541172, "grad_norm": 0.1258392184972763, "learning_rate": 0.0005, "loss": 2.1098, "step": 189930 }, { "epoch": 0.7229585195222399, "grad_norm": 0.12574885785579681, "learning_rate": 0.0005, "loss": 2.1119, "step": 189940 }, { "epoch": 0.7229965819903625, "grad_norm": 0.12300106137990952, "learning_rate": 0.0005, "loss": 2.1004, "step": 189950 }, { "epoch": 0.7230346444584853, "grad_norm": 0.12460119277238846, "learning_rate": 0.0005, "loss": 2.1087, "step": 189960 }, { "epoch": 0.723072706926608, "grad_norm": 0.11818580329418182, "learning_rate": 0.0005, "loss": 2.1059, "step": 189970 }, { "epoch": 0.7231107693947306, "grad_norm": 0.13395582139492035, "learning_rate": 0.0005, "loss": 2.097, "step": 189980 }, { "epoch": 0.7231488318628533, "grad_norm": 0.1125182956457138, "learning_rate": 0.0005, "loss": 2.1156, "step": 189990 }, { "epoch": 0.723186894330976, "grad_norm": 0.12543977797031403, "learning_rate": 0.0005, "loss": 2.1253, "step": 190000 }, { "epoch": 0.7232249567990987, "grad_norm": 0.1250716894865036, "learning_rate": 0.0005, "loss": 2.1166, "step": 190010 }, { "epoch": 0.7232630192672214, "grad_norm": 0.12153498083353043, "learning_rate": 0.0005, "loss": 2.1074, "step": 190020 }, { "epoch": 0.723301081735344, "grad_norm": 0.11792191863059998, "learning_rate": 0.0005, "loss": 2.1112, "step": 190030 }, { "epoch": 0.7233391442034667, "grad_norm": 0.12454144656658173, "learning_rate": 0.0005, "loss": 2.1105, "step": 190040 }, { "epoch": 0.7233772066715894, "grad_norm": 0.12839899957180023, "learning_rate": 0.0005, "loss": 2.1197, "step": 190050 }, { "epoch": 0.7234152691397121, "grad_norm": 0.11735977232456207, "learning_rate": 0.0005, "loss": 2.1004, "step": 190060 }, { "epoch": 0.7234533316078348, "grad_norm": 0.11648841947317123, "learning_rate": 0.0005, "loss": 2.127, "step": 190070 }, { "epoch": 0.7234913940759574, "grad_norm": 0.12635114789009094, "learning_rate": 0.0005, "loss": 2.113, "step": 190080 }, { "epoch": 0.7235294565440802, "grad_norm": 0.12741799652576447, "learning_rate": 0.0005, "loss": 2.1141, "step": 190090 }, { "epoch": 0.7235675190122028, "grad_norm": 0.12407921254634857, "learning_rate": 0.0005, "loss": 2.1061, "step": 190100 }, { "epoch": 0.7236055814803255, "grad_norm": 0.12719793617725372, "learning_rate": 0.0005, "loss": 2.1048, "step": 190110 }, { "epoch": 0.7236436439484482, "grad_norm": 0.1287856101989746, "learning_rate": 0.0005, "loss": 2.1057, "step": 190120 }, { "epoch": 0.7236817064165709, "grad_norm": 0.1346472203731537, "learning_rate": 0.0005, "loss": 2.1107, "step": 190130 }, { "epoch": 0.7237197688846936, "grad_norm": 0.12088294327259064, "learning_rate": 0.0005, "loss": 2.1112, "step": 190140 }, { "epoch": 0.7237578313528162, "grad_norm": 0.11873350292444229, "learning_rate": 0.0005, "loss": 2.1112, "step": 190150 }, { "epoch": 0.7237958938209389, "grad_norm": 0.12106628715991974, "learning_rate": 0.0005, "loss": 2.108, "step": 190160 }, { "epoch": 0.7238339562890616, "grad_norm": 0.12756995856761932, "learning_rate": 0.0005, "loss": 2.0906, "step": 190170 }, { "epoch": 0.7238720187571843, "grad_norm": 0.11779794096946716, "learning_rate": 0.0005, "loss": 2.107, "step": 190180 }, { "epoch": 0.723910081225307, "grad_norm": 0.11814567446708679, "learning_rate": 0.0005, "loss": 2.1115, "step": 190190 }, { "epoch": 0.7239481436934296, "grad_norm": 0.12297229468822479, "learning_rate": 0.0005, "loss": 2.1049, "step": 190200 }, { "epoch": 0.7239862061615523, "grad_norm": 0.1186053529381752, "learning_rate": 0.0005, "loss": 2.1055, "step": 190210 }, { "epoch": 0.7240242686296751, "grad_norm": 0.12631520628929138, "learning_rate": 0.0005, "loss": 2.1097, "step": 190220 }, { "epoch": 0.7240623310977977, "grad_norm": 0.12042009085416794, "learning_rate": 0.0005, "loss": 2.1016, "step": 190230 }, { "epoch": 0.7241003935659204, "grad_norm": 0.12243197858333588, "learning_rate": 0.0005, "loss": 2.1185, "step": 190240 }, { "epoch": 0.724138456034043, "grad_norm": 0.11951422691345215, "learning_rate": 0.0005, "loss": 2.1018, "step": 190250 }, { "epoch": 0.7241765185021658, "grad_norm": 0.12606891989707947, "learning_rate": 0.0005, "loss": 2.1128, "step": 190260 }, { "epoch": 0.7242145809702885, "grad_norm": 0.11739018559455872, "learning_rate": 0.0005, "loss": 2.104, "step": 190270 }, { "epoch": 0.7242526434384111, "grad_norm": 0.14321762323379517, "learning_rate": 0.0005, "loss": 2.1219, "step": 190280 }, { "epoch": 0.7242907059065338, "grad_norm": 0.11680032312870026, "learning_rate": 0.0005, "loss": 2.0941, "step": 190290 }, { "epoch": 0.7243287683746565, "grad_norm": 0.13240210711956024, "learning_rate": 0.0005, "loss": 2.1012, "step": 190300 }, { "epoch": 0.7243668308427792, "grad_norm": 0.12868797779083252, "learning_rate": 0.0005, "loss": 2.1127, "step": 190310 }, { "epoch": 0.7244048933109019, "grad_norm": 0.1447475254535675, "learning_rate": 0.0005, "loss": 2.1, "step": 190320 }, { "epoch": 0.7244429557790245, "grad_norm": 0.13167577981948853, "learning_rate": 0.0005, "loss": 2.1133, "step": 190330 }, { "epoch": 0.7244810182471472, "grad_norm": 0.1302758753299713, "learning_rate": 0.0005, "loss": 2.1061, "step": 190340 }, { "epoch": 0.7245190807152699, "grad_norm": 0.12184086441993713, "learning_rate": 0.0005, "loss": 2.0995, "step": 190350 }, { "epoch": 0.7245571431833926, "grad_norm": 0.12463497370481491, "learning_rate": 0.0005, "loss": 2.1018, "step": 190360 }, { "epoch": 0.7245952056515153, "grad_norm": 0.10786112397909164, "learning_rate": 0.0005, "loss": 2.1098, "step": 190370 }, { "epoch": 0.7246332681196379, "grad_norm": 0.1271791160106659, "learning_rate": 0.0005, "loss": 2.1079, "step": 190380 }, { "epoch": 0.7246713305877607, "grad_norm": 0.11974359303712845, "learning_rate": 0.0005, "loss": 2.1045, "step": 190390 }, { "epoch": 0.7247093930558833, "grad_norm": 0.1264609694480896, "learning_rate": 0.0005, "loss": 2.0943, "step": 190400 }, { "epoch": 0.724747455524006, "grad_norm": 0.1227140799164772, "learning_rate": 0.0005, "loss": 2.1237, "step": 190410 }, { "epoch": 0.7247855179921286, "grad_norm": 0.1285555362701416, "learning_rate": 0.0005, "loss": 2.1124, "step": 190420 }, { "epoch": 0.7248235804602514, "grad_norm": 0.12815794348716736, "learning_rate": 0.0005, "loss": 2.1058, "step": 190430 }, { "epoch": 0.7248616429283741, "grad_norm": 0.11281808465719223, "learning_rate": 0.0005, "loss": 2.1091, "step": 190440 }, { "epoch": 0.7248997053964967, "grad_norm": 0.13443569839000702, "learning_rate": 0.0005, "loss": 2.1113, "step": 190450 }, { "epoch": 0.7249377678646194, "grad_norm": 0.13241413235664368, "learning_rate": 0.0005, "loss": 2.1128, "step": 190460 }, { "epoch": 0.724975830332742, "grad_norm": 0.12769834697246552, "learning_rate": 0.0005, "loss": 2.1064, "step": 190470 }, { "epoch": 0.7250138928008648, "grad_norm": 0.12712432444095612, "learning_rate": 0.0005, "loss": 2.1038, "step": 190480 }, { "epoch": 0.7250519552689875, "grad_norm": 0.1333046555519104, "learning_rate": 0.0005, "loss": 2.1101, "step": 190490 }, { "epoch": 0.7250900177371101, "grad_norm": 0.12503378093242645, "learning_rate": 0.0005, "loss": 2.118, "step": 190500 }, { "epoch": 0.7251280802052328, "grad_norm": 0.12388863414525986, "learning_rate": 0.0005, "loss": 2.1094, "step": 190510 }, { "epoch": 0.7251661426733556, "grad_norm": 0.11568185687065125, "learning_rate": 0.0005, "loss": 2.1098, "step": 190520 }, { "epoch": 0.7252042051414782, "grad_norm": 0.1251184195280075, "learning_rate": 0.0005, "loss": 2.1051, "step": 190530 }, { "epoch": 0.7252422676096009, "grad_norm": 0.1223842203617096, "learning_rate": 0.0005, "loss": 2.1095, "step": 190540 }, { "epoch": 0.7252803300777235, "grad_norm": 0.11854085326194763, "learning_rate": 0.0005, "loss": 2.1035, "step": 190550 }, { "epoch": 0.7253183925458463, "grad_norm": 0.12464988976716995, "learning_rate": 0.0005, "loss": 2.0991, "step": 190560 }, { "epoch": 0.725356455013969, "grad_norm": 0.12290152162313461, "learning_rate": 0.0005, "loss": 2.1074, "step": 190570 }, { "epoch": 0.7253945174820916, "grad_norm": 0.12548936903476715, "learning_rate": 0.0005, "loss": 2.1212, "step": 190580 }, { "epoch": 0.7254325799502143, "grad_norm": 0.13220664858818054, "learning_rate": 0.0005, "loss": 2.1106, "step": 190590 }, { "epoch": 0.7254706424183369, "grad_norm": 0.1262110024690628, "learning_rate": 0.0005, "loss": 2.0912, "step": 190600 }, { "epoch": 0.7255087048864597, "grad_norm": 0.12420913577079773, "learning_rate": 0.0005, "loss": 2.1004, "step": 190610 }, { "epoch": 0.7255467673545823, "grad_norm": 0.13860227167606354, "learning_rate": 0.0005, "loss": 2.1021, "step": 190620 }, { "epoch": 0.725584829822705, "grad_norm": 0.14560693502426147, "learning_rate": 0.0005, "loss": 2.1234, "step": 190630 }, { "epoch": 0.7256228922908277, "grad_norm": 0.13095788657665253, "learning_rate": 0.0005, "loss": 2.1106, "step": 190640 }, { "epoch": 0.7256609547589504, "grad_norm": 0.125588521361351, "learning_rate": 0.0005, "loss": 2.1031, "step": 190650 }, { "epoch": 0.7256990172270731, "grad_norm": 0.13196080923080444, "learning_rate": 0.0005, "loss": 2.0899, "step": 190660 }, { "epoch": 0.7257370796951957, "grad_norm": 0.11772377043962479, "learning_rate": 0.0005, "loss": 2.111, "step": 190670 }, { "epoch": 0.7257751421633184, "grad_norm": 0.1185615062713623, "learning_rate": 0.0005, "loss": 2.1066, "step": 190680 }, { "epoch": 0.7258132046314412, "grad_norm": 0.12486417591571808, "learning_rate": 0.0005, "loss": 2.1153, "step": 190690 }, { "epoch": 0.7258512670995638, "grad_norm": 0.12780217826366425, "learning_rate": 0.0005, "loss": 2.109, "step": 190700 }, { "epoch": 0.7258893295676865, "grad_norm": 0.13506446778774261, "learning_rate": 0.0005, "loss": 2.12, "step": 190710 }, { "epoch": 0.7259273920358091, "grad_norm": 0.11652851849794388, "learning_rate": 0.0005, "loss": 2.1044, "step": 190720 }, { "epoch": 0.7259654545039319, "grad_norm": 0.12922434508800507, "learning_rate": 0.0005, "loss": 2.1033, "step": 190730 }, { "epoch": 0.7260035169720546, "grad_norm": 0.12396050244569778, "learning_rate": 0.0005, "loss": 2.1158, "step": 190740 }, { "epoch": 0.7260415794401772, "grad_norm": 0.15156452357769012, "learning_rate": 0.0005, "loss": 2.1237, "step": 190750 }, { "epoch": 0.7260796419082999, "grad_norm": 0.11920617520809174, "learning_rate": 0.0005, "loss": 2.1175, "step": 190760 }, { "epoch": 0.7261177043764225, "grad_norm": 0.1289711445569992, "learning_rate": 0.0005, "loss": 2.1046, "step": 190770 }, { "epoch": 0.7261557668445453, "grad_norm": 0.12466076016426086, "learning_rate": 0.0005, "loss": 2.1191, "step": 190780 }, { "epoch": 0.726193829312668, "grad_norm": 0.11493317782878876, "learning_rate": 0.0005, "loss": 2.1097, "step": 190790 }, { "epoch": 0.7262318917807906, "grad_norm": 0.1272706240415573, "learning_rate": 0.0005, "loss": 2.0877, "step": 190800 }, { "epoch": 0.7262699542489133, "grad_norm": 0.13595803081989288, "learning_rate": 0.0005, "loss": 2.1071, "step": 190810 }, { "epoch": 0.726308016717036, "grad_norm": 0.14387886226177216, "learning_rate": 0.0005, "loss": 2.1017, "step": 190820 }, { "epoch": 0.7263460791851587, "grad_norm": 0.12329813838005066, "learning_rate": 0.0005, "loss": 2.0966, "step": 190830 }, { "epoch": 0.7263841416532814, "grad_norm": 0.13892407715320587, "learning_rate": 0.0005, "loss": 2.116, "step": 190840 }, { "epoch": 0.726422204121404, "grad_norm": 0.1313343346118927, "learning_rate": 0.0005, "loss": 2.1046, "step": 190850 }, { "epoch": 0.7264602665895268, "grad_norm": 0.12014450877904892, "learning_rate": 0.0005, "loss": 2.1039, "step": 190860 }, { "epoch": 0.7264983290576494, "grad_norm": 0.11999203264713287, "learning_rate": 0.0005, "loss": 2.0985, "step": 190870 }, { "epoch": 0.7265363915257721, "grad_norm": 0.13435359299182892, "learning_rate": 0.0005, "loss": 2.1243, "step": 190880 }, { "epoch": 0.7265744539938948, "grad_norm": 0.13794434070587158, "learning_rate": 0.0005, "loss": 2.0845, "step": 190890 }, { "epoch": 0.7266125164620174, "grad_norm": 0.12234117835760117, "learning_rate": 0.0005, "loss": 2.1173, "step": 190900 }, { "epoch": 0.7266505789301402, "grad_norm": 0.13267767429351807, "learning_rate": 0.0005, "loss": 2.1137, "step": 190910 }, { "epoch": 0.7266886413982628, "grad_norm": 0.11783602088689804, "learning_rate": 0.0005, "loss": 2.1102, "step": 190920 }, { "epoch": 0.7267267038663855, "grad_norm": 0.13267308473587036, "learning_rate": 0.0005, "loss": 2.1116, "step": 190930 }, { "epoch": 0.7267647663345081, "grad_norm": 0.12474412471055984, "learning_rate": 0.0005, "loss": 2.1053, "step": 190940 }, { "epoch": 0.7268028288026309, "grad_norm": 0.13308046758174896, "learning_rate": 0.0005, "loss": 2.1083, "step": 190950 }, { "epoch": 0.7268408912707536, "grad_norm": 0.12626679241657257, "learning_rate": 0.0005, "loss": 2.1009, "step": 190960 }, { "epoch": 0.7268789537388762, "grad_norm": 0.12065853178501129, "learning_rate": 0.0005, "loss": 2.116, "step": 190970 }, { "epoch": 0.7269170162069989, "grad_norm": 0.11856792122125626, "learning_rate": 0.0005, "loss": 2.1103, "step": 190980 }, { "epoch": 0.7269550786751217, "grad_norm": 0.14160200953483582, "learning_rate": 0.0005, "loss": 2.0968, "step": 190990 }, { "epoch": 0.7269931411432443, "grad_norm": 0.13331879675388336, "learning_rate": 0.0005, "loss": 2.1088, "step": 191000 }, { "epoch": 0.727031203611367, "grad_norm": 0.13861005008220673, "learning_rate": 0.0005, "loss": 2.0836, "step": 191010 }, { "epoch": 0.7270692660794896, "grad_norm": 0.11692408472299576, "learning_rate": 0.0005, "loss": 2.1043, "step": 191020 }, { "epoch": 0.7271073285476123, "grad_norm": 0.11825072765350342, "learning_rate": 0.0005, "loss": 2.1152, "step": 191030 }, { "epoch": 0.727145391015735, "grad_norm": 0.12770481407642365, "learning_rate": 0.0005, "loss": 2.1008, "step": 191040 }, { "epoch": 0.7271834534838577, "grad_norm": 0.1203337088227272, "learning_rate": 0.0005, "loss": 2.098, "step": 191050 }, { "epoch": 0.7272215159519804, "grad_norm": 0.12706756591796875, "learning_rate": 0.0005, "loss": 2.1031, "step": 191060 }, { "epoch": 0.727259578420103, "grad_norm": 0.13932664692401886, "learning_rate": 0.0005, "loss": 2.1156, "step": 191070 }, { "epoch": 0.7272976408882258, "grad_norm": 0.12633827328681946, "learning_rate": 0.0005, "loss": 2.1056, "step": 191080 }, { "epoch": 0.7273357033563485, "grad_norm": 0.11820350587368011, "learning_rate": 0.0005, "loss": 2.1026, "step": 191090 }, { "epoch": 0.7273737658244711, "grad_norm": 0.12790940701961517, "learning_rate": 0.0005, "loss": 2.1075, "step": 191100 }, { "epoch": 0.7274118282925938, "grad_norm": 0.12736229598522186, "learning_rate": 0.0005, "loss": 2.1069, "step": 191110 }, { "epoch": 0.7274498907607165, "grad_norm": 0.12914036214351654, "learning_rate": 0.0005, "loss": 2.1041, "step": 191120 }, { "epoch": 0.7274879532288392, "grad_norm": 0.13645537197589874, "learning_rate": 0.0005, "loss": 2.117, "step": 191130 }, { "epoch": 0.7275260156969618, "grad_norm": 0.13665220141410828, "learning_rate": 0.0005, "loss": 2.0969, "step": 191140 }, { "epoch": 0.7275640781650845, "grad_norm": 0.12808138132095337, "learning_rate": 0.0005, "loss": 2.1023, "step": 191150 }, { "epoch": 0.7276021406332073, "grad_norm": 0.14374873042106628, "learning_rate": 0.0005, "loss": 2.1029, "step": 191160 }, { "epoch": 0.7276402031013299, "grad_norm": 0.12461627274751663, "learning_rate": 0.0005, "loss": 2.0987, "step": 191170 }, { "epoch": 0.7276782655694526, "grad_norm": 0.12864984571933746, "learning_rate": 0.0005, "loss": 2.1158, "step": 191180 }, { "epoch": 0.7277163280375752, "grad_norm": 0.1112942323088646, "learning_rate": 0.0005, "loss": 2.0927, "step": 191190 }, { "epoch": 0.7277543905056979, "grad_norm": 0.14084668457508087, "learning_rate": 0.0005, "loss": 2.1073, "step": 191200 }, { "epoch": 0.7277924529738207, "grad_norm": 0.12258113920688629, "learning_rate": 0.0005, "loss": 2.0966, "step": 191210 }, { "epoch": 0.7278305154419433, "grad_norm": 0.12442629039287567, "learning_rate": 0.0005, "loss": 2.1138, "step": 191220 }, { "epoch": 0.727868577910066, "grad_norm": 0.12644663453102112, "learning_rate": 0.0005, "loss": 2.107, "step": 191230 }, { "epoch": 0.7279066403781886, "grad_norm": 0.11585810035467148, "learning_rate": 0.0005, "loss": 2.1063, "step": 191240 }, { "epoch": 0.7279447028463114, "grad_norm": 0.14139024913311005, "learning_rate": 0.0005, "loss": 2.1159, "step": 191250 }, { "epoch": 0.7279827653144341, "grad_norm": 0.1261819750070572, "learning_rate": 0.0005, "loss": 2.0942, "step": 191260 }, { "epoch": 0.7280208277825567, "grad_norm": 0.12189679592847824, "learning_rate": 0.0005, "loss": 2.1055, "step": 191270 }, { "epoch": 0.7280588902506794, "grad_norm": 0.12211482971906662, "learning_rate": 0.0005, "loss": 2.0995, "step": 191280 }, { "epoch": 0.7280969527188021, "grad_norm": 0.12543800473213196, "learning_rate": 0.0005, "loss": 2.097, "step": 191290 }, { "epoch": 0.7281350151869248, "grad_norm": 0.13193655014038086, "learning_rate": 0.0005, "loss": 2.1106, "step": 191300 }, { "epoch": 0.7281730776550475, "grad_norm": 0.1276022493839264, "learning_rate": 0.0005, "loss": 2.1154, "step": 191310 }, { "epoch": 0.7282111401231701, "grad_norm": 0.1486976146697998, "learning_rate": 0.0005, "loss": 2.1002, "step": 191320 }, { "epoch": 0.7282492025912928, "grad_norm": 0.1777261197566986, "learning_rate": 0.0005, "loss": 2.0958, "step": 191330 }, { "epoch": 0.7282872650594155, "grad_norm": 0.1220930740237236, "learning_rate": 0.0005, "loss": 2.1075, "step": 191340 }, { "epoch": 0.7283253275275382, "grad_norm": 0.12030477076768875, "learning_rate": 0.0005, "loss": 2.1079, "step": 191350 }, { "epoch": 0.7283633899956609, "grad_norm": 0.14080104231834412, "learning_rate": 0.0005, "loss": 2.0928, "step": 191360 }, { "epoch": 0.7284014524637835, "grad_norm": 0.12594959139823914, "learning_rate": 0.0005, "loss": 2.1106, "step": 191370 }, { "epoch": 0.7284395149319063, "grad_norm": 0.13289976119995117, "learning_rate": 0.0005, "loss": 2.108, "step": 191380 }, { "epoch": 0.7284775774000289, "grad_norm": 0.11373937129974365, "learning_rate": 0.0005, "loss": 2.1183, "step": 191390 }, { "epoch": 0.7285156398681516, "grad_norm": 0.13607096672058105, "learning_rate": 0.0005, "loss": 2.1131, "step": 191400 }, { "epoch": 0.7285537023362743, "grad_norm": 0.11733315140008926, "learning_rate": 0.0005, "loss": 2.1154, "step": 191410 }, { "epoch": 0.728591764804397, "grad_norm": 0.125960573554039, "learning_rate": 0.0005, "loss": 2.1001, "step": 191420 }, { "epoch": 0.7286298272725197, "grad_norm": 0.13192327320575714, "learning_rate": 0.0005, "loss": 2.1207, "step": 191430 }, { "epoch": 0.7286678897406423, "grad_norm": 0.1263810694217682, "learning_rate": 0.0005, "loss": 2.1101, "step": 191440 }, { "epoch": 0.728705952208765, "grad_norm": 0.12355059385299683, "learning_rate": 0.0005, "loss": 2.1102, "step": 191450 }, { "epoch": 0.7287440146768878, "grad_norm": 0.11213410645723343, "learning_rate": 0.0005, "loss": 2.103, "step": 191460 }, { "epoch": 0.7287820771450104, "grad_norm": 0.11975536495447159, "learning_rate": 0.0005, "loss": 2.1041, "step": 191470 }, { "epoch": 0.7288201396131331, "grad_norm": 0.11797386407852173, "learning_rate": 0.0005, "loss": 2.1206, "step": 191480 }, { "epoch": 0.7288582020812557, "grad_norm": 0.12237497419118881, "learning_rate": 0.0005, "loss": 2.1255, "step": 191490 }, { "epoch": 0.7288962645493784, "grad_norm": 0.12381374090909958, "learning_rate": 0.0005, "loss": 2.1149, "step": 191500 }, { "epoch": 0.7289343270175012, "grad_norm": 0.12289374321699142, "learning_rate": 0.0005, "loss": 2.1068, "step": 191510 }, { "epoch": 0.7289723894856238, "grad_norm": 0.12725837528705597, "learning_rate": 0.0005, "loss": 2.1182, "step": 191520 }, { "epoch": 0.7290104519537465, "grad_norm": 0.13120581209659576, "learning_rate": 0.0005, "loss": 2.112, "step": 191530 }, { "epoch": 0.7290485144218691, "grad_norm": 0.12344781309366226, "learning_rate": 0.0005, "loss": 2.1097, "step": 191540 }, { "epoch": 0.7290865768899919, "grad_norm": 0.12666013836860657, "learning_rate": 0.0005, "loss": 2.1198, "step": 191550 }, { "epoch": 0.7291246393581146, "grad_norm": 0.11229068040847778, "learning_rate": 0.0005, "loss": 2.1133, "step": 191560 }, { "epoch": 0.7291627018262372, "grad_norm": 0.1354386806488037, "learning_rate": 0.0005, "loss": 2.1058, "step": 191570 }, { "epoch": 0.7292007642943599, "grad_norm": 0.13093844056129456, "learning_rate": 0.0005, "loss": 2.0934, "step": 191580 }, { "epoch": 0.7292388267624826, "grad_norm": 0.12772110104560852, "learning_rate": 0.0005, "loss": 2.106, "step": 191590 }, { "epoch": 0.7292768892306053, "grad_norm": 0.12142293900251389, "learning_rate": 0.0005, "loss": 2.0933, "step": 191600 }, { "epoch": 0.729314951698728, "grad_norm": 0.1302458941936493, "learning_rate": 0.0005, "loss": 2.1136, "step": 191610 }, { "epoch": 0.7293530141668506, "grad_norm": 0.12662212550640106, "learning_rate": 0.0005, "loss": 2.1212, "step": 191620 }, { "epoch": 0.7293910766349733, "grad_norm": 0.1362551897764206, "learning_rate": 0.0005, "loss": 2.112, "step": 191630 }, { "epoch": 0.729429139103096, "grad_norm": 0.12042704224586487, "learning_rate": 0.0005, "loss": 2.1261, "step": 191640 }, { "epoch": 0.7294672015712187, "grad_norm": 0.13758443295955658, "learning_rate": 0.0005, "loss": 2.104, "step": 191650 }, { "epoch": 0.7295052640393414, "grad_norm": 0.11594793200492859, "learning_rate": 0.0005, "loss": 2.0962, "step": 191660 }, { "epoch": 0.729543326507464, "grad_norm": 0.11860723793506622, "learning_rate": 0.0005, "loss": 2.1001, "step": 191670 }, { "epoch": 0.7295813889755868, "grad_norm": 0.12728294730186462, "learning_rate": 0.0005, "loss": 2.1065, "step": 191680 }, { "epoch": 0.7296194514437094, "grad_norm": 0.12003385275602341, "learning_rate": 0.0005, "loss": 2.1067, "step": 191690 }, { "epoch": 0.7296575139118321, "grad_norm": 0.12386397272348404, "learning_rate": 0.0005, "loss": 2.1018, "step": 191700 }, { "epoch": 0.7296955763799547, "grad_norm": 0.13539811968803406, "learning_rate": 0.0005, "loss": 2.1041, "step": 191710 }, { "epoch": 0.7297336388480775, "grad_norm": 0.12116830050945282, "learning_rate": 0.0005, "loss": 2.1223, "step": 191720 }, { "epoch": 0.7297717013162002, "grad_norm": 0.12179238349199295, "learning_rate": 0.0005, "loss": 2.1079, "step": 191730 }, { "epoch": 0.7298097637843228, "grad_norm": 0.1176956370472908, "learning_rate": 0.0005, "loss": 2.1238, "step": 191740 }, { "epoch": 0.7298478262524455, "grad_norm": 0.12996090948581696, "learning_rate": 0.0005, "loss": 2.098, "step": 191750 }, { "epoch": 0.7298858887205681, "grad_norm": 0.1440000981092453, "learning_rate": 0.0005, "loss": 2.1005, "step": 191760 }, { "epoch": 0.7299239511886909, "grad_norm": 0.11414307355880737, "learning_rate": 0.0005, "loss": 2.1155, "step": 191770 }, { "epoch": 0.7299620136568136, "grad_norm": 0.13091455399990082, "learning_rate": 0.0005, "loss": 2.1223, "step": 191780 }, { "epoch": 0.7300000761249362, "grad_norm": 0.1182379201054573, "learning_rate": 0.0005, "loss": 2.1158, "step": 191790 }, { "epoch": 0.7300381385930589, "grad_norm": 0.13387130200862885, "learning_rate": 0.0005, "loss": 2.1109, "step": 191800 }, { "epoch": 0.7300762010611817, "grad_norm": 0.16108554601669312, "learning_rate": 0.0005, "loss": 2.0841, "step": 191810 }, { "epoch": 0.7301142635293043, "grad_norm": 0.11871182173490524, "learning_rate": 0.0005, "loss": 2.0839, "step": 191820 }, { "epoch": 0.730152325997427, "grad_norm": 0.11011119186878204, "learning_rate": 0.0005, "loss": 2.1179, "step": 191830 }, { "epoch": 0.7301903884655496, "grad_norm": 0.12743213772773743, "learning_rate": 0.0005, "loss": 2.0948, "step": 191840 }, { "epoch": 0.7302284509336724, "grad_norm": 0.1217624694108963, "learning_rate": 0.0005, "loss": 2.1092, "step": 191850 }, { "epoch": 0.730266513401795, "grad_norm": 0.1229822188615799, "learning_rate": 0.0005, "loss": 2.1152, "step": 191860 }, { "epoch": 0.7303045758699177, "grad_norm": 0.11932218819856644, "learning_rate": 0.0005, "loss": 2.1139, "step": 191870 }, { "epoch": 0.7303426383380404, "grad_norm": 0.13336646556854248, "learning_rate": 0.0005, "loss": 2.1095, "step": 191880 }, { "epoch": 0.7303807008061631, "grad_norm": 0.13458584249019623, "learning_rate": 0.0005, "loss": 2.1228, "step": 191890 }, { "epoch": 0.7304187632742858, "grad_norm": 0.1292772889137268, "learning_rate": 0.0005, "loss": 2.0995, "step": 191900 }, { "epoch": 0.7304568257424084, "grad_norm": 0.12842907011508942, "learning_rate": 0.0005, "loss": 2.109, "step": 191910 }, { "epoch": 0.7304948882105311, "grad_norm": 0.11552907526493073, "learning_rate": 0.0005, "loss": 2.0999, "step": 191920 }, { "epoch": 0.7305329506786538, "grad_norm": 0.12777958810329437, "learning_rate": 0.0005, "loss": 2.1069, "step": 191930 }, { "epoch": 0.7305710131467765, "grad_norm": 0.1212586760520935, "learning_rate": 0.0005, "loss": 2.1077, "step": 191940 }, { "epoch": 0.7306090756148992, "grad_norm": 0.13225427269935608, "learning_rate": 0.0005, "loss": 2.1178, "step": 191950 }, { "epoch": 0.7306471380830218, "grad_norm": 0.12566696107387543, "learning_rate": 0.0005, "loss": 2.1143, "step": 191960 }, { "epoch": 0.7306852005511445, "grad_norm": 0.12067980319261551, "learning_rate": 0.0005, "loss": 2.1152, "step": 191970 }, { "epoch": 0.7307232630192673, "grad_norm": 0.11316678673028946, "learning_rate": 0.0005, "loss": 2.1016, "step": 191980 }, { "epoch": 0.7307613254873899, "grad_norm": 0.12638939917087555, "learning_rate": 0.0005, "loss": 2.1012, "step": 191990 }, { "epoch": 0.7307993879555126, "grad_norm": 0.12090083956718445, "learning_rate": 0.0005, "loss": 2.1153, "step": 192000 }, { "epoch": 0.7308374504236352, "grad_norm": 0.1209336370229721, "learning_rate": 0.0005, "loss": 2.117, "step": 192010 }, { "epoch": 0.730875512891758, "grad_norm": 0.12320481240749359, "learning_rate": 0.0005, "loss": 2.1096, "step": 192020 }, { "epoch": 0.7309135753598807, "grad_norm": 0.11362239718437195, "learning_rate": 0.0005, "loss": 2.1101, "step": 192030 }, { "epoch": 0.7309516378280033, "grad_norm": 0.12169143557548523, "learning_rate": 0.0005, "loss": 2.0909, "step": 192040 }, { "epoch": 0.730989700296126, "grad_norm": 0.12608258426189423, "learning_rate": 0.0005, "loss": 2.1016, "step": 192050 }, { "epoch": 0.7310277627642486, "grad_norm": 0.1409205049276352, "learning_rate": 0.0005, "loss": 2.1132, "step": 192060 }, { "epoch": 0.7310658252323714, "grad_norm": 0.11726506054401398, "learning_rate": 0.0005, "loss": 2.1169, "step": 192070 }, { "epoch": 0.7311038877004941, "grad_norm": 0.12257499247789383, "learning_rate": 0.0005, "loss": 2.1096, "step": 192080 }, { "epoch": 0.7311419501686167, "grad_norm": 0.1246478408575058, "learning_rate": 0.0005, "loss": 2.0977, "step": 192090 }, { "epoch": 0.7311800126367394, "grad_norm": 0.11844100058078766, "learning_rate": 0.0005, "loss": 2.0972, "step": 192100 }, { "epoch": 0.7312180751048621, "grad_norm": 0.12616798281669617, "learning_rate": 0.0005, "loss": 2.1224, "step": 192110 }, { "epoch": 0.7312561375729848, "grad_norm": 0.11756548285484314, "learning_rate": 0.0005, "loss": 2.1039, "step": 192120 }, { "epoch": 0.7312942000411075, "grad_norm": 0.15198977291584015, "learning_rate": 0.0005, "loss": 2.1221, "step": 192130 }, { "epoch": 0.7313322625092301, "grad_norm": 0.1201084554195404, "learning_rate": 0.0005, "loss": 2.1195, "step": 192140 }, { "epoch": 0.7313703249773529, "grad_norm": 0.13323737680912018, "learning_rate": 0.0005, "loss": 2.1053, "step": 192150 }, { "epoch": 0.7314083874454755, "grad_norm": 0.12064183503389359, "learning_rate": 0.0005, "loss": 2.1116, "step": 192160 }, { "epoch": 0.7314464499135982, "grad_norm": 0.12921644747257233, "learning_rate": 0.0005, "loss": 2.0976, "step": 192170 }, { "epoch": 0.7314845123817209, "grad_norm": 0.11377564817667007, "learning_rate": 0.0005, "loss": 2.1109, "step": 192180 }, { "epoch": 0.7315225748498435, "grad_norm": 0.11053812503814697, "learning_rate": 0.0005, "loss": 2.1008, "step": 192190 }, { "epoch": 0.7315606373179663, "grad_norm": 0.1232326477766037, "learning_rate": 0.0005, "loss": 2.1049, "step": 192200 }, { "epoch": 0.7315986997860889, "grad_norm": 0.122260183095932, "learning_rate": 0.0005, "loss": 2.1121, "step": 192210 }, { "epoch": 0.7316367622542116, "grad_norm": 0.13045214116573334, "learning_rate": 0.0005, "loss": 2.122, "step": 192220 }, { "epoch": 0.7316748247223342, "grad_norm": 0.11762091517448425, "learning_rate": 0.0005, "loss": 2.1231, "step": 192230 }, { "epoch": 0.731712887190457, "grad_norm": 0.11861058324575424, "learning_rate": 0.0005, "loss": 2.1169, "step": 192240 }, { "epoch": 0.7317509496585797, "grad_norm": 0.12967616319656372, "learning_rate": 0.0005, "loss": 2.1079, "step": 192250 }, { "epoch": 0.7317890121267023, "grad_norm": 0.11592860519886017, "learning_rate": 0.0005, "loss": 2.106, "step": 192260 }, { "epoch": 0.731827074594825, "grad_norm": 0.12144582718610764, "learning_rate": 0.0005, "loss": 2.1126, "step": 192270 }, { "epoch": 0.7318651370629478, "grad_norm": 0.11628725379705429, "learning_rate": 0.0005, "loss": 2.1112, "step": 192280 }, { "epoch": 0.7319031995310704, "grad_norm": 0.11305202543735504, "learning_rate": 0.0005, "loss": 2.1133, "step": 192290 }, { "epoch": 0.7319412619991931, "grad_norm": 0.11752969026565552, "learning_rate": 0.0005, "loss": 2.1076, "step": 192300 }, { "epoch": 0.7319793244673157, "grad_norm": 0.1308397501707077, "learning_rate": 0.0005, "loss": 2.1077, "step": 192310 }, { "epoch": 0.7320173869354385, "grad_norm": 0.13620050251483917, "learning_rate": 0.0005, "loss": 2.0956, "step": 192320 }, { "epoch": 0.7320554494035612, "grad_norm": 0.12767353653907776, "learning_rate": 0.0005, "loss": 2.1139, "step": 192330 }, { "epoch": 0.7320935118716838, "grad_norm": 0.1389073133468628, "learning_rate": 0.0005, "loss": 2.099, "step": 192340 }, { "epoch": 0.7321315743398065, "grad_norm": 0.14815621078014374, "learning_rate": 0.0005, "loss": 2.1067, "step": 192350 }, { "epoch": 0.7321696368079291, "grad_norm": 0.1339462399482727, "learning_rate": 0.0005, "loss": 2.103, "step": 192360 }, { "epoch": 0.7322076992760519, "grad_norm": 0.129747211933136, "learning_rate": 0.0005, "loss": 2.1216, "step": 192370 }, { "epoch": 0.7322457617441746, "grad_norm": 0.13563303649425507, "learning_rate": 0.0005, "loss": 2.1173, "step": 192380 }, { "epoch": 0.7322838242122972, "grad_norm": 0.12491580843925476, "learning_rate": 0.0005, "loss": 2.1068, "step": 192390 }, { "epoch": 0.7323218866804199, "grad_norm": 0.12931863963603973, "learning_rate": 0.0005, "loss": 2.109, "step": 192400 }, { "epoch": 0.7323599491485426, "grad_norm": 0.12038969993591309, "learning_rate": 0.0005, "loss": 2.1073, "step": 192410 }, { "epoch": 0.7323980116166653, "grad_norm": 0.12600909173488617, "learning_rate": 0.0005, "loss": 2.1134, "step": 192420 }, { "epoch": 0.732436074084788, "grad_norm": 0.12822246551513672, "learning_rate": 0.0005, "loss": 2.0965, "step": 192430 }, { "epoch": 0.7324741365529106, "grad_norm": 0.12231694906949997, "learning_rate": 0.0005, "loss": 2.1077, "step": 192440 }, { "epoch": 0.7325121990210334, "grad_norm": 0.12413424998521805, "learning_rate": 0.0005, "loss": 2.0926, "step": 192450 }, { "epoch": 0.732550261489156, "grad_norm": 0.12980525195598602, "learning_rate": 0.0005, "loss": 2.0957, "step": 192460 }, { "epoch": 0.7325883239572787, "grad_norm": 0.11845885217189789, "learning_rate": 0.0005, "loss": 2.097, "step": 192470 }, { "epoch": 0.7326263864254013, "grad_norm": 0.13867278397083282, "learning_rate": 0.0005, "loss": 2.1115, "step": 192480 }, { "epoch": 0.732664448893524, "grad_norm": 0.1332332342863083, "learning_rate": 0.0005, "loss": 2.1278, "step": 192490 }, { "epoch": 0.7327025113616468, "grad_norm": 0.12007120251655579, "learning_rate": 0.0005, "loss": 2.1035, "step": 192500 }, { "epoch": 0.7327405738297694, "grad_norm": 0.14139346778392792, "learning_rate": 0.0005, "loss": 2.0876, "step": 192510 }, { "epoch": 0.7327786362978921, "grad_norm": 0.1176033467054367, "learning_rate": 0.0005, "loss": 2.1009, "step": 192520 }, { "epoch": 0.7328166987660147, "grad_norm": 0.11958561837673187, "learning_rate": 0.0005, "loss": 2.1234, "step": 192530 }, { "epoch": 0.7328547612341375, "grad_norm": 0.11961330473423004, "learning_rate": 0.0005, "loss": 2.1084, "step": 192540 }, { "epoch": 0.7328928237022602, "grad_norm": 0.13144491612911224, "learning_rate": 0.0005, "loss": 2.122, "step": 192550 }, { "epoch": 0.7329308861703828, "grad_norm": 0.12117725610733032, "learning_rate": 0.0005, "loss": 2.1095, "step": 192560 }, { "epoch": 0.7329689486385055, "grad_norm": 0.11846371740102768, "learning_rate": 0.0005, "loss": 2.1057, "step": 192570 }, { "epoch": 0.7330070111066282, "grad_norm": 0.13184702396392822, "learning_rate": 0.0005, "loss": 2.1178, "step": 192580 }, { "epoch": 0.7330450735747509, "grad_norm": 0.13365581631660461, "learning_rate": 0.0005, "loss": 2.094, "step": 192590 }, { "epoch": 0.7330831360428736, "grad_norm": 0.11207521706819534, "learning_rate": 0.0005, "loss": 2.1016, "step": 192600 }, { "epoch": 0.7331211985109962, "grad_norm": 0.1342562884092331, "learning_rate": 0.0005, "loss": 2.1138, "step": 192610 }, { "epoch": 0.7331592609791189, "grad_norm": 0.1263633370399475, "learning_rate": 0.0005, "loss": 2.1147, "step": 192620 }, { "epoch": 0.7331973234472416, "grad_norm": 0.11910117417573929, "learning_rate": 0.0005, "loss": 2.1064, "step": 192630 }, { "epoch": 0.7332353859153643, "grad_norm": 0.12975086271762848, "learning_rate": 0.0005, "loss": 2.1167, "step": 192640 }, { "epoch": 0.733273448383487, "grad_norm": 0.14330074191093445, "learning_rate": 0.0005, "loss": 2.1202, "step": 192650 }, { "epoch": 0.7333115108516096, "grad_norm": 0.1286696344614029, "learning_rate": 0.0005, "loss": 2.0972, "step": 192660 }, { "epoch": 0.7333495733197324, "grad_norm": 0.11552855372428894, "learning_rate": 0.0005, "loss": 2.1004, "step": 192670 }, { "epoch": 0.733387635787855, "grad_norm": 0.12719112634658813, "learning_rate": 0.0005, "loss": 2.1176, "step": 192680 }, { "epoch": 0.7334256982559777, "grad_norm": 0.11824102699756622, "learning_rate": 0.0005, "loss": 2.1061, "step": 192690 }, { "epoch": 0.7334637607241004, "grad_norm": 0.12147907167673111, "learning_rate": 0.0005, "loss": 2.1111, "step": 192700 }, { "epoch": 0.7335018231922231, "grad_norm": 0.11929722875356674, "learning_rate": 0.0005, "loss": 2.1058, "step": 192710 }, { "epoch": 0.7335398856603458, "grad_norm": 0.12859030067920685, "learning_rate": 0.0005, "loss": 2.1102, "step": 192720 }, { "epoch": 0.7335779481284684, "grad_norm": 0.12841321527957916, "learning_rate": 0.0005, "loss": 2.1111, "step": 192730 }, { "epoch": 0.7336160105965911, "grad_norm": 0.11988534033298492, "learning_rate": 0.0005, "loss": 2.1087, "step": 192740 }, { "epoch": 0.7336540730647139, "grad_norm": 0.12679408490657806, "learning_rate": 0.0005, "loss": 2.0997, "step": 192750 }, { "epoch": 0.7336921355328365, "grad_norm": 0.11900787800550461, "learning_rate": 0.0005, "loss": 2.1103, "step": 192760 }, { "epoch": 0.7337301980009592, "grad_norm": 0.12465648353099823, "learning_rate": 0.0005, "loss": 2.108, "step": 192770 }, { "epoch": 0.7337682604690818, "grad_norm": 0.11600150167942047, "learning_rate": 0.0005, "loss": 2.1027, "step": 192780 }, { "epoch": 0.7338063229372045, "grad_norm": 0.12035398185253143, "learning_rate": 0.0005, "loss": 2.1008, "step": 192790 }, { "epoch": 0.7338443854053273, "grad_norm": 0.11685821413993835, "learning_rate": 0.0005, "loss": 2.0968, "step": 192800 }, { "epoch": 0.7338824478734499, "grad_norm": 0.1247807964682579, "learning_rate": 0.0005, "loss": 2.1045, "step": 192810 }, { "epoch": 0.7339205103415726, "grad_norm": 0.12759731709957123, "learning_rate": 0.0005, "loss": 2.1087, "step": 192820 }, { "epoch": 0.7339585728096952, "grad_norm": 0.11301767081022263, "learning_rate": 0.0005, "loss": 2.0982, "step": 192830 }, { "epoch": 0.733996635277818, "grad_norm": 0.11778232455253601, "learning_rate": 0.0005, "loss": 2.1036, "step": 192840 }, { "epoch": 0.7340346977459407, "grad_norm": 0.12188120931386948, "learning_rate": 0.0005, "loss": 2.0961, "step": 192850 }, { "epoch": 0.7340727602140633, "grad_norm": 0.13007090985774994, "learning_rate": 0.0005, "loss": 2.1158, "step": 192860 }, { "epoch": 0.734110822682186, "grad_norm": 0.13528990745544434, "learning_rate": 0.0005, "loss": 2.1295, "step": 192870 }, { "epoch": 0.7341488851503087, "grad_norm": 0.12280694395303726, "learning_rate": 0.0005, "loss": 2.1026, "step": 192880 }, { "epoch": 0.7341869476184314, "grad_norm": 0.12887060642242432, "learning_rate": 0.0005, "loss": 2.0977, "step": 192890 }, { "epoch": 0.734225010086554, "grad_norm": 0.11871284246444702, "learning_rate": 0.0005, "loss": 2.1009, "step": 192900 }, { "epoch": 0.7342630725546767, "grad_norm": 0.111478291451931, "learning_rate": 0.0005, "loss": 2.1049, "step": 192910 }, { "epoch": 0.7343011350227994, "grad_norm": 0.12480342388153076, "learning_rate": 0.0005, "loss": 2.1192, "step": 192920 }, { "epoch": 0.7343391974909221, "grad_norm": 0.11666394770145416, "learning_rate": 0.0005, "loss": 2.1127, "step": 192930 }, { "epoch": 0.7343772599590448, "grad_norm": 0.11283561587333679, "learning_rate": 0.0005, "loss": 2.1169, "step": 192940 }, { "epoch": 0.7344153224271674, "grad_norm": 0.1266782432794571, "learning_rate": 0.0005, "loss": 2.0996, "step": 192950 }, { "epoch": 0.7344533848952901, "grad_norm": 0.1207321286201477, "learning_rate": 0.0005, "loss": 2.1007, "step": 192960 }, { "epoch": 0.7344914473634129, "grad_norm": 0.13289088010787964, "learning_rate": 0.0005, "loss": 2.1111, "step": 192970 }, { "epoch": 0.7345295098315355, "grad_norm": 0.13234733045101166, "learning_rate": 0.0005, "loss": 2.1222, "step": 192980 }, { "epoch": 0.7345675722996582, "grad_norm": 0.13487665355205536, "learning_rate": 0.0005, "loss": 2.1038, "step": 192990 }, { "epoch": 0.7346056347677808, "grad_norm": 0.11529957503080368, "learning_rate": 0.0005, "loss": 2.1049, "step": 193000 }, { "epoch": 0.7346436972359036, "grad_norm": 0.1310238540172577, "learning_rate": 0.0005, "loss": 2.1175, "step": 193010 }, { "epoch": 0.7346817597040263, "grad_norm": 0.1475614756345749, "learning_rate": 0.0005, "loss": 2.1178, "step": 193020 }, { "epoch": 0.7347198221721489, "grad_norm": 0.12483355402946472, "learning_rate": 0.0005, "loss": 2.1136, "step": 193030 }, { "epoch": 0.7347578846402716, "grad_norm": 0.15248079597949982, "learning_rate": 0.0005, "loss": 2.1103, "step": 193040 }, { "epoch": 0.7347959471083942, "grad_norm": 0.12657523155212402, "learning_rate": 0.0005, "loss": 2.1039, "step": 193050 }, { "epoch": 0.734834009576517, "grad_norm": 0.13055965304374695, "learning_rate": 0.0005, "loss": 2.1037, "step": 193060 }, { "epoch": 0.7348720720446397, "grad_norm": 0.12264831364154816, "learning_rate": 0.0005, "loss": 2.104, "step": 193070 }, { "epoch": 0.7349101345127623, "grad_norm": 0.12394838035106659, "learning_rate": 0.0005, "loss": 2.1122, "step": 193080 }, { "epoch": 0.734948196980885, "grad_norm": 0.12351053953170776, "learning_rate": 0.0005, "loss": 2.1079, "step": 193090 }, { "epoch": 0.7349862594490078, "grad_norm": 0.11562132835388184, "learning_rate": 0.0005, "loss": 2.1171, "step": 193100 }, { "epoch": 0.7350243219171304, "grad_norm": 0.12288986146450043, "learning_rate": 0.0005, "loss": 2.1068, "step": 193110 }, { "epoch": 0.7350623843852531, "grad_norm": 0.12032705545425415, "learning_rate": 0.0005, "loss": 2.1113, "step": 193120 }, { "epoch": 0.7351004468533757, "grad_norm": 0.12123995274305344, "learning_rate": 0.0005, "loss": 2.102, "step": 193130 }, { "epoch": 0.7351385093214985, "grad_norm": 0.11481358855962753, "learning_rate": 0.0005, "loss": 2.0922, "step": 193140 }, { "epoch": 0.7351765717896211, "grad_norm": 0.128712460398674, "learning_rate": 0.0005, "loss": 2.107, "step": 193150 }, { "epoch": 0.7352146342577438, "grad_norm": 0.12128186970949173, "learning_rate": 0.0005, "loss": 2.0784, "step": 193160 }, { "epoch": 0.7352526967258665, "grad_norm": 0.12377264350652695, "learning_rate": 0.0005, "loss": 2.1126, "step": 193170 }, { "epoch": 0.7352907591939892, "grad_norm": 0.14269936084747314, "learning_rate": 0.0005, "loss": 2.101, "step": 193180 }, { "epoch": 0.7353288216621119, "grad_norm": 0.12458387017250061, "learning_rate": 0.0005, "loss": 2.1113, "step": 193190 }, { "epoch": 0.7353668841302345, "grad_norm": 0.11986847221851349, "learning_rate": 0.0005, "loss": 2.0962, "step": 193200 }, { "epoch": 0.7354049465983572, "grad_norm": 0.12104091048240662, "learning_rate": 0.0005, "loss": 2.1087, "step": 193210 }, { "epoch": 0.7354430090664799, "grad_norm": 0.13339339196681976, "learning_rate": 0.0005, "loss": 2.1067, "step": 193220 }, { "epoch": 0.7354810715346026, "grad_norm": 0.1323716938495636, "learning_rate": 0.0005, "loss": 2.1047, "step": 193230 }, { "epoch": 0.7355191340027253, "grad_norm": 0.11504301428794861, "learning_rate": 0.0005, "loss": 2.1067, "step": 193240 }, { "epoch": 0.7355571964708479, "grad_norm": 0.11710140109062195, "learning_rate": 0.0005, "loss": 2.1075, "step": 193250 }, { "epoch": 0.7355952589389706, "grad_norm": 0.12178219109773636, "learning_rate": 0.0005, "loss": 2.1004, "step": 193260 }, { "epoch": 0.7356333214070934, "grad_norm": 0.11730635166168213, "learning_rate": 0.0005, "loss": 2.1192, "step": 193270 }, { "epoch": 0.735671383875216, "grad_norm": 0.12166262418031693, "learning_rate": 0.0005, "loss": 2.1002, "step": 193280 }, { "epoch": 0.7357094463433387, "grad_norm": 0.13376054167747498, "learning_rate": 0.0005, "loss": 2.115, "step": 193290 }, { "epoch": 0.7357475088114613, "grad_norm": 0.11151111125946045, "learning_rate": 0.0005, "loss": 2.1076, "step": 193300 }, { "epoch": 0.7357855712795841, "grad_norm": 0.11726437509059906, "learning_rate": 0.0005, "loss": 2.1244, "step": 193310 }, { "epoch": 0.7358236337477068, "grad_norm": 0.12291442602872849, "learning_rate": 0.0005, "loss": 2.1064, "step": 193320 }, { "epoch": 0.7358616962158294, "grad_norm": 0.12281958758831024, "learning_rate": 0.0005, "loss": 2.1066, "step": 193330 }, { "epoch": 0.7358997586839521, "grad_norm": 0.13164712488651276, "learning_rate": 0.0005, "loss": 2.1026, "step": 193340 }, { "epoch": 0.7359378211520747, "grad_norm": 0.12434583902359009, "learning_rate": 0.0005, "loss": 2.1122, "step": 193350 }, { "epoch": 0.7359758836201975, "grad_norm": 0.12332146614789963, "learning_rate": 0.0005, "loss": 2.1094, "step": 193360 }, { "epoch": 0.7360139460883202, "grad_norm": 0.12583640217781067, "learning_rate": 0.0005, "loss": 2.1056, "step": 193370 }, { "epoch": 0.7360520085564428, "grad_norm": 0.1278449147939682, "learning_rate": 0.0005, "loss": 2.1144, "step": 193380 }, { "epoch": 0.7360900710245655, "grad_norm": 0.12696930766105652, "learning_rate": 0.0005, "loss": 2.107, "step": 193390 }, { "epoch": 0.7361281334926882, "grad_norm": 0.11661224067211151, "learning_rate": 0.0005, "loss": 2.1047, "step": 193400 }, { "epoch": 0.7361661959608109, "grad_norm": 0.13193956017494202, "learning_rate": 0.0005, "loss": 2.1177, "step": 193410 }, { "epoch": 0.7362042584289336, "grad_norm": 0.1422918438911438, "learning_rate": 0.0005, "loss": 2.1114, "step": 193420 }, { "epoch": 0.7362423208970562, "grad_norm": 0.12093400210142136, "learning_rate": 0.0005, "loss": 2.1162, "step": 193430 }, { "epoch": 0.736280383365179, "grad_norm": 0.11584337800741196, "learning_rate": 0.0005, "loss": 2.1016, "step": 193440 }, { "epoch": 0.7363184458333016, "grad_norm": 0.1224224641919136, "learning_rate": 0.0005, "loss": 2.1181, "step": 193450 }, { "epoch": 0.7363565083014243, "grad_norm": 0.1242123395204544, "learning_rate": 0.0005, "loss": 2.0986, "step": 193460 }, { "epoch": 0.736394570769547, "grad_norm": 0.13345323503017426, "learning_rate": 0.0005, "loss": 2.1129, "step": 193470 }, { "epoch": 0.7364326332376696, "grad_norm": 0.1371457725763321, "learning_rate": 0.0005, "loss": 2.1152, "step": 193480 }, { "epoch": 0.7364706957057924, "grad_norm": 0.12954550981521606, "learning_rate": 0.0005, "loss": 2.0934, "step": 193490 }, { "epoch": 0.736508758173915, "grad_norm": 0.12712004780769348, "learning_rate": 0.0005, "loss": 2.0927, "step": 193500 }, { "epoch": 0.7365468206420377, "grad_norm": 0.12469438463449478, "learning_rate": 0.0005, "loss": 2.1054, "step": 193510 }, { "epoch": 0.7365848831101603, "grad_norm": 0.12666510045528412, "learning_rate": 0.0005, "loss": 2.1037, "step": 193520 }, { "epoch": 0.7366229455782831, "grad_norm": 0.12624983489513397, "learning_rate": 0.0005, "loss": 2.0918, "step": 193530 }, { "epoch": 0.7366610080464058, "grad_norm": 0.12753058969974518, "learning_rate": 0.0005, "loss": 2.1112, "step": 193540 }, { "epoch": 0.7366990705145284, "grad_norm": 0.1122048869729042, "learning_rate": 0.0005, "loss": 2.0855, "step": 193550 }, { "epoch": 0.7367371329826511, "grad_norm": 0.12347835302352905, "learning_rate": 0.0005, "loss": 2.1077, "step": 193560 }, { "epoch": 0.7367751954507739, "grad_norm": 0.12568634748458862, "learning_rate": 0.0005, "loss": 2.1098, "step": 193570 }, { "epoch": 0.7368132579188965, "grad_norm": 0.11385814845561981, "learning_rate": 0.0005, "loss": 2.1113, "step": 193580 }, { "epoch": 0.7368513203870192, "grad_norm": 0.13868993520736694, "learning_rate": 0.0005, "loss": 2.1013, "step": 193590 }, { "epoch": 0.7368893828551418, "grad_norm": 0.1321054995059967, "learning_rate": 0.0005, "loss": 2.1047, "step": 193600 }, { "epoch": 0.7369274453232646, "grad_norm": 0.12079103291034698, "learning_rate": 0.0005, "loss": 2.1058, "step": 193610 }, { "epoch": 0.7369655077913873, "grad_norm": 0.11624564230442047, "learning_rate": 0.0005, "loss": 2.1163, "step": 193620 }, { "epoch": 0.7370035702595099, "grad_norm": 0.13603192567825317, "learning_rate": 0.0005, "loss": 2.1035, "step": 193630 }, { "epoch": 0.7370416327276326, "grad_norm": 0.1264621466398239, "learning_rate": 0.0005, "loss": 2.1012, "step": 193640 }, { "epoch": 0.7370796951957552, "grad_norm": 0.11997373402118683, "learning_rate": 0.0005, "loss": 2.1192, "step": 193650 }, { "epoch": 0.737117757663878, "grad_norm": 0.1271255761384964, "learning_rate": 0.0005, "loss": 2.1021, "step": 193660 }, { "epoch": 0.7371558201320006, "grad_norm": 0.13099275529384613, "learning_rate": 0.0005, "loss": 2.1006, "step": 193670 }, { "epoch": 0.7371938826001233, "grad_norm": 0.11465470492839813, "learning_rate": 0.0005, "loss": 2.092, "step": 193680 }, { "epoch": 0.737231945068246, "grad_norm": 0.13196159899234772, "learning_rate": 0.0005, "loss": 2.1119, "step": 193690 }, { "epoch": 0.7372700075363687, "grad_norm": 0.11341453343629837, "learning_rate": 0.0005, "loss": 2.111, "step": 193700 }, { "epoch": 0.7373080700044914, "grad_norm": 0.12351559102535248, "learning_rate": 0.0005, "loss": 2.1221, "step": 193710 }, { "epoch": 0.737346132472614, "grad_norm": 0.12498927861452103, "learning_rate": 0.0005, "loss": 2.1068, "step": 193720 }, { "epoch": 0.7373841949407367, "grad_norm": 0.11956217139959335, "learning_rate": 0.0005, "loss": 2.1008, "step": 193730 }, { "epoch": 0.7374222574088595, "grad_norm": 0.1368047147989273, "learning_rate": 0.0005, "loss": 2.117, "step": 193740 }, { "epoch": 0.7374603198769821, "grad_norm": 0.12249314039945602, "learning_rate": 0.0005, "loss": 2.1097, "step": 193750 }, { "epoch": 0.7374983823451048, "grad_norm": 0.1328524500131607, "learning_rate": 0.0005, "loss": 2.1003, "step": 193760 }, { "epoch": 0.7375364448132274, "grad_norm": 0.23825938999652863, "learning_rate": 0.0005, "loss": 2.1102, "step": 193770 }, { "epoch": 0.7375745072813501, "grad_norm": 0.11991281062364578, "learning_rate": 0.0005, "loss": 2.1079, "step": 193780 }, { "epoch": 0.7376125697494729, "grad_norm": 0.12263292819261551, "learning_rate": 0.0005, "loss": 2.104, "step": 193790 }, { "epoch": 0.7376506322175955, "grad_norm": 0.10938142985105515, "learning_rate": 0.0005, "loss": 2.0997, "step": 193800 }, { "epoch": 0.7376886946857182, "grad_norm": 0.12292012572288513, "learning_rate": 0.0005, "loss": 2.1226, "step": 193810 }, { "epoch": 0.7377267571538408, "grad_norm": 0.13027265667915344, "learning_rate": 0.0005, "loss": 2.1216, "step": 193820 }, { "epoch": 0.7377648196219636, "grad_norm": 0.13232482969760895, "learning_rate": 0.0005, "loss": 2.1076, "step": 193830 }, { "epoch": 0.7378028820900863, "grad_norm": 0.12443753331899643, "learning_rate": 0.0005, "loss": 2.1115, "step": 193840 }, { "epoch": 0.7378409445582089, "grad_norm": 0.11491934210062027, "learning_rate": 0.0005, "loss": 2.0982, "step": 193850 }, { "epoch": 0.7378790070263316, "grad_norm": 0.11515094339847565, "learning_rate": 0.0005, "loss": 2.1057, "step": 193860 }, { "epoch": 0.7379170694944543, "grad_norm": 0.11938028037548065, "learning_rate": 0.0005, "loss": 2.0905, "step": 193870 }, { "epoch": 0.737955131962577, "grad_norm": 0.13776546716690063, "learning_rate": 0.0005, "loss": 2.1172, "step": 193880 }, { "epoch": 0.7379931944306997, "grad_norm": 0.12095969170331955, "learning_rate": 0.0005, "loss": 2.0996, "step": 193890 }, { "epoch": 0.7380312568988223, "grad_norm": 0.1486758291721344, "learning_rate": 0.0005, "loss": 2.1257, "step": 193900 }, { "epoch": 0.738069319366945, "grad_norm": 0.12475045770406723, "learning_rate": 0.0005, "loss": 2.1047, "step": 193910 }, { "epoch": 0.7381073818350677, "grad_norm": 0.12161503732204437, "learning_rate": 0.0005, "loss": 2.1129, "step": 193920 }, { "epoch": 0.7381454443031904, "grad_norm": 0.13839933276176453, "learning_rate": 0.0005, "loss": 2.1189, "step": 193930 }, { "epoch": 0.7381835067713131, "grad_norm": 0.13922223448753357, "learning_rate": 0.0005, "loss": 2.1068, "step": 193940 }, { "epoch": 0.7382215692394357, "grad_norm": 0.12621387839317322, "learning_rate": 0.0005, "loss": 2.1027, "step": 193950 }, { "epoch": 0.7382596317075585, "grad_norm": 0.12859176099300385, "learning_rate": 0.0005, "loss": 2.1193, "step": 193960 }, { "epoch": 0.7382976941756811, "grad_norm": 0.12133989483118057, "learning_rate": 0.0005, "loss": 2.1008, "step": 193970 }, { "epoch": 0.7383357566438038, "grad_norm": 0.11736427992582321, "learning_rate": 0.0005, "loss": 2.101, "step": 193980 }, { "epoch": 0.7383738191119265, "grad_norm": 0.12446961551904678, "learning_rate": 0.0005, "loss": 2.1067, "step": 193990 }, { "epoch": 0.7384118815800492, "grad_norm": 0.1348889321088791, "learning_rate": 0.0005, "loss": 2.1002, "step": 194000 }, { "epoch": 0.7384499440481719, "grad_norm": 0.12804125249385834, "learning_rate": 0.0005, "loss": 2.1013, "step": 194010 }, { "epoch": 0.7384880065162945, "grad_norm": 0.11841566115617752, "learning_rate": 0.0005, "loss": 2.1036, "step": 194020 }, { "epoch": 0.7385260689844172, "grad_norm": 0.12618225812911987, "learning_rate": 0.0005, "loss": 2.1038, "step": 194030 }, { "epoch": 0.73856413145254, "grad_norm": 0.14122790098190308, "learning_rate": 0.0005, "loss": 2.1035, "step": 194040 }, { "epoch": 0.7386021939206626, "grad_norm": 0.12055061757564545, "learning_rate": 0.0005, "loss": 2.1085, "step": 194050 }, { "epoch": 0.7386402563887853, "grad_norm": 0.11497969925403595, "learning_rate": 0.0005, "loss": 2.1041, "step": 194060 }, { "epoch": 0.7386783188569079, "grad_norm": 0.11820774525403976, "learning_rate": 0.0005, "loss": 2.1194, "step": 194070 }, { "epoch": 0.7387163813250306, "grad_norm": 0.14072273671627045, "learning_rate": 0.0005, "loss": 2.0959, "step": 194080 }, { "epoch": 0.7387544437931534, "grad_norm": 0.12891046702861786, "learning_rate": 0.0005, "loss": 2.1263, "step": 194090 }, { "epoch": 0.738792506261276, "grad_norm": 0.1222953349351883, "learning_rate": 0.0005, "loss": 2.0974, "step": 194100 }, { "epoch": 0.7388305687293987, "grad_norm": 0.12340711802244186, "learning_rate": 0.0005, "loss": 2.1037, "step": 194110 }, { "epoch": 0.7388686311975213, "grad_norm": 0.13871681690216064, "learning_rate": 0.0005, "loss": 2.107, "step": 194120 }, { "epoch": 0.7389066936656441, "grad_norm": 0.18659791350364685, "learning_rate": 0.0005, "loss": 2.1212, "step": 194130 }, { "epoch": 0.7389447561337668, "grad_norm": 0.12714652717113495, "learning_rate": 0.0005, "loss": 2.1146, "step": 194140 }, { "epoch": 0.7389828186018894, "grad_norm": 0.11733846366405487, "learning_rate": 0.0005, "loss": 2.1034, "step": 194150 }, { "epoch": 0.7390208810700121, "grad_norm": 0.11640311032533646, "learning_rate": 0.0005, "loss": 2.1136, "step": 194160 }, { "epoch": 0.7390589435381348, "grad_norm": 0.12715081870555878, "learning_rate": 0.0005, "loss": 2.1015, "step": 194170 }, { "epoch": 0.7390970060062575, "grad_norm": 0.12195795029401779, "learning_rate": 0.0005, "loss": 2.1055, "step": 194180 }, { "epoch": 0.7391350684743802, "grad_norm": 0.13097110390663147, "learning_rate": 0.0005, "loss": 2.1055, "step": 194190 }, { "epoch": 0.7391731309425028, "grad_norm": 0.13842256367206573, "learning_rate": 0.0005, "loss": 2.098, "step": 194200 }, { "epoch": 0.7392111934106255, "grad_norm": 0.12363022565841675, "learning_rate": 0.0005, "loss": 2.1074, "step": 194210 }, { "epoch": 0.7392492558787482, "grad_norm": 0.12613829970359802, "learning_rate": 0.0005, "loss": 2.1003, "step": 194220 }, { "epoch": 0.7392873183468709, "grad_norm": 0.12938351929187775, "learning_rate": 0.0005, "loss": 2.1037, "step": 194230 }, { "epoch": 0.7393253808149935, "grad_norm": 0.13193729519844055, "learning_rate": 0.0005, "loss": 2.1018, "step": 194240 }, { "epoch": 0.7393634432831162, "grad_norm": 0.1291109174489975, "learning_rate": 0.0005, "loss": 2.1062, "step": 194250 }, { "epoch": 0.739401505751239, "grad_norm": 0.13131606578826904, "learning_rate": 0.0005, "loss": 2.1047, "step": 194260 }, { "epoch": 0.7394395682193616, "grad_norm": 0.12098632752895355, "learning_rate": 0.0005, "loss": 2.101, "step": 194270 }, { "epoch": 0.7394776306874843, "grad_norm": 0.121994249522686, "learning_rate": 0.0005, "loss": 2.104, "step": 194280 }, { "epoch": 0.739515693155607, "grad_norm": 0.12419252097606659, "learning_rate": 0.0005, "loss": 2.1214, "step": 194290 }, { "epoch": 0.7395537556237297, "grad_norm": 0.1346055269241333, "learning_rate": 0.0005, "loss": 2.1153, "step": 194300 }, { "epoch": 0.7395918180918524, "grad_norm": 0.128359854221344, "learning_rate": 0.0005, "loss": 2.0956, "step": 194310 }, { "epoch": 0.739629880559975, "grad_norm": 0.14567787945270538, "learning_rate": 0.0005, "loss": 2.1049, "step": 194320 }, { "epoch": 0.7396679430280977, "grad_norm": 0.12489699572324753, "learning_rate": 0.0005, "loss": 2.0941, "step": 194330 }, { "epoch": 0.7397060054962203, "grad_norm": 0.1287219524383545, "learning_rate": 0.0005, "loss": 2.101, "step": 194340 }, { "epoch": 0.7397440679643431, "grad_norm": 0.13634465634822845, "learning_rate": 0.0005, "loss": 2.1175, "step": 194350 }, { "epoch": 0.7397821304324658, "grad_norm": 0.11655566096305847, "learning_rate": 0.0005, "loss": 2.1101, "step": 194360 }, { "epoch": 0.7398201929005884, "grad_norm": 0.11334828287363052, "learning_rate": 0.0005, "loss": 2.092, "step": 194370 }, { "epoch": 0.7398582553687111, "grad_norm": 0.12410476803779602, "learning_rate": 0.0005, "loss": 2.0999, "step": 194380 }, { "epoch": 0.7398963178368339, "grad_norm": 0.12430357187986374, "learning_rate": 0.0005, "loss": 2.1084, "step": 194390 }, { "epoch": 0.7399343803049565, "grad_norm": 0.1274576187133789, "learning_rate": 0.0005, "loss": 2.1065, "step": 194400 }, { "epoch": 0.7399724427730792, "grad_norm": 0.12407605350017548, "learning_rate": 0.0005, "loss": 2.1121, "step": 194410 }, { "epoch": 0.7400105052412018, "grad_norm": 0.13608965277671814, "learning_rate": 0.0005, "loss": 2.0964, "step": 194420 }, { "epoch": 0.7400485677093246, "grad_norm": 0.1341804713010788, "learning_rate": 0.0005, "loss": 2.1102, "step": 194430 }, { "epoch": 0.7400866301774472, "grad_norm": 0.1318841278553009, "learning_rate": 0.0005, "loss": 2.1071, "step": 194440 }, { "epoch": 0.7401246926455699, "grad_norm": 0.11794579029083252, "learning_rate": 0.0005, "loss": 2.1111, "step": 194450 }, { "epoch": 0.7401627551136926, "grad_norm": 0.1288701295852661, "learning_rate": 0.0005, "loss": 2.1125, "step": 194460 }, { "epoch": 0.7402008175818153, "grad_norm": 0.11914876848459244, "learning_rate": 0.0005, "loss": 2.105, "step": 194470 }, { "epoch": 0.740238880049938, "grad_norm": 0.13467030227184296, "learning_rate": 0.0005, "loss": 2.1077, "step": 194480 }, { "epoch": 0.7402769425180606, "grad_norm": 0.14675983786582947, "learning_rate": 0.0005, "loss": 2.1168, "step": 194490 }, { "epoch": 0.7403150049861833, "grad_norm": 0.12259969860315323, "learning_rate": 0.0005, "loss": 2.1011, "step": 194500 }, { "epoch": 0.740353067454306, "grad_norm": 0.12208813428878784, "learning_rate": 0.0005, "loss": 2.118, "step": 194510 }, { "epoch": 0.7403911299224287, "grad_norm": 0.12095604836940765, "learning_rate": 0.0005, "loss": 2.0999, "step": 194520 }, { "epoch": 0.7404291923905514, "grad_norm": 0.12412271648645401, "learning_rate": 0.0005, "loss": 2.1014, "step": 194530 }, { "epoch": 0.740467254858674, "grad_norm": 0.11599559336900711, "learning_rate": 0.0005, "loss": 2.1127, "step": 194540 }, { "epoch": 0.7405053173267967, "grad_norm": 0.1328132301568985, "learning_rate": 0.0005, "loss": 2.1084, "step": 194550 }, { "epoch": 0.7405433797949195, "grad_norm": 0.12443557381629944, "learning_rate": 0.0005, "loss": 2.1139, "step": 194560 }, { "epoch": 0.7405814422630421, "grad_norm": 0.11573288589715958, "learning_rate": 0.0005, "loss": 2.1016, "step": 194570 }, { "epoch": 0.7406195047311648, "grad_norm": 0.1223871111869812, "learning_rate": 0.0005, "loss": 2.1151, "step": 194580 }, { "epoch": 0.7406575671992874, "grad_norm": 0.13990621268749237, "learning_rate": 0.0005, "loss": 2.1, "step": 194590 }, { "epoch": 0.7406956296674102, "grad_norm": 0.1179359182715416, "learning_rate": 0.0005, "loss": 2.1073, "step": 194600 }, { "epoch": 0.7407336921355329, "grad_norm": 0.11926417052745819, "learning_rate": 0.0005, "loss": 2.1002, "step": 194610 }, { "epoch": 0.7407717546036555, "grad_norm": 0.12110884487628937, "learning_rate": 0.0005, "loss": 2.1215, "step": 194620 }, { "epoch": 0.7408098170717782, "grad_norm": 0.12658652663230896, "learning_rate": 0.0005, "loss": 2.1026, "step": 194630 }, { "epoch": 0.7408478795399008, "grad_norm": 0.125624418258667, "learning_rate": 0.0005, "loss": 2.1127, "step": 194640 }, { "epoch": 0.7408859420080236, "grad_norm": 0.1293461173772812, "learning_rate": 0.0005, "loss": 2.0944, "step": 194650 }, { "epoch": 0.7409240044761463, "grad_norm": 0.1234101727604866, "learning_rate": 0.0005, "loss": 2.1134, "step": 194660 }, { "epoch": 0.7409620669442689, "grad_norm": 0.1319395899772644, "learning_rate": 0.0005, "loss": 2.1139, "step": 194670 }, { "epoch": 0.7410001294123916, "grad_norm": 0.12228941917419434, "learning_rate": 0.0005, "loss": 2.108, "step": 194680 }, { "epoch": 0.7410381918805143, "grad_norm": 0.13420431315898895, "learning_rate": 0.0005, "loss": 2.1021, "step": 194690 }, { "epoch": 0.741076254348637, "grad_norm": 0.12183579057455063, "learning_rate": 0.0005, "loss": 2.1126, "step": 194700 }, { "epoch": 0.7411143168167597, "grad_norm": 0.13289429247379303, "learning_rate": 0.0005, "loss": 2.1059, "step": 194710 }, { "epoch": 0.7411523792848823, "grad_norm": 0.1252071112394333, "learning_rate": 0.0005, "loss": 2.1105, "step": 194720 }, { "epoch": 0.7411904417530051, "grad_norm": 0.13454918563365936, "learning_rate": 0.0005, "loss": 2.104, "step": 194730 }, { "epoch": 0.7412285042211277, "grad_norm": 0.11970204859972, "learning_rate": 0.0005, "loss": 2.1048, "step": 194740 }, { "epoch": 0.7412665666892504, "grad_norm": 0.1275210827589035, "learning_rate": 0.0005, "loss": 2.11, "step": 194750 }, { "epoch": 0.741304629157373, "grad_norm": 0.1155448704957962, "learning_rate": 0.0005, "loss": 2.1134, "step": 194760 }, { "epoch": 0.7413426916254957, "grad_norm": 0.7521626949310303, "learning_rate": 0.0005, "loss": 2.1072, "step": 194770 }, { "epoch": 0.7413807540936185, "grad_norm": 0.1261117309331894, "learning_rate": 0.0005, "loss": 2.1015, "step": 194780 }, { "epoch": 0.7414188165617411, "grad_norm": 0.11743960529565811, "learning_rate": 0.0005, "loss": 2.1128, "step": 194790 }, { "epoch": 0.7414568790298638, "grad_norm": 0.11156170070171356, "learning_rate": 0.0005, "loss": 2.1058, "step": 194800 }, { "epoch": 0.7414949414979864, "grad_norm": 0.12148051708936691, "learning_rate": 0.0005, "loss": 2.0943, "step": 194810 }, { "epoch": 0.7415330039661092, "grad_norm": 0.12265943735837936, "learning_rate": 0.0005, "loss": 2.1043, "step": 194820 }, { "epoch": 0.7415710664342319, "grad_norm": 0.12968555092811584, "learning_rate": 0.0005, "loss": 2.1122, "step": 194830 }, { "epoch": 0.7416091289023545, "grad_norm": 0.1303674876689911, "learning_rate": 0.0005, "loss": 2.0982, "step": 194840 }, { "epoch": 0.7416471913704772, "grad_norm": 0.12468607723712921, "learning_rate": 0.0005, "loss": 2.1057, "step": 194850 }, { "epoch": 0.7416852538386, "grad_norm": 0.1342761218547821, "learning_rate": 0.0005, "loss": 2.1105, "step": 194860 }, { "epoch": 0.7417233163067226, "grad_norm": 0.12461252510547638, "learning_rate": 0.0005, "loss": 2.1068, "step": 194870 }, { "epoch": 0.7417613787748453, "grad_norm": 0.13232353329658508, "learning_rate": 0.0005, "loss": 2.0986, "step": 194880 }, { "epoch": 0.7417994412429679, "grad_norm": 0.10807273536920547, "learning_rate": 0.0005, "loss": 2.1013, "step": 194890 }, { "epoch": 0.7418375037110907, "grad_norm": 0.12074726819992065, "learning_rate": 0.0005, "loss": 2.1085, "step": 194900 }, { "epoch": 0.7418755661792134, "grad_norm": 0.13266825675964355, "learning_rate": 0.0005, "loss": 2.1086, "step": 194910 }, { "epoch": 0.741913628647336, "grad_norm": 0.17095763981342316, "learning_rate": 0.0005, "loss": 2.0993, "step": 194920 }, { "epoch": 0.7419516911154587, "grad_norm": 0.1295676976442337, "learning_rate": 0.0005, "loss": 2.1113, "step": 194930 }, { "epoch": 0.7419897535835813, "grad_norm": 0.1498701125383377, "learning_rate": 0.0005, "loss": 2.1113, "step": 194940 }, { "epoch": 0.7420278160517041, "grad_norm": 0.11569618433713913, "learning_rate": 0.0005, "loss": 2.1159, "step": 194950 }, { "epoch": 0.7420658785198267, "grad_norm": 0.1568787395954132, "learning_rate": 0.0005, "loss": 2.101, "step": 194960 }, { "epoch": 0.7421039409879494, "grad_norm": 0.11845368146896362, "learning_rate": 0.0005, "loss": 2.0849, "step": 194970 }, { "epoch": 0.7421420034560721, "grad_norm": 0.1119331493973732, "learning_rate": 0.0005, "loss": 2.1079, "step": 194980 }, { "epoch": 0.7421800659241948, "grad_norm": 0.12699395418167114, "learning_rate": 0.0005, "loss": 2.0995, "step": 194990 }, { "epoch": 0.7422181283923175, "grad_norm": 0.112799733877182, "learning_rate": 0.0005, "loss": 2.0924, "step": 195000 }, { "epoch": 0.7422561908604401, "grad_norm": 0.12144352495670319, "learning_rate": 0.0005, "loss": 2.1054, "step": 195010 }, { "epoch": 0.7422942533285628, "grad_norm": 0.131864532828331, "learning_rate": 0.0005, "loss": 2.1074, "step": 195020 }, { "epoch": 0.7423323157966856, "grad_norm": 0.12389921396970749, "learning_rate": 0.0005, "loss": 2.111, "step": 195030 }, { "epoch": 0.7423703782648082, "grad_norm": 0.11010252684354782, "learning_rate": 0.0005, "loss": 2.113, "step": 195040 }, { "epoch": 0.7424084407329309, "grad_norm": 0.11864927411079407, "learning_rate": 0.0005, "loss": 2.1203, "step": 195050 }, { "epoch": 0.7424465032010535, "grad_norm": 0.11730501055717468, "learning_rate": 0.0005, "loss": 2.1025, "step": 195060 }, { "epoch": 0.7424845656691762, "grad_norm": 0.11623819172382355, "learning_rate": 0.0005, "loss": 2.1082, "step": 195070 }, { "epoch": 0.742522628137299, "grad_norm": 0.12447119504213333, "learning_rate": 0.0005, "loss": 2.0924, "step": 195080 }, { "epoch": 0.7425606906054216, "grad_norm": 0.11485525965690613, "learning_rate": 0.0005, "loss": 2.1194, "step": 195090 }, { "epoch": 0.7425987530735443, "grad_norm": 0.12632080912590027, "learning_rate": 0.0005, "loss": 2.112, "step": 195100 }, { "epoch": 0.7426368155416669, "grad_norm": 0.12295328080654144, "learning_rate": 0.0005, "loss": 2.1005, "step": 195110 }, { "epoch": 0.7426748780097897, "grad_norm": 0.14133335649967194, "learning_rate": 0.0005, "loss": 2.1051, "step": 195120 }, { "epoch": 0.7427129404779124, "grad_norm": 0.1315918266773224, "learning_rate": 0.0005, "loss": 2.1028, "step": 195130 }, { "epoch": 0.742751002946035, "grad_norm": 0.11096256226301193, "learning_rate": 0.0005, "loss": 2.1032, "step": 195140 }, { "epoch": 0.7427890654141577, "grad_norm": 0.1371743530035019, "learning_rate": 0.0005, "loss": 2.105, "step": 195150 }, { "epoch": 0.7428271278822804, "grad_norm": 0.13684628903865814, "learning_rate": 0.0005, "loss": 2.1081, "step": 195160 }, { "epoch": 0.7428651903504031, "grad_norm": 0.11548829823732376, "learning_rate": 0.0005, "loss": 2.0895, "step": 195170 }, { "epoch": 0.7429032528185258, "grad_norm": 0.12377560883760452, "learning_rate": 0.0005, "loss": 2.1199, "step": 195180 }, { "epoch": 0.7429413152866484, "grad_norm": 0.12421797215938568, "learning_rate": 0.0005, "loss": 2.1136, "step": 195190 }, { "epoch": 0.7429793777547711, "grad_norm": 0.11980584263801575, "learning_rate": 0.0005, "loss": 2.1023, "step": 195200 }, { "epoch": 0.7430174402228938, "grad_norm": 0.1434640884399414, "learning_rate": 0.0005, "loss": 2.1057, "step": 195210 }, { "epoch": 0.7430555026910165, "grad_norm": 0.13627471029758453, "learning_rate": 0.0005, "loss": 2.1192, "step": 195220 }, { "epoch": 0.7430935651591392, "grad_norm": 0.12290006130933762, "learning_rate": 0.0005, "loss": 2.1003, "step": 195230 }, { "epoch": 0.7431316276272618, "grad_norm": 0.12723302841186523, "learning_rate": 0.0005, "loss": 2.1084, "step": 195240 }, { "epoch": 0.7431696900953846, "grad_norm": 0.1370949000120163, "learning_rate": 0.0005, "loss": 2.1045, "step": 195250 }, { "epoch": 0.7432077525635072, "grad_norm": 0.1215818002820015, "learning_rate": 0.0005, "loss": 2.0992, "step": 195260 }, { "epoch": 0.7432458150316299, "grad_norm": 0.1530386060476303, "learning_rate": 0.0005, "loss": 2.1076, "step": 195270 }, { "epoch": 0.7432838774997526, "grad_norm": 0.13338083028793335, "learning_rate": 0.0005, "loss": 2.1022, "step": 195280 }, { "epoch": 0.7433219399678753, "grad_norm": 0.12526699900627136, "learning_rate": 0.0005, "loss": 2.1027, "step": 195290 }, { "epoch": 0.743360002435998, "grad_norm": 0.11876381933689117, "learning_rate": 0.0005, "loss": 2.0965, "step": 195300 }, { "epoch": 0.7433980649041206, "grad_norm": 0.1208324059844017, "learning_rate": 0.0005, "loss": 2.1071, "step": 195310 }, { "epoch": 0.7434361273722433, "grad_norm": 0.1263258308172226, "learning_rate": 0.0005, "loss": 2.1198, "step": 195320 }, { "epoch": 0.7434741898403661, "grad_norm": 0.12312033027410507, "learning_rate": 0.0005, "loss": 2.1123, "step": 195330 }, { "epoch": 0.7435122523084887, "grad_norm": 0.11395483464002609, "learning_rate": 0.0005, "loss": 2.1032, "step": 195340 }, { "epoch": 0.7435503147766114, "grad_norm": 0.13589192926883698, "learning_rate": 0.0005, "loss": 2.1125, "step": 195350 }, { "epoch": 0.743588377244734, "grad_norm": 0.1306493729352951, "learning_rate": 0.0005, "loss": 2.0964, "step": 195360 }, { "epoch": 0.7436264397128567, "grad_norm": 0.1288546621799469, "learning_rate": 0.0005, "loss": 2.1146, "step": 195370 }, { "epoch": 0.7436645021809795, "grad_norm": 0.13180427253246307, "learning_rate": 0.0005, "loss": 2.1012, "step": 195380 }, { "epoch": 0.7437025646491021, "grad_norm": 0.11989396065473557, "learning_rate": 0.0005, "loss": 2.1217, "step": 195390 }, { "epoch": 0.7437406271172248, "grad_norm": 0.11841694265604019, "learning_rate": 0.0005, "loss": 2.1115, "step": 195400 }, { "epoch": 0.7437786895853474, "grad_norm": 0.12703417241573334, "learning_rate": 0.0005, "loss": 2.1077, "step": 195410 }, { "epoch": 0.7438167520534702, "grad_norm": 0.12518304586410522, "learning_rate": 0.0005, "loss": 2.1007, "step": 195420 }, { "epoch": 0.7438548145215929, "grad_norm": 0.13396604359149933, "learning_rate": 0.0005, "loss": 2.1082, "step": 195430 }, { "epoch": 0.7438928769897155, "grad_norm": 0.12902133166790009, "learning_rate": 0.0005, "loss": 2.1031, "step": 195440 }, { "epoch": 0.7439309394578382, "grad_norm": 0.13435329496860504, "learning_rate": 0.0005, "loss": 2.1135, "step": 195450 }, { "epoch": 0.7439690019259609, "grad_norm": 0.12080912292003632, "learning_rate": 0.0005, "loss": 2.103, "step": 195460 }, { "epoch": 0.7440070643940836, "grad_norm": 0.11378413438796997, "learning_rate": 0.0005, "loss": 2.122, "step": 195470 }, { "epoch": 0.7440451268622063, "grad_norm": 0.12693087756633759, "learning_rate": 0.0005, "loss": 2.1004, "step": 195480 }, { "epoch": 0.7440831893303289, "grad_norm": 0.12436477839946747, "learning_rate": 0.0005, "loss": 2.1014, "step": 195490 }, { "epoch": 0.7441212517984516, "grad_norm": 0.12074817717075348, "learning_rate": 0.0005, "loss": 2.0949, "step": 195500 }, { "epoch": 0.7441593142665743, "grad_norm": 0.13063935935497284, "learning_rate": 0.0005, "loss": 2.1085, "step": 195510 }, { "epoch": 0.744197376734697, "grad_norm": 0.12233955413103104, "learning_rate": 0.0005, "loss": 2.0922, "step": 195520 }, { "epoch": 0.7442354392028196, "grad_norm": 0.12078586220741272, "learning_rate": 0.0005, "loss": 2.1025, "step": 195530 }, { "epoch": 0.7442735016709423, "grad_norm": 0.12886860966682434, "learning_rate": 0.0005, "loss": 2.0972, "step": 195540 }, { "epoch": 0.7443115641390651, "grad_norm": 0.13154838979244232, "learning_rate": 0.0005, "loss": 2.0997, "step": 195550 }, { "epoch": 0.7443496266071877, "grad_norm": 0.12000450491905212, "learning_rate": 0.0005, "loss": 2.1087, "step": 195560 }, { "epoch": 0.7443876890753104, "grad_norm": 0.12249580025672913, "learning_rate": 0.0005, "loss": 2.0969, "step": 195570 }, { "epoch": 0.744425751543433, "grad_norm": 0.12777367234230042, "learning_rate": 0.0005, "loss": 2.1162, "step": 195580 }, { "epoch": 0.7444638140115558, "grad_norm": 0.12537863850593567, "learning_rate": 0.0005, "loss": 2.1103, "step": 195590 }, { "epoch": 0.7445018764796785, "grad_norm": 0.11748939752578735, "learning_rate": 0.0005, "loss": 2.0998, "step": 195600 }, { "epoch": 0.7445399389478011, "grad_norm": 0.11325472593307495, "learning_rate": 0.0005, "loss": 2.116, "step": 195610 }, { "epoch": 0.7445780014159238, "grad_norm": 0.12836772203445435, "learning_rate": 0.0005, "loss": 2.108, "step": 195620 }, { "epoch": 0.7446160638840464, "grad_norm": 0.12222602963447571, "learning_rate": 0.0005, "loss": 2.0922, "step": 195630 }, { "epoch": 0.7446541263521692, "grad_norm": 0.12648913264274597, "learning_rate": 0.0005, "loss": 2.1128, "step": 195640 }, { "epoch": 0.7446921888202919, "grad_norm": 0.1333761066198349, "learning_rate": 0.0005, "loss": 2.1098, "step": 195650 }, { "epoch": 0.7447302512884145, "grad_norm": 0.13610762357711792, "learning_rate": 0.0005, "loss": 2.0942, "step": 195660 }, { "epoch": 0.7447683137565372, "grad_norm": 0.11557116359472275, "learning_rate": 0.0005, "loss": 2.1111, "step": 195670 }, { "epoch": 0.74480637622466, "grad_norm": 0.136189267039299, "learning_rate": 0.0005, "loss": 2.1218, "step": 195680 }, { "epoch": 0.7448444386927826, "grad_norm": 0.14569132030010223, "learning_rate": 0.0005, "loss": 2.1032, "step": 195690 }, { "epoch": 0.7448825011609053, "grad_norm": 0.12963727116584778, "learning_rate": 0.0005, "loss": 2.1072, "step": 195700 }, { "epoch": 0.7449205636290279, "grad_norm": 0.12556087970733643, "learning_rate": 0.0005, "loss": 2.1115, "step": 195710 }, { "epoch": 0.7449586260971507, "grad_norm": 0.11867135018110275, "learning_rate": 0.0005, "loss": 2.1142, "step": 195720 }, { "epoch": 0.7449966885652733, "grad_norm": 0.12275257706642151, "learning_rate": 0.0005, "loss": 2.1072, "step": 195730 }, { "epoch": 0.745034751033396, "grad_norm": 0.11988690495491028, "learning_rate": 0.0005, "loss": 2.1186, "step": 195740 }, { "epoch": 0.7450728135015187, "grad_norm": 0.13531182706356049, "learning_rate": 0.0005, "loss": 2.11, "step": 195750 }, { "epoch": 0.7451108759696414, "grad_norm": 0.1375477910041809, "learning_rate": 0.0005, "loss": 2.1147, "step": 195760 }, { "epoch": 0.7451489384377641, "grad_norm": 0.13907170295715332, "learning_rate": 0.0005, "loss": 2.1017, "step": 195770 }, { "epoch": 0.7451870009058867, "grad_norm": 0.1284158080816269, "learning_rate": 0.0005, "loss": 2.0954, "step": 195780 }, { "epoch": 0.7452250633740094, "grad_norm": 0.11813719570636749, "learning_rate": 0.0005, "loss": 2.1009, "step": 195790 }, { "epoch": 0.7452631258421321, "grad_norm": 0.13614222407341003, "learning_rate": 0.0005, "loss": 2.1116, "step": 195800 }, { "epoch": 0.7453011883102548, "grad_norm": 0.1306437849998474, "learning_rate": 0.0005, "loss": 2.1089, "step": 195810 }, { "epoch": 0.7453392507783775, "grad_norm": 0.13610365986824036, "learning_rate": 0.0005, "loss": 2.1145, "step": 195820 }, { "epoch": 0.7453773132465001, "grad_norm": 0.12205871939659119, "learning_rate": 0.0005, "loss": 2.102, "step": 195830 }, { "epoch": 0.7454153757146228, "grad_norm": 0.1164771318435669, "learning_rate": 0.0005, "loss": 2.1126, "step": 195840 }, { "epoch": 0.7454534381827456, "grad_norm": 0.12755510210990906, "learning_rate": 0.0005, "loss": 2.1212, "step": 195850 }, { "epoch": 0.7454915006508682, "grad_norm": 0.11595284938812256, "learning_rate": 0.0005, "loss": 2.097, "step": 195860 }, { "epoch": 0.7455295631189909, "grad_norm": 0.1476670503616333, "learning_rate": 0.0005, "loss": 2.0923, "step": 195870 }, { "epoch": 0.7455676255871135, "grad_norm": 0.12011052668094635, "learning_rate": 0.0005, "loss": 2.1163, "step": 195880 }, { "epoch": 0.7456056880552363, "grad_norm": 0.13740184903144836, "learning_rate": 0.0005, "loss": 2.1194, "step": 195890 }, { "epoch": 0.745643750523359, "grad_norm": 0.11663123965263367, "learning_rate": 0.0005, "loss": 2.1061, "step": 195900 }, { "epoch": 0.7456818129914816, "grad_norm": 0.12527674436569214, "learning_rate": 0.0005, "loss": 2.108, "step": 195910 }, { "epoch": 0.7457198754596043, "grad_norm": 0.14667300879955292, "learning_rate": 0.0005, "loss": 2.1084, "step": 195920 }, { "epoch": 0.7457579379277269, "grad_norm": 0.14290866255760193, "learning_rate": 0.0005, "loss": 2.1024, "step": 195930 }, { "epoch": 0.7457960003958497, "grad_norm": 0.12882038950920105, "learning_rate": 0.0005, "loss": 2.0961, "step": 195940 }, { "epoch": 0.7458340628639724, "grad_norm": 0.12286058813333511, "learning_rate": 0.0005, "loss": 2.1211, "step": 195950 }, { "epoch": 0.745872125332095, "grad_norm": 0.12759080529212952, "learning_rate": 0.0005, "loss": 2.1204, "step": 195960 }, { "epoch": 0.7459101878002177, "grad_norm": 0.12761107087135315, "learning_rate": 0.0005, "loss": 2.0998, "step": 195970 }, { "epoch": 0.7459482502683404, "grad_norm": 0.11339493840932846, "learning_rate": 0.0005, "loss": 2.1123, "step": 195980 }, { "epoch": 0.7459863127364631, "grad_norm": 0.1386970579624176, "learning_rate": 0.0005, "loss": 2.1028, "step": 195990 }, { "epoch": 0.7460243752045858, "grad_norm": 0.12397254258394241, "learning_rate": 0.0005, "loss": 2.1069, "step": 196000 }, { "epoch": 0.7460624376727084, "grad_norm": 0.12716779112815857, "learning_rate": 0.0005, "loss": 2.0913, "step": 196010 }, { "epoch": 0.7461005001408312, "grad_norm": 0.13425159454345703, "learning_rate": 0.0005, "loss": 2.1229, "step": 196020 }, { "epoch": 0.7461385626089538, "grad_norm": 0.1143328994512558, "learning_rate": 0.0005, "loss": 2.1258, "step": 196030 }, { "epoch": 0.7461766250770765, "grad_norm": 0.13692298531532288, "learning_rate": 0.0005, "loss": 2.1045, "step": 196040 }, { "epoch": 0.7462146875451992, "grad_norm": 0.12230276316404343, "learning_rate": 0.0005, "loss": 2.0959, "step": 196050 }, { "epoch": 0.7462527500133218, "grad_norm": 0.1241021677851677, "learning_rate": 0.0005, "loss": 2.1262, "step": 196060 }, { "epoch": 0.7462908124814446, "grad_norm": 0.11289073526859283, "learning_rate": 0.0005, "loss": 2.1109, "step": 196070 }, { "epoch": 0.7463288749495672, "grad_norm": 0.12893123924732208, "learning_rate": 0.0005, "loss": 2.1088, "step": 196080 }, { "epoch": 0.7463669374176899, "grad_norm": 0.11894325166940689, "learning_rate": 0.0005, "loss": 2.1149, "step": 196090 }, { "epoch": 0.7464049998858125, "grad_norm": 0.11847478151321411, "learning_rate": 0.0005, "loss": 2.1107, "step": 196100 }, { "epoch": 0.7464430623539353, "grad_norm": 0.15468305349349976, "learning_rate": 0.0005, "loss": 2.1179, "step": 196110 }, { "epoch": 0.746481124822058, "grad_norm": 0.11917208880186081, "learning_rate": 0.0005, "loss": 2.1058, "step": 196120 }, { "epoch": 0.7465191872901806, "grad_norm": 0.13077053427696228, "learning_rate": 0.0005, "loss": 2.1025, "step": 196130 }, { "epoch": 0.7465572497583033, "grad_norm": 0.13156230747699738, "learning_rate": 0.0005, "loss": 2.0886, "step": 196140 }, { "epoch": 0.7465953122264261, "grad_norm": 0.1107783317565918, "learning_rate": 0.0005, "loss": 2.1206, "step": 196150 }, { "epoch": 0.7466333746945487, "grad_norm": 0.25660818815231323, "learning_rate": 0.0005, "loss": 2.1105, "step": 196160 }, { "epoch": 0.7466714371626714, "grad_norm": 0.11806541681289673, "learning_rate": 0.0005, "loss": 2.1101, "step": 196170 }, { "epoch": 0.746709499630794, "grad_norm": 0.11881528794765472, "learning_rate": 0.0005, "loss": 2.1009, "step": 196180 }, { "epoch": 0.7467475620989168, "grad_norm": 0.11958711594343185, "learning_rate": 0.0005, "loss": 2.1163, "step": 196190 }, { "epoch": 0.7467856245670395, "grad_norm": 0.12247668951749802, "learning_rate": 0.0005, "loss": 2.1125, "step": 196200 }, { "epoch": 0.7468236870351621, "grad_norm": 0.14594995975494385, "learning_rate": 0.0005, "loss": 2.11, "step": 196210 }, { "epoch": 0.7468617495032848, "grad_norm": 0.14043888449668884, "learning_rate": 0.0005, "loss": 2.1075, "step": 196220 }, { "epoch": 0.7468998119714074, "grad_norm": 0.12176655232906342, "learning_rate": 0.0005, "loss": 2.1214, "step": 196230 }, { "epoch": 0.7469378744395302, "grad_norm": 0.12017684429883957, "learning_rate": 0.0005, "loss": 2.1099, "step": 196240 }, { "epoch": 0.7469759369076528, "grad_norm": 0.12407419085502625, "learning_rate": 0.0005, "loss": 2.1205, "step": 196250 }, { "epoch": 0.7470139993757755, "grad_norm": 0.12825097143650055, "learning_rate": 0.0005, "loss": 2.1168, "step": 196260 }, { "epoch": 0.7470520618438982, "grad_norm": 0.1215038001537323, "learning_rate": 0.0005, "loss": 2.1147, "step": 196270 }, { "epoch": 0.7470901243120209, "grad_norm": 0.12874773144721985, "learning_rate": 0.0005, "loss": 2.1083, "step": 196280 }, { "epoch": 0.7471281867801436, "grad_norm": 0.12362653017044067, "learning_rate": 0.0005, "loss": 2.1107, "step": 196290 }, { "epoch": 0.7471662492482662, "grad_norm": 0.12631510198116302, "learning_rate": 0.0005, "loss": 2.1132, "step": 196300 }, { "epoch": 0.7472043117163889, "grad_norm": 0.12554360926151276, "learning_rate": 0.0005, "loss": 2.1011, "step": 196310 }, { "epoch": 0.7472423741845117, "grad_norm": 0.12463687360286713, "learning_rate": 0.0005, "loss": 2.1091, "step": 196320 }, { "epoch": 0.7472804366526343, "grad_norm": 0.12406588345766068, "learning_rate": 0.0005, "loss": 2.1072, "step": 196330 }, { "epoch": 0.747318499120757, "grad_norm": 0.12036009132862091, "learning_rate": 0.0005, "loss": 2.1047, "step": 196340 }, { "epoch": 0.7473565615888796, "grad_norm": 0.12196014076471329, "learning_rate": 0.0005, "loss": 2.1039, "step": 196350 }, { "epoch": 0.7473946240570023, "grad_norm": 0.1232280284166336, "learning_rate": 0.0005, "loss": 2.1013, "step": 196360 }, { "epoch": 0.7474326865251251, "grad_norm": 0.13089613616466522, "learning_rate": 0.0005, "loss": 2.0951, "step": 196370 }, { "epoch": 0.7474707489932477, "grad_norm": 0.12335113435983658, "learning_rate": 0.0005, "loss": 2.0932, "step": 196380 }, { "epoch": 0.7475088114613704, "grad_norm": 0.12216979265213013, "learning_rate": 0.0005, "loss": 2.0996, "step": 196390 }, { "epoch": 0.747546873929493, "grad_norm": 0.15522664785385132, "learning_rate": 0.0005, "loss": 2.1262, "step": 196400 }, { "epoch": 0.7475849363976158, "grad_norm": 0.13516436517238617, "learning_rate": 0.0005, "loss": 2.1144, "step": 196410 }, { "epoch": 0.7476229988657385, "grad_norm": 0.12450914829969406, "learning_rate": 0.0005, "loss": 2.1045, "step": 196420 }, { "epoch": 0.7476610613338611, "grad_norm": 0.12933525443077087, "learning_rate": 0.0005, "loss": 2.1036, "step": 196430 }, { "epoch": 0.7476991238019838, "grad_norm": 0.1256740838289261, "learning_rate": 0.0005, "loss": 2.1153, "step": 196440 }, { "epoch": 0.7477371862701065, "grad_norm": 0.12129776924848557, "learning_rate": 0.0005, "loss": 2.1058, "step": 196450 }, { "epoch": 0.7477752487382292, "grad_norm": 0.12356984615325928, "learning_rate": 0.0005, "loss": 2.1046, "step": 196460 }, { "epoch": 0.7478133112063519, "grad_norm": 0.12165206670761108, "learning_rate": 0.0005, "loss": 2.1122, "step": 196470 }, { "epoch": 0.7478513736744745, "grad_norm": 0.1134643629193306, "learning_rate": 0.0005, "loss": 2.1003, "step": 196480 }, { "epoch": 0.7478894361425973, "grad_norm": 0.12672019004821777, "learning_rate": 0.0005, "loss": 2.1143, "step": 196490 }, { "epoch": 0.7479274986107199, "grad_norm": 0.11957664787769318, "learning_rate": 0.0005, "loss": 2.1048, "step": 196500 }, { "epoch": 0.7479655610788426, "grad_norm": 0.1286424845457077, "learning_rate": 0.0005, "loss": 2.1152, "step": 196510 }, { "epoch": 0.7480036235469653, "grad_norm": 0.1337771862745285, "learning_rate": 0.0005, "loss": 2.1193, "step": 196520 }, { "epoch": 0.7480416860150879, "grad_norm": 0.1336856186389923, "learning_rate": 0.0005, "loss": 2.1182, "step": 196530 }, { "epoch": 0.7480797484832107, "grad_norm": 0.1441793590784073, "learning_rate": 0.0005, "loss": 2.1057, "step": 196540 }, { "epoch": 0.7481178109513333, "grad_norm": 0.12170455604791641, "learning_rate": 0.0005, "loss": 2.1246, "step": 196550 }, { "epoch": 0.748155873419456, "grad_norm": 0.1308586448431015, "learning_rate": 0.0005, "loss": 2.0871, "step": 196560 }, { "epoch": 0.7481939358875787, "grad_norm": 0.1299491673707962, "learning_rate": 0.0005, "loss": 2.099, "step": 196570 }, { "epoch": 0.7482319983557014, "grad_norm": 0.12823458015918732, "learning_rate": 0.0005, "loss": 2.121, "step": 196580 }, { "epoch": 0.7482700608238241, "grad_norm": 0.1290253847837448, "learning_rate": 0.0005, "loss": 2.1146, "step": 196590 }, { "epoch": 0.7483081232919467, "grad_norm": 0.14299185574054718, "learning_rate": 0.0005, "loss": 2.1083, "step": 196600 }, { "epoch": 0.7483461857600694, "grad_norm": 0.137240469455719, "learning_rate": 0.0005, "loss": 2.1029, "step": 196610 }, { "epoch": 0.7483842482281922, "grad_norm": 0.14135576784610748, "learning_rate": 0.0005, "loss": 2.111, "step": 196620 }, { "epoch": 0.7484223106963148, "grad_norm": 0.12895070016384125, "learning_rate": 0.0005, "loss": 2.1145, "step": 196630 }, { "epoch": 0.7484603731644375, "grad_norm": 0.12500692903995514, "learning_rate": 0.0005, "loss": 2.1112, "step": 196640 }, { "epoch": 0.7484984356325601, "grad_norm": 0.12122353166341782, "learning_rate": 0.0005, "loss": 2.1065, "step": 196650 }, { "epoch": 0.7485364981006828, "grad_norm": 0.12457163631916046, "learning_rate": 0.0005, "loss": 2.1111, "step": 196660 }, { "epoch": 0.7485745605688056, "grad_norm": 0.11478997766971588, "learning_rate": 0.0005, "loss": 2.1039, "step": 196670 }, { "epoch": 0.7486126230369282, "grad_norm": 0.12098927795886993, "learning_rate": 0.0005, "loss": 2.1193, "step": 196680 }, { "epoch": 0.7486506855050509, "grad_norm": 0.137788787484169, "learning_rate": 0.0005, "loss": 2.0984, "step": 196690 }, { "epoch": 0.7486887479731735, "grad_norm": 0.12748932838439941, "learning_rate": 0.0005, "loss": 2.1103, "step": 196700 }, { "epoch": 0.7487268104412963, "grad_norm": 0.12803438305854797, "learning_rate": 0.0005, "loss": 2.1136, "step": 196710 }, { "epoch": 0.748764872909419, "grad_norm": 0.12306778877973557, "learning_rate": 0.0005, "loss": 2.1154, "step": 196720 }, { "epoch": 0.7488029353775416, "grad_norm": 0.1417481154203415, "learning_rate": 0.0005, "loss": 2.1047, "step": 196730 }, { "epoch": 0.7488409978456643, "grad_norm": 0.1265956461429596, "learning_rate": 0.0005, "loss": 2.1019, "step": 196740 }, { "epoch": 0.748879060313787, "grad_norm": 0.14312238991260529, "learning_rate": 0.0005, "loss": 2.1139, "step": 196750 }, { "epoch": 0.7489171227819097, "grad_norm": 0.1359281837940216, "learning_rate": 0.0005, "loss": 2.1128, "step": 196760 }, { "epoch": 0.7489551852500324, "grad_norm": 0.13459917902946472, "learning_rate": 0.0005, "loss": 2.1201, "step": 196770 }, { "epoch": 0.748993247718155, "grad_norm": 0.12361203879117966, "learning_rate": 0.0005, "loss": 2.1086, "step": 196780 }, { "epoch": 0.7490313101862777, "grad_norm": 0.1266535371541977, "learning_rate": 0.0005, "loss": 2.1178, "step": 196790 }, { "epoch": 0.7490693726544004, "grad_norm": 0.13514138758182526, "learning_rate": 0.0005, "loss": 2.1083, "step": 196800 }, { "epoch": 0.7491074351225231, "grad_norm": 0.11725779622793198, "learning_rate": 0.0005, "loss": 2.1007, "step": 196810 }, { "epoch": 0.7491454975906457, "grad_norm": 0.12637847661972046, "learning_rate": 0.0005, "loss": 2.0989, "step": 196820 }, { "epoch": 0.7491835600587684, "grad_norm": 0.12415625900030136, "learning_rate": 0.0005, "loss": 2.1049, "step": 196830 }, { "epoch": 0.7492216225268912, "grad_norm": 0.1321948766708374, "learning_rate": 0.0005, "loss": 2.1108, "step": 196840 }, { "epoch": 0.7492596849950138, "grad_norm": 0.13515117764472961, "learning_rate": 0.0005, "loss": 2.115, "step": 196850 }, { "epoch": 0.7492977474631365, "grad_norm": 0.12687243521213531, "learning_rate": 0.0005, "loss": 2.1004, "step": 196860 }, { "epoch": 0.7493358099312591, "grad_norm": 0.12546047568321228, "learning_rate": 0.0005, "loss": 2.1144, "step": 196870 }, { "epoch": 0.7493738723993819, "grad_norm": 0.13166911900043488, "learning_rate": 0.0005, "loss": 2.1192, "step": 196880 }, { "epoch": 0.7494119348675046, "grad_norm": 0.13394483923912048, "learning_rate": 0.0005, "loss": 2.1184, "step": 196890 }, { "epoch": 0.7494499973356272, "grad_norm": 0.12177181988954544, "learning_rate": 0.0005, "loss": 2.1071, "step": 196900 }, { "epoch": 0.7494880598037499, "grad_norm": 0.12611910700798035, "learning_rate": 0.0005, "loss": 2.106, "step": 196910 }, { "epoch": 0.7495261222718727, "grad_norm": 0.11354668438434601, "learning_rate": 0.0005, "loss": 2.1183, "step": 196920 }, { "epoch": 0.7495641847399953, "grad_norm": 0.134921133518219, "learning_rate": 0.0005, "loss": 2.1091, "step": 196930 }, { "epoch": 0.749602247208118, "grad_norm": 0.14567500352859497, "learning_rate": 0.0005, "loss": 2.1091, "step": 196940 }, { "epoch": 0.7496403096762406, "grad_norm": 0.12872666120529175, "learning_rate": 0.0005, "loss": 2.1073, "step": 196950 }, { "epoch": 0.7496783721443633, "grad_norm": 0.11634253710508347, "learning_rate": 0.0005, "loss": 2.105, "step": 196960 }, { "epoch": 0.749716434612486, "grad_norm": 0.11523030698299408, "learning_rate": 0.0005, "loss": 2.109, "step": 196970 }, { "epoch": 0.7497544970806087, "grad_norm": 0.12753266096115112, "learning_rate": 0.0005, "loss": 2.0921, "step": 196980 }, { "epoch": 0.7497925595487314, "grad_norm": 0.11636027693748474, "learning_rate": 0.0005, "loss": 2.1083, "step": 196990 }, { "epoch": 0.749830622016854, "grad_norm": 0.12546052038669586, "learning_rate": 0.0005, "loss": 2.112, "step": 197000 }, { "epoch": 0.7498686844849768, "grad_norm": 0.12241706252098083, "learning_rate": 0.0005, "loss": 2.0931, "step": 197010 }, { "epoch": 0.7499067469530994, "grad_norm": 0.12285008281469345, "learning_rate": 0.0005, "loss": 2.1173, "step": 197020 }, { "epoch": 0.7499448094212221, "grad_norm": 0.11864815652370453, "learning_rate": 0.0005, "loss": 2.1068, "step": 197030 }, { "epoch": 0.7499828718893448, "grad_norm": 0.12144052237272263, "learning_rate": 0.0005, "loss": 2.1176, "step": 197040 }, { "epoch": 0.7500209343574675, "grad_norm": 0.12249705940485, "learning_rate": 0.0005, "loss": 2.1103, "step": 197050 }, { "epoch": 0.7500589968255902, "grad_norm": 0.13299207389354706, "learning_rate": 0.0005, "loss": 2.0939, "step": 197060 }, { "epoch": 0.7500970592937128, "grad_norm": 0.12375347316265106, "learning_rate": 0.0005, "loss": 2.1131, "step": 197070 }, { "epoch": 0.7501351217618355, "grad_norm": 0.1229780837893486, "learning_rate": 0.0005, "loss": 2.0892, "step": 197080 }, { "epoch": 0.7501731842299582, "grad_norm": 0.12229334563016891, "learning_rate": 0.0005, "loss": 2.099, "step": 197090 }, { "epoch": 0.7502112466980809, "grad_norm": 0.1299109309911728, "learning_rate": 0.0005, "loss": 2.0912, "step": 197100 }, { "epoch": 0.7502493091662036, "grad_norm": 0.12679001688957214, "learning_rate": 0.0005, "loss": 2.1187, "step": 197110 }, { "epoch": 0.7502873716343262, "grad_norm": 0.12795282900333405, "learning_rate": 0.0005, "loss": 2.1104, "step": 197120 }, { "epoch": 0.7503254341024489, "grad_norm": 0.12865543365478516, "learning_rate": 0.0005, "loss": 2.1025, "step": 197130 }, { "epoch": 0.7503634965705717, "grad_norm": 0.13436903059482574, "learning_rate": 0.0005, "loss": 2.1104, "step": 197140 }, { "epoch": 0.7504015590386943, "grad_norm": 0.9106693267822266, "learning_rate": 0.0005, "loss": 2.1036, "step": 197150 }, { "epoch": 0.750439621506817, "grad_norm": 0.1274736523628235, "learning_rate": 0.0005, "loss": 2.1042, "step": 197160 }, { "epoch": 0.7504776839749396, "grad_norm": 0.12124037742614746, "learning_rate": 0.0005, "loss": 2.1026, "step": 197170 }, { "epoch": 0.7505157464430624, "grad_norm": 0.1154475286602974, "learning_rate": 0.0005, "loss": 2.1089, "step": 197180 }, { "epoch": 0.7505538089111851, "grad_norm": 0.1293182224035263, "learning_rate": 0.0005, "loss": 2.1131, "step": 197190 }, { "epoch": 0.7505918713793077, "grad_norm": 0.14153964817523956, "learning_rate": 0.0005, "loss": 2.1102, "step": 197200 }, { "epoch": 0.7506299338474304, "grad_norm": 0.14210331439971924, "learning_rate": 0.0005, "loss": 2.0971, "step": 197210 }, { "epoch": 0.750667996315553, "grad_norm": 0.11955045163631439, "learning_rate": 0.0005, "loss": 2.0928, "step": 197220 }, { "epoch": 0.7507060587836758, "grad_norm": 0.12392129749059677, "learning_rate": 0.0005, "loss": 2.0907, "step": 197230 }, { "epoch": 0.7507441212517985, "grad_norm": 0.12962493300437927, "learning_rate": 0.0005, "loss": 2.1036, "step": 197240 }, { "epoch": 0.7507821837199211, "grad_norm": 0.12972719967365265, "learning_rate": 0.0005, "loss": 2.101, "step": 197250 }, { "epoch": 0.7508202461880438, "grad_norm": 0.12515634298324585, "learning_rate": 0.0005, "loss": 2.1118, "step": 197260 }, { "epoch": 0.7508583086561665, "grad_norm": 0.1352843940258026, "learning_rate": 0.0005, "loss": 2.112, "step": 197270 }, { "epoch": 0.7508963711242892, "grad_norm": 0.12352333217859268, "learning_rate": 0.0005, "loss": 2.1114, "step": 197280 }, { "epoch": 0.7509344335924119, "grad_norm": 0.124315544962883, "learning_rate": 0.0005, "loss": 2.0998, "step": 197290 }, { "epoch": 0.7509724960605345, "grad_norm": 0.11670838296413422, "learning_rate": 0.0005, "loss": 2.0924, "step": 197300 }, { "epoch": 0.7510105585286573, "grad_norm": 0.12693895399570465, "learning_rate": 0.0005, "loss": 2.1062, "step": 197310 }, { "epoch": 0.7510486209967799, "grad_norm": 0.11586874723434448, "learning_rate": 0.0005, "loss": 2.1215, "step": 197320 }, { "epoch": 0.7510866834649026, "grad_norm": 0.11443132907152176, "learning_rate": 0.0005, "loss": 2.1066, "step": 197330 }, { "epoch": 0.7511247459330253, "grad_norm": 0.11413166671991348, "learning_rate": 0.0005, "loss": 2.0958, "step": 197340 }, { "epoch": 0.751162808401148, "grad_norm": 0.11946621537208557, "learning_rate": 0.0005, "loss": 2.0809, "step": 197350 }, { "epoch": 0.7512008708692707, "grad_norm": 0.1346030980348587, "learning_rate": 0.0005, "loss": 2.1042, "step": 197360 }, { "epoch": 0.7512389333373933, "grad_norm": 0.13821019232273102, "learning_rate": 0.0005, "loss": 2.1009, "step": 197370 }, { "epoch": 0.751276995805516, "grad_norm": 0.12743425369262695, "learning_rate": 0.0005, "loss": 2.1061, "step": 197380 }, { "epoch": 0.7513150582736386, "grad_norm": 0.14612269401550293, "learning_rate": 0.0005, "loss": 2.1188, "step": 197390 }, { "epoch": 0.7513531207417614, "grad_norm": 0.11492089182138443, "learning_rate": 0.0005, "loss": 2.1082, "step": 197400 }, { "epoch": 0.7513911832098841, "grad_norm": 0.11517133563756943, "learning_rate": 0.0005, "loss": 2.1104, "step": 197410 }, { "epoch": 0.7514292456780067, "grad_norm": 0.12532839179039001, "learning_rate": 0.0005, "loss": 2.1066, "step": 197420 }, { "epoch": 0.7514673081461294, "grad_norm": 0.13420192897319794, "learning_rate": 0.0005, "loss": 2.1222, "step": 197430 }, { "epoch": 0.7515053706142522, "grad_norm": 0.12853312492370605, "learning_rate": 0.0005, "loss": 2.1028, "step": 197440 }, { "epoch": 0.7515434330823748, "grad_norm": 0.12556856870651245, "learning_rate": 0.0005, "loss": 2.1147, "step": 197450 }, { "epoch": 0.7515814955504975, "grad_norm": 0.12764570116996765, "learning_rate": 0.0005, "loss": 2.1041, "step": 197460 }, { "epoch": 0.7516195580186201, "grad_norm": 0.12602615356445312, "learning_rate": 0.0005, "loss": 2.1102, "step": 197470 }, { "epoch": 0.7516576204867429, "grad_norm": 0.11851579695940018, "learning_rate": 0.0005, "loss": 2.1205, "step": 197480 }, { "epoch": 0.7516956829548656, "grad_norm": 0.13655637204647064, "learning_rate": 0.0005, "loss": 2.0996, "step": 197490 }, { "epoch": 0.7517337454229882, "grad_norm": 0.12730269134044647, "learning_rate": 0.0005, "loss": 2.1007, "step": 197500 }, { "epoch": 0.7517718078911109, "grad_norm": 0.12439996004104614, "learning_rate": 0.0005, "loss": 2.0978, "step": 197510 }, { "epoch": 0.7518098703592335, "grad_norm": 0.11916308104991913, "learning_rate": 0.0005, "loss": 2.1148, "step": 197520 }, { "epoch": 0.7518479328273563, "grad_norm": 0.2023012787103653, "learning_rate": 0.0005, "loss": 2.1005, "step": 197530 }, { "epoch": 0.751885995295479, "grad_norm": 0.11802316457033157, "learning_rate": 0.0005, "loss": 2.1121, "step": 197540 }, { "epoch": 0.7519240577636016, "grad_norm": 0.11546476930379868, "learning_rate": 0.0005, "loss": 2.105, "step": 197550 }, { "epoch": 0.7519621202317243, "grad_norm": 0.146450936794281, "learning_rate": 0.0005, "loss": 2.106, "step": 197560 }, { "epoch": 0.752000182699847, "grad_norm": 0.12375127524137497, "learning_rate": 0.0005, "loss": 2.1095, "step": 197570 }, { "epoch": 0.7520382451679697, "grad_norm": 0.12629228830337524, "learning_rate": 0.0005, "loss": 2.1076, "step": 197580 }, { "epoch": 0.7520763076360923, "grad_norm": 0.12279598414897919, "learning_rate": 0.0005, "loss": 2.1106, "step": 197590 }, { "epoch": 0.752114370104215, "grad_norm": 0.1399388462305069, "learning_rate": 0.0005, "loss": 2.1141, "step": 197600 }, { "epoch": 0.7521524325723378, "grad_norm": 0.12528078258037567, "learning_rate": 0.0005, "loss": 2.1, "step": 197610 }, { "epoch": 0.7521904950404604, "grad_norm": 0.11265812814235687, "learning_rate": 0.0005, "loss": 2.1092, "step": 197620 }, { "epoch": 0.7522285575085831, "grad_norm": 0.12309877574443817, "learning_rate": 0.0005, "loss": 2.1138, "step": 197630 }, { "epoch": 0.7522666199767057, "grad_norm": 0.12960395216941833, "learning_rate": 0.0005, "loss": 2.0989, "step": 197640 }, { "epoch": 0.7523046824448284, "grad_norm": 0.13863934576511383, "learning_rate": 0.0005, "loss": 2.1147, "step": 197650 }, { "epoch": 0.7523427449129512, "grad_norm": 0.1307358592748642, "learning_rate": 0.0005, "loss": 2.1165, "step": 197660 }, { "epoch": 0.7523808073810738, "grad_norm": 0.13089637458324432, "learning_rate": 0.0005, "loss": 2.1193, "step": 197670 }, { "epoch": 0.7524188698491965, "grad_norm": 0.12067008763551712, "learning_rate": 0.0005, "loss": 2.1056, "step": 197680 }, { "epoch": 0.7524569323173191, "grad_norm": 0.12283031642436981, "learning_rate": 0.0005, "loss": 2.1061, "step": 197690 }, { "epoch": 0.7524949947854419, "grad_norm": 0.12055887281894684, "learning_rate": 0.0005, "loss": 2.1071, "step": 197700 }, { "epoch": 0.7525330572535646, "grad_norm": 0.12535439431667328, "learning_rate": 0.0005, "loss": 2.1053, "step": 197710 }, { "epoch": 0.7525711197216872, "grad_norm": 0.13567043840885162, "learning_rate": 0.0005, "loss": 2.123, "step": 197720 }, { "epoch": 0.7526091821898099, "grad_norm": 0.12497591227293015, "learning_rate": 0.0005, "loss": 2.1095, "step": 197730 }, { "epoch": 0.7526472446579326, "grad_norm": 0.12373036891222, "learning_rate": 0.0005, "loss": 2.0952, "step": 197740 }, { "epoch": 0.7526853071260553, "grad_norm": 0.1281946897506714, "learning_rate": 0.0005, "loss": 2.1137, "step": 197750 }, { "epoch": 0.752723369594178, "grad_norm": 0.11898750066757202, "learning_rate": 0.0005, "loss": 2.1133, "step": 197760 }, { "epoch": 0.7527614320623006, "grad_norm": 0.11677069962024689, "learning_rate": 0.0005, "loss": 2.1301, "step": 197770 }, { "epoch": 0.7527994945304234, "grad_norm": 0.14142510294914246, "learning_rate": 0.0005, "loss": 2.1057, "step": 197780 }, { "epoch": 0.752837556998546, "grad_norm": 0.1283315122127533, "learning_rate": 0.0005, "loss": 2.1061, "step": 197790 }, { "epoch": 0.7528756194666687, "grad_norm": 0.13681048154830933, "learning_rate": 0.0005, "loss": 2.1167, "step": 197800 }, { "epoch": 0.7529136819347914, "grad_norm": 0.14908556640148163, "learning_rate": 0.0005, "loss": 2.0997, "step": 197810 }, { "epoch": 0.752951744402914, "grad_norm": 0.12765933573246002, "learning_rate": 0.0005, "loss": 2.1168, "step": 197820 }, { "epoch": 0.7529898068710368, "grad_norm": 0.13758790493011475, "learning_rate": 0.0005, "loss": 2.1079, "step": 197830 }, { "epoch": 0.7530278693391594, "grad_norm": 0.13992281258106232, "learning_rate": 0.0005, "loss": 2.0972, "step": 197840 }, { "epoch": 0.7530659318072821, "grad_norm": 0.12648849189281464, "learning_rate": 0.0005, "loss": 2.1075, "step": 197850 }, { "epoch": 0.7531039942754048, "grad_norm": 0.12558187544345856, "learning_rate": 0.0005, "loss": 2.1, "step": 197860 }, { "epoch": 0.7531420567435275, "grad_norm": 0.12360218912363052, "learning_rate": 0.0005, "loss": 2.1096, "step": 197870 }, { "epoch": 0.7531801192116502, "grad_norm": 0.137051522731781, "learning_rate": 0.0005, "loss": 2.1222, "step": 197880 }, { "epoch": 0.7532181816797728, "grad_norm": 0.12066777050495148, "learning_rate": 0.0005, "loss": 2.1135, "step": 197890 }, { "epoch": 0.7532562441478955, "grad_norm": 0.13327810168266296, "learning_rate": 0.0005, "loss": 2.1034, "step": 197900 }, { "epoch": 0.7532943066160183, "grad_norm": 0.11925477534532547, "learning_rate": 0.0005, "loss": 2.1092, "step": 197910 }, { "epoch": 0.7533323690841409, "grad_norm": 0.1300128847360611, "learning_rate": 0.0005, "loss": 2.1082, "step": 197920 }, { "epoch": 0.7533704315522636, "grad_norm": 0.12425397336483002, "learning_rate": 0.0005, "loss": 2.104, "step": 197930 }, { "epoch": 0.7534084940203862, "grad_norm": 0.1329755336046219, "learning_rate": 0.0005, "loss": 2.1045, "step": 197940 }, { "epoch": 0.7534465564885089, "grad_norm": 0.11931162327528, "learning_rate": 0.0005, "loss": 2.1167, "step": 197950 }, { "epoch": 0.7534846189566317, "grad_norm": 0.1208098903298378, "learning_rate": 0.0005, "loss": 2.1083, "step": 197960 }, { "epoch": 0.7535226814247543, "grad_norm": 0.13338631391525269, "learning_rate": 0.0005, "loss": 2.0987, "step": 197970 }, { "epoch": 0.753560743892877, "grad_norm": 0.12523089349269867, "learning_rate": 0.0005, "loss": 2.1006, "step": 197980 }, { "epoch": 0.7535988063609996, "grad_norm": 0.1101926937699318, "learning_rate": 0.0005, "loss": 2.1229, "step": 197990 }, { "epoch": 0.7536368688291224, "grad_norm": 0.1254616379737854, "learning_rate": 0.0005, "loss": 2.0887, "step": 198000 }, { "epoch": 0.753674931297245, "grad_norm": 0.14773495495319366, "learning_rate": 0.0005, "loss": 2.0991, "step": 198010 }, { "epoch": 0.7537129937653677, "grad_norm": 0.13330087065696716, "learning_rate": 0.0005, "loss": 2.103, "step": 198020 }, { "epoch": 0.7537510562334904, "grad_norm": 0.11752574890851974, "learning_rate": 0.0005, "loss": 2.1133, "step": 198030 }, { "epoch": 0.7537891187016131, "grad_norm": 0.13528861105442047, "learning_rate": 0.0005, "loss": 2.1003, "step": 198040 }, { "epoch": 0.7538271811697358, "grad_norm": 0.12782931327819824, "learning_rate": 0.0005, "loss": 2.0901, "step": 198050 }, { "epoch": 0.7538652436378585, "grad_norm": 0.1557457000017166, "learning_rate": 0.0005, "loss": 2.1128, "step": 198060 }, { "epoch": 0.7539033061059811, "grad_norm": 0.12887586653232574, "learning_rate": 0.0005, "loss": 2.1094, "step": 198070 }, { "epoch": 0.7539413685741038, "grad_norm": 0.12701363861560822, "learning_rate": 0.0005, "loss": 2.1035, "step": 198080 }, { "epoch": 0.7539794310422265, "grad_norm": 0.12219791114330292, "learning_rate": 0.0005, "loss": 2.1198, "step": 198090 }, { "epoch": 0.7540174935103492, "grad_norm": 0.125774547457695, "learning_rate": 0.0005, "loss": 2.1097, "step": 198100 }, { "epoch": 0.7540555559784718, "grad_norm": 0.11728359758853912, "learning_rate": 0.0005, "loss": 2.1164, "step": 198110 }, { "epoch": 0.7540936184465945, "grad_norm": 0.11758928745985031, "learning_rate": 0.0005, "loss": 2.0999, "step": 198120 }, { "epoch": 0.7541316809147173, "grad_norm": 0.11796324700117111, "learning_rate": 0.0005, "loss": 2.0959, "step": 198130 }, { "epoch": 0.7541697433828399, "grad_norm": 0.1250022053718567, "learning_rate": 0.0005, "loss": 2.093, "step": 198140 }, { "epoch": 0.7542078058509626, "grad_norm": 0.12501591444015503, "learning_rate": 0.0005, "loss": 2.1088, "step": 198150 }, { "epoch": 0.7542458683190852, "grad_norm": 0.12826436758041382, "learning_rate": 0.0005, "loss": 2.1081, "step": 198160 }, { "epoch": 0.754283930787208, "grad_norm": 0.11726287007331848, "learning_rate": 0.0005, "loss": 2.1065, "step": 198170 }, { "epoch": 0.7543219932553307, "grad_norm": 0.12219440191984177, "learning_rate": 0.0005, "loss": 2.1117, "step": 198180 }, { "epoch": 0.7543600557234533, "grad_norm": 0.11771136522293091, "learning_rate": 0.0005, "loss": 2.1099, "step": 198190 }, { "epoch": 0.754398118191576, "grad_norm": 0.12274880707263947, "learning_rate": 0.0005, "loss": 2.1316, "step": 198200 }, { "epoch": 0.7544361806596988, "grad_norm": 0.12219416350126266, "learning_rate": 0.0005, "loss": 2.1064, "step": 198210 }, { "epoch": 0.7544742431278214, "grad_norm": 0.11942414939403534, "learning_rate": 0.0005, "loss": 2.1033, "step": 198220 }, { "epoch": 0.7545123055959441, "grad_norm": 0.1254412829875946, "learning_rate": 0.0005, "loss": 2.1091, "step": 198230 }, { "epoch": 0.7545503680640667, "grad_norm": 0.12787742912769318, "learning_rate": 0.0005, "loss": 2.1126, "step": 198240 }, { "epoch": 0.7545884305321894, "grad_norm": 0.14581698179244995, "learning_rate": 0.0005, "loss": 2.117, "step": 198250 }, { "epoch": 0.7546264930003121, "grad_norm": 0.12357164174318314, "learning_rate": 0.0005, "loss": 2.1009, "step": 198260 }, { "epoch": 0.7546645554684348, "grad_norm": 0.13686387240886688, "learning_rate": 0.0005, "loss": 2.1075, "step": 198270 }, { "epoch": 0.7547026179365575, "grad_norm": 0.12348747253417969, "learning_rate": 0.0005, "loss": 2.1044, "step": 198280 }, { "epoch": 0.7547406804046801, "grad_norm": 0.11589358001947403, "learning_rate": 0.0005, "loss": 2.1137, "step": 198290 }, { "epoch": 0.7547787428728029, "grad_norm": 0.12138720601797104, "learning_rate": 0.0005, "loss": 2.0988, "step": 198300 }, { "epoch": 0.7548168053409255, "grad_norm": 0.11561553925275803, "learning_rate": 0.0005, "loss": 2.1124, "step": 198310 }, { "epoch": 0.7548548678090482, "grad_norm": 0.12074074149131775, "learning_rate": 0.0005, "loss": 2.1165, "step": 198320 }, { "epoch": 0.7548929302771709, "grad_norm": 0.11432056128978729, "learning_rate": 0.0005, "loss": 2.1265, "step": 198330 }, { "epoch": 0.7549309927452936, "grad_norm": 0.12902531027793884, "learning_rate": 0.0005, "loss": 2.1241, "step": 198340 }, { "epoch": 0.7549690552134163, "grad_norm": 0.11342615634202957, "learning_rate": 0.0005, "loss": 2.095, "step": 198350 }, { "epoch": 0.7550071176815389, "grad_norm": 0.12891358137130737, "learning_rate": 0.0005, "loss": 2.0979, "step": 198360 }, { "epoch": 0.7550451801496616, "grad_norm": 0.1385073959827423, "learning_rate": 0.0005, "loss": 2.1064, "step": 198370 }, { "epoch": 0.7550832426177843, "grad_norm": 0.14252620935440063, "learning_rate": 0.0005, "loss": 2.1144, "step": 198380 }, { "epoch": 0.755121305085907, "grad_norm": 0.14449909329414368, "learning_rate": 0.0005, "loss": 2.1062, "step": 198390 }, { "epoch": 0.7551593675540297, "grad_norm": 0.13095831871032715, "learning_rate": 0.0005, "loss": 2.0991, "step": 198400 }, { "epoch": 0.7551974300221523, "grad_norm": 0.12750248610973358, "learning_rate": 0.0005, "loss": 2.1086, "step": 198410 }, { "epoch": 0.755235492490275, "grad_norm": 0.12844283878803253, "learning_rate": 0.0005, "loss": 2.1214, "step": 198420 }, { "epoch": 0.7552735549583978, "grad_norm": 0.12403746694326401, "learning_rate": 0.0005, "loss": 2.1214, "step": 198430 }, { "epoch": 0.7553116174265204, "grad_norm": 0.12165076285600662, "learning_rate": 0.0005, "loss": 2.1187, "step": 198440 }, { "epoch": 0.7553496798946431, "grad_norm": 0.1258837878704071, "learning_rate": 0.0005, "loss": 2.105, "step": 198450 }, { "epoch": 0.7553877423627657, "grad_norm": 0.12263844907283783, "learning_rate": 0.0005, "loss": 2.1009, "step": 198460 }, { "epoch": 0.7554258048308885, "grad_norm": 0.15989969670772552, "learning_rate": 0.0005, "loss": 2.1001, "step": 198470 }, { "epoch": 0.7554638672990112, "grad_norm": 0.12192397564649582, "learning_rate": 0.0005, "loss": 2.1086, "step": 198480 }, { "epoch": 0.7555019297671338, "grad_norm": 0.1142887994647026, "learning_rate": 0.0005, "loss": 2.115, "step": 198490 }, { "epoch": 0.7555399922352565, "grad_norm": 0.12102162837982178, "learning_rate": 0.0005, "loss": 2.0939, "step": 198500 }, { "epoch": 0.7555780547033791, "grad_norm": 0.12950734794139862, "learning_rate": 0.0005, "loss": 2.093, "step": 198510 }, { "epoch": 0.7556161171715019, "grad_norm": 0.11939499527215958, "learning_rate": 0.0005, "loss": 2.1101, "step": 198520 }, { "epoch": 0.7556541796396246, "grad_norm": 0.12329903990030289, "learning_rate": 0.0005, "loss": 2.0973, "step": 198530 }, { "epoch": 0.7556922421077472, "grad_norm": 0.1571332812309265, "learning_rate": 0.0005, "loss": 2.0888, "step": 198540 }, { "epoch": 0.7557303045758699, "grad_norm": 0.12774313986301422, "learning_rate": 0.0005, "loss": 2.1094, "step": 198550 }, { "epoch": 0.7557683670439926, "grad_norm": 0.11515670269727707, "learning_rate": 0.0005, "loss": 2.1102, "step": 198560 }, { "epoch": 0.7558064295121153, "grad_norm": 0.12537802755832672, "learning_rate": 0.0005, "loss": 2.0992, "step": 198570 }, { "epoch": 0.755844491980238, "grad_norm": 0.13329669833183289, "learning_rate": 0.0005, "loss": 2.1084, "step": 198580 }, { "epoch": 0.7558825544483606, "grad_norm": 0.11480581760406494, "learning_rate": 0.0005, "loss": 2.1071, "step": 198590 }, { "epoch": 0.7559206169164834, "grad_norm": 0.12687240540981293, "learning_rate": 0.0005, "loss": 2.0982, "step": 198600 }, { "epoch": 0.755958679384606, "grad_norm": 0.12661078572273254, "learning_rate": 0.0005, "loss": 2.1068, "step": 198610 }, { "epoch": 0.7559967418527287, "grad_norm": 0.13912835717201233, "learning_rate": 0.0005, "loss": 2.113, "step": 198620 }, { "epoch": 0.7560348043208514, "grad_norm": 0.1346118450164795, "learning_rate": 0.0005, "loss": 2.1047, "step": 198630 }, { "epoch": 0.7560728667889741, "grad_norm": 0.13172611594200134, "learning_rate": 0.0005, "loss": 2.0918, "step": 198640 }, { "epoch": 0.7561109292570968, "grad_norm": 0.12862594425678253, "learning_rate": 0.0005, "loss": 2.1159, "step": 198650 }, { "epoch": 0.7561489917252194, "grad_norm": 0.12063287943601608, "learning_rate": 0.0005, "loss": 2.1107, "step": 198660 }, { "epoch": 0.7561870541933421, "grad_norm": 0.11567655950784683, "learning_rate": 0.0005, "loss": 2.1113, "step": 198670 }, { "epoch": 0.7562251166614647, "grad_norm": 0.12064255028963089, "learning_rate": 0.0005, "loss": 2.1089, "step": 198680 }, { "epoch": 0.7562631791295875, "grad_norm": 0.12698319554328918, "learning_rate": 0.0005, "loss": 2.0936, "step": 198690 }, { "epoch": 0.7563012415977102, "grad_norm": 0.12099333107471466, "learning_rate": 0.0005, "loss": 2.1033, "step": 198700 }, { "epoch": 0.7563393040658328, "grad_norm": 0.1265001893043518, "learning_rate": 0.0005, "loss": 2.1127, "step": 198710 }, { "epoch": 0.7563773665339555, "grad_norm": 0.12512876093387604, "learning_rate": 0.0005, "loss": 2.1019, "step": 198720 }, { "epoch": 0.7564154290020783, "grad_norm": 0.12623614072799683, "learning_rate": 0.0005, "loss": 2.1067, "step": 198730 }, { "epoch": 0.7564534914702009, "grad_norm": 0.11762641370296478, "learning_rate": 0.0005, "loss": 2.105, "step": 198740 }, { "epoch": 0.7564915539383236, "grad_norm": 0.11356106400489807, "learning_rate": 0.0005, "loss": 2.1016, "step": 198750 }, { "epoch": 0.7565296164064462, "grad_norm": 0.13299451768398285, "learning_rate": 0.0005, "loss": 2.1074, "step": 198760 }, { "epoch": 0.756567678874569, "grad_norm": 0.13182711601257324, "learning_rate": 0.0005, "loss": 2.1139, "step": 198770 }, { "epoch": 0.7566057413426917, "grad_norm": 0.13028521835803986, "learning_rate": 0.0005, "loss": 2.0855, "step": 198780 }, { "epoch": 0.7566438038108143, "grad_norm": 0.11709822714328766, "learning_rate": 0.0005, "loss": 2.0949, "step": 198790 }, { "epoch": 0.756681866278937, "grad_norm": 0.14088694751262665, "learning_rate": 0.0005, "loss": 2.1229, "step": 198800 }, { "epoch": 0.7567199287470596, "grad_norm": 0.12376152724027634, "learning_rate": 0.0005, "loss": 2.1175, "step": 198810 }, { "epoch": 0.7567579912151824, "grad_norm": 0.12194863706827164, "learning_rate": 0.0005, "loss": 2.1228, "step": 198820 }, { "epoch": 0.756796053683305, "grad_norm": 0.12565350532531738, "learning_rate": 0.0005, "loss": 2.1134, "step": 198830 }, { "epoch": 0.7568341161514277, "grad_norm": 0.12101472169160843, "learning_rate": 0.0005, "loss": 2.1027, "step": 198840 }, { "epoch": 0.7568721786195504, "grad_norm": 0.11491557955741882, "learning_rate": 0.0005, "loss": 2.1205, "step": 198850 }, { "epoch": 0.7569102410876731, "grad_norm": 0.1167779192328453, "learning_rate": 0.0005, "loss": 2.1065, "step": 198860 }, { "epoch": 0.7569483035557958, "grad_norm": 0.12635232508182526, "learning_rate": 0.0005, "loss": 2.1023, "step": 198870 }, { "epoch": 0.7569863660239184, "grad_norm": 0.12471341341733932, "learning_rate": 0.0005, "loss": 2.1154, "step": 198880 }, { "epoch": 0.7570244284920411, "grad_norm": 0.12258568406105042, "learning_rate": 0.0005, "loss": 2.0975, "step": 198890 }, { "epoch": 0.7570624909601639, "grad_norm": 0.1298074573278427, "learning_rate": 0.0005, "loss": 2.1042, "step": 198900 }, { "epoch": 0.7571005534282865, "grad_norm": 0.13026870787143707, "learning_rate": 0.0005, "loss": 2.1225, "step": 198910 }, { "epoch": 0.7571386158964092, "grad_norm": 0.120946004986763, "learning_rate": 0.0005, "loss": 2.1033, "step": 198920 }, { "epoch": 0.7571766783645318, "grad_norm": 0.12533290684223175, "learning_rate": 0.0005, "loss": 2.0977, "step": 198930 }, { "epoch": 0.7572147408326545, "grad_norm": 0.10581686347723007, "learning_rate": 0.0005, "loss": 2.1059, "step": 198940 }, { "epoch": 0.7572528033007773, "grad_norm": 0.12052454799413681, "learning_rate": 0.0005, "loss": 2.1065, "step": 198950 }, { "epoch": 0.7572908657688999, "grad_norm": 0.13290107250213623, "learning_rate": 0.0005, "loss": 2.1176, "step": 198960 }, { "epoch": 0.7573289282370226, "grad_norm": 0.1411670446395874, "learning_rate": 0.0005, "loss": 2.12, "step": 198970 }, { "epoch": 0.7573669907051452, "grad_norm": 0.13029508292675018, "learning_rate": 0.0005, "loss": 2.1085, "step": 198980 }, { "epoch": 0.757405053173268, "grad_norm": 0.12159781903028488, "learning_rate": 0.0005, "loss": 2.1098, "step": 198990 }, { "epoch": 0.7574431156413907, "grad_norm": 0.1355499029159546, "learning_rate": 0.0005, "loss": 2.1179, "step": 199000 }, { "epoch": 0.7574811781095133, "grad_norm": 0.12084466218948364, "learning_rate": 0.0005, "loss": 2.109, "step": 199010 }, { "epoch": 0.757519240577636, "grad_norm": 0.12449130415916443, "learning_rate": 0.0005, "loss": 2.1193, "step": 199020 }, { "epoch": 0.7575573030457587, "grad_norm": 0.1268312633037567, "learning_rate": 0.0005, "loss": 2.1046, "step": 199030 }, { "epoch": 0.7575953655138814, "grad_norm": 0.12807604670524597, "learning_rate": 0.0005, "loss": 2.1107, "step": 199040 }, { "epoch": 0.7576334279820041, "grad_norm": 0.12403193861246109, "learning_rate": 0.0005, "loss": 2.1057, "step": 199050 }, { "epoch": 0.7576714904501267, "grad_norm": 0.12660513818264008, "learning_rate": 0.0005, "loss": 2.1002, "step": 199060 }, { "epoch": 0.7577095529182495, "grad_norm": 0.13343201577663422, "learning_rate": 0.0005, "loss": 2.1118, "step": 199070 }, { "epoch": 0.7577476153863721, "grad_norm": 0.13150076568126678, "learning_rate": 0.0005, "loss": 2.1075, "step": 199080 }, { "epoch": 0.7577856778544948, "grad_norm": 0.12581336498260498, "learning_rate": 0.0005, "loss": 2.1055, "step": 199090 }, { "epoch": 0.7578237403226175, "grad_norm": 0.11675869673490524, "learning_rate": 0.0005, "loss": 2.1173, "step": 199100 }, { "epoch": 0.7578618027907401, "grad_norm": 0.1217801496386528, "learning_rate": 0.0005, "loss": 2.1082, "step": 199110 }, { "epoch": 0.7578998652588629, "grad_norm": 0.11997724324464798, "learning_rate": 0.0005, "loss": 2.1158, "step": 199120 }, { "epoch": 0.7579379277269855, "grad_norm": 0.13471657037734985, "learning_rate": 0.0005, "loss": 2.1291, "step": 199130 }, { "epoch": 0.7579759901951082, "grad_norm": 0.13399042189121246, "learning_rate": 0.0005, "loss": 2.1235, "step": 199140 }, { "epoch": 0.7580140526632309, "grad_norm": 0.1244891956448555, "learning_rate": 0.0005, "loss": 2.1075, "step": 199150 }, { "epoch": 0.7580521151313536, "grad_norm": 0.12309864163398743, "learning_rate": 0.0005, "loss": 2.1206, "step": 199160 }, { "epoch": 0.7580901775994763, "grad_norm": 0.11609305441379547, "learning_rate": 0.0005, "loss": 2.0961, "step": 199170 }, { "epoch": 0.7581282400675989, "grad_norm": 0.12005341053009033, "learning_rate": 0.0005, "loss": 2.1092, "step": 199180 }, { "epoch": 0.7581663025357216, "grad_norm": 0.12097519636154175, "learning_rate": 0.0005, "loss": 2.1045, "step": 199190 }, { "epoch": 0.7582043650038444, "grad_norm": 0.1215854361653328, "learning_rate": 0.0005, "loss": 2.1187, "step": 199200 }, { "epoch": 0.758242427471967, "grad_norm": 0.11642411351203918, "learning_rate": 0.0005, "loss": 2.116, "step": 199210 }, { "epoch": 0.7582804899400897, "grad_norm": 0.1203550472855568, "learning_rate": 0.0005, "loss": 2.1013, "step": 199220 }, { "epoch": 0.7583185524082123, "grad_norm": 0.14909736812114716, "learning_rate": 0.0005, "loss": 2.0923, "step": 199230 }, { "epoch": 0.758356614876335, "grad_norm": 0.13295716047286987, "learning_rate": 0.0005, "loss": 2.1066, "step": 199240 }, { "epoch": 0.7583946773444578, "grad_norm": 0.13675452768802643, "learning_rate": 0.0005, "loss": 2.114, "step": 199250 }, { "epoch": 0.7584327398125804, "grad_norm": 0.12822073698043823, "learning_rate": 0.0005, "loss": 2.0961, "step": 199260 }, { "epoch": 0.7584708022807031, "grad_norm": 0.1309756189584732, "learning_rate": 0.0005, "loss": 2.0837, "step": 199270 }, { "epoch": 0.7585088647488257, "grad_norm": 0.11035849899053574, "learning_rate": 0.0005, "loss": 2.1044, "step": 199280 }, { "epoch": 0.7585469272169485, "grad_norm": 0.12121044844388962, "learning_rate": 0.0005, "loss": 2.0881, "step": 199290 }, { "epoch": 0.7585849896850712, "grad_norm": 0.13430899381637573, "learning_rate": 0.0005, "loss": 2.1066, "step": 199300 }, { "epoch": 0.7586230521531938, "grad_norm": 0.11547856032848358, "learning_rate": 0.0005, "loss": 2.1052, "step": 199310 }, { "epoch": 0.7586611146213165, "grad_norm": 0.12349169701337814, "learning_rate": 0.0005, "loss": 2.0869, "step": 199320 }, { "epoch": 0.7586991770894392, "grad_norm": 0.14382505416870117, "learning_rate": 0.0005, "loss": 2.1048, "step": 199330 }, { "epoch": 0.7587372395575619, "grad_norm": 0.13851898908615112, "learning_rate": 0.0005, "loss": 2.1078, "step": 199340 }, { "epoch": 0.7587753020256846, "grad_norm": 0.12755434215068817, "learning_rate": 0.0005, "loss": 2.1152, "step": 199350 }, { "epoch": 0.7588133644938072, "grad_norm": 0.1360461562871933, "learning_rate": 0.0005, "loss": 2.0972, "step": 199360 }, { "epoch": 0.7588514269619299, "grad_norm": 0.1464185267686844, "learning_rate": 0.0005, "loss": 2.1043, "step": 199370 }, { "epoch": 0.7588894894300526, "grad_norm": 0.14608561992645264, "learning_rate": 0.0005, "loss": 2.1027, "step": 199380 }, { "epoch": 0.7589275518981753, "grad_norm": 0.11467023193836212, "learning_rate": 0.0005, "loss": 2.0922, "step": 199390 }, { "epoch": 0.758965614366298, "grad_norm": 0.1256762444972992, "learning_rate": 0.0005, "loss": 2.0968, "step": 199400 }, { "epoch": 0.7590036768344206, "grad_norm": 0.12068627774715424, "learning_rate": 0.0005, "loss": 2.0935, "step": 199410 }, { "epoch": 0.7590417393025434, "grad_norm": 0.16103871166706085, "learning_rate": 0.0005, "loss": 2.1063, "step": 199420 }, { "epoch": 0.759079801770666, "grad_norm": 0.12145240604877472, "learning_rate": 0.0005, "loss": 2.1141, "step": 199430 }, { "epoch": 0.7591178642387887, "grad_norm": 0.1241162121295929, "learning_rate": 0.0005, "loss": 2.0901, "step": 199440 }, { "epoch": 0.7591559267069113, "grad_norm": 0.11487778276205063, "learning_rate": 0.0005, "loss": 2.1079, "step": 199450 }, { "epoch": 0.7591939891750341, "grad_norm": 0.12314852327108383, "learning_rate": 0.0005, "loss": 2.0937, "step": 199460 }, { "epoch": 0.7592320516431568, "grad_norm": 0.21512353420257568, "learning_rate": 0.0005, "loss": 2.1157, "step": 199470 }, { "epoch": 0.7592701141112794, "grad_norm": 0.1336342692375183, "learning_rate": 0.0005, "loss": 2.1115, "step": 199480 }, { "epoch": 0.7593081765794021, "grad_norm": 0.11381682753562927, "learning_rate": 0.0005, "loss": 2.0994, "step": 199490 }, { "epoch": 0.7593462390475249, "grad_norm": 0.11951464414596558, "learning_rate": 0.0005, "loss": 2.1109, "step": 199500 }, { "epoch": 0.7593843015156475, "grad_norm": 0.11122014373540878, "learning_rate": 0.0005, "loss": 2.1155, "step": 199510 }, { "epoch": 0.7594223639837702, "grad_norm": 0.11937428265810013, "learning_rate": 0.0005, "loss": 2.1051, "step": 199520 }, { "epoch": 0.7594604264518928, "grad_norm": 0.12456253916025162, "learning_rate": 0.0005, "loss": 2.1068, "step": 199530 }, { "epoch": 0.7594984889200155, "grad_norm": 0.11650332808494568, "learning_rate": 0.0005, "loss": 2.0945, "step": 199540 }, { "epoch": 0.7595365513881382, "grad_norm": 0.1122107282280922, "learning_rate": 0.0005, "loss": 2.1168, "step": 199550 }, { "epoch": 0.7595746138562609, "grad_norm": 0.12337896972894669, "learning_rate": 0.0005, "loss": 2.0962, "step": 199560 }, { "epoch": 0.7596126763243836, "grad_norm": 0.11624779552221298, "learning_rate": 0.0005, "loss": 2.1046, "step": 199570 }, { "epoch": 0.7596507387925062, "grad_norm": 0.13512980937957764, "learning_rate": 0.0005, "loss": 2.1128, "step": 199580 }, { "epoch": 0.759688801260629, "grad_norm": 0.13231684267520905, "learning_rate": 0.0005, "loss": 2.1065, "step": 199590 }, { "epoch": 0.7597268637287516, "grad_norm": 0.1199183389544487, "learning_rate": 0.0005, "loss": 2.1129, "step": 199600 }, { "epoch": 0.7597649261968743, "grad_norm": 0.12749294936656952, "learning_rate": 0.0005, "loss": 2.1108, "step": 199610 }, { "epoch": 0.759802988664997, "grad_norm": 0.13242657482624054, "learning_rate": 0.0005, "loss": 2.1013, "step": 199620 }, { "epoch": 0.7598410511331197, "grad_norm": 0.12247808277606964, "learning_rate": 0.0005, "loss": 2.1226, "step": 199630 }, { "epoch": 0.7598791136012424, "grad_norm": 0.1193656176328659, "learning_rate": 0.0005, "loss": 2.0984, "step": 199640 }, { "epoch": 0.759917176069365, "grad_norm": 0.12609358131885529, "learning_rate": 0.0005, "loss": 2.1056, "step": 199650 }, { "epoch": 0.7599552385374877, "grad_norm": 0.12350145727396011, "learning_rate": 0.0005, "loss": 2.1053, "step": 199660 }, { "epoch": 0.7599933010056104, "grad_norm": 0.1205834224820137, "learning_rate": 0.0005, "loss": 2.1156, "step": 199670 }, { "epoch": 0.7600313634737331, "grad_norm": 0.12084044516086578, "learning_rate": 0.0005, "loss": 2.1046, "step": 199680 }, { "epoch": 0.7600694259418558, "grad_norm": 0.12557877600193024, "learning_rate": 0.0005, "loss": 2.1193, "step": 199690 }, { "epoch": 0.7601074884099784, "grad_norm": 0.1353178471326828, "learning_rate": 0.0005, "loss": 2.1047, "step": 199700 }, { "epoch": 0.7601455508781011, "grad_norm": 0.12090218812227249, "learning_rate": 0.0005, "loss": 2.1, "step": 199710 }, { "epoch": 0.7601836133462239, "grad_norm": 0.13385021686553955, "learning_rate": 0.0005, "loss": 2.1001, "step": 199720 }, { "epoch": 0.7602216758143465, "grad_norm": 0.1355661153793335, "learning_rate": 0.0005, "loss": 2.0958, "step": 199730 }, { "epoch": 0.7602597382824692, "grad_norm": 0.1311371773481369, "learning_rate": 0.0005, "loss": 2.1114, "step": 199740 }, { "epoch": 0.7602978007505918, "grad_norm": 0.14127229154109955, "learning_rate": 0.0005, "loss": 2.1108, "step": 199750 }, { "epoch": 0.7603358632187146, "grad_norm": 0.12275160849094391, "learning_rate": 0.0005, "loss": 2.1129, "step": 199760 }, { "epoch": 0.7603739256868373, "grad_norm": 0.12980370223522186, "learning_rate": 0.0005, "loss": 2.113, "step": 199770 }, { "epoch": 0.7604119881549599, "grad_norm": 0.12649033963680267, "learning_rate": 0.0005, "loss": 2.1031, "step": 199780 }, { "epoch": 0.7604500506230826, "grad_norm": 0.12469767779111862, "learning_rate": 0.0005, "loss": 2.0962, "step": 199790 }, { "epoch": 0.7604881130912052, "grad_norm": 0.1242150142788887, "learning_rate": 0.0005, "loss": 2.1314, "step": 199800 }, { "epoch": 0.760526175559328, "grad_norm": 0.13924376666545868, "learning_rate": 0.0005, "loss": 2.1166, "step": 199810 }, { "epoch": 0.7605642380274507, "grad_norm": 0.14627453684806824, "learning_rate": 0.0005, "loss": 2.0973, "step": 199820 }, { "epoch": 0.7606023004955733, "grad_norm": 0.14123176038265228, "learning_rate": 0.0005, "loss": 2.1051, "step": 199830 }, { "epoch": 0.760640362963696, "grad_norm": 0.14151117205619812, "learning_rate": 0.0005, "loss": 2.1149, "step": 199840 }, { "epoch": 0.7606784254318187, "grad_norm": 0.12917184829711914, "learning_rate": 0.0005, "loss": 2.1178, "step": 199850 }, { "epoch": 0.7607164878999414, "grad_norm": 0.14700210094451904, "learning_rate": 0.0005, "loss": 2.1041, "step": 199860 }, { "epoch": 0.760754550368064, "grad_norm": 0.12420197576284409, "learning_rate": 0.0005, "loss": 2.1019, "step": 199870 }, { "epoch": 0.7607926128361867, "grad_norm": 0.12090126425027847, "learning_rate": 0.0005, "loss": 2.1061, "step": 199880 }, { "epoch": 0.7608306753043095, "grad_norm": 0.12003166973590851, "learning_rate": 0.0005, "loss": 2.1053, "step": 199890 }, { "epoch": 0.7608687377724321, "grad_norm": 0.11866210401058197, "learning_rate": 0.0005, "loss": 2.1169, "step": 199900 }, { "epoch": 0.7609068002405548, "grad_norm": 0.13188976049423218, "learning_rate": 0.0005, "loss": 2.0998, "step": 199910 }, { "epoch": 0.7609448627086774, "grad_norm": 0.12912617623806, "learning_rate": 0.0005, "loss": 2.1116, "step": 199920 }, { "epoch": 0.7609829251768002, "grad_norm": 0.14618924260139465, "learning_rate": 0.0005, "loss": 2.111, "step": 199930 }, { "epoch": 0.7610209876449229, "grad_norm": 0.12827840447425842, "learning_rate": 0.0005, "loss": 2.1032, "step": 199940 }, { "epoch": 0.7610590501130455, "grad_norm": 0.12038593739271164, "learning_rate": 0.0005, "loss": 2.107, "step": 199950 }, { "epoch": 0.7610971125811682, "grad_norm": 0.13553407788276672, "learning_rate": 0.0005, "loss": 2.0962, "step": 199960 }, { "epoch": 0.7611351750492908, "grad_norm": 0.12781314551830292, "learning_rate": 0.0005, "loss": 2.1178, "step": 199970 }, { "epoch": 0.7611732375174136, "grad_norm": 0.11799361556768417, "learning_rate": 0.0005, "loss": 2.1084, "step": 199980 }, { "epoch": 0.7612112999855363, "grad_norm": 0.12439846992492676, "learning_rate": 0.0005, "loss": 2.1148, "step": 199990 }, { "epoch": 0.7612493624536589, "grad_norm": 0.1184629574418068, "learning_rate": 0.0005, "loss": 2.1058, "step": 200000 }, { "epoch": 0.7612874249217816, "grad_norm": 0.12614157795906067, "learning_rate": 0.0005, "loss": 2.1094, "step": 200010 }, { "epoch": 0.7613254873899044, "grad_norm": 0.14780183136463165, "learning_rate": 0.0005, "loss": 2.0919, "step": 200020 }, { "epoch": 0.761363549858027, "grad_norm": 0.12393063306808472, "learning_rate": 0.0005, "loss": 2.1061, "step": 200030 }, { "epoch": 0.7614016123261497, "grad_norm": 0.11860448122024536, "learning_rate": 0.0005, "loss": 2.1023, "step": 200040 }, { "epoch": 0.7614396747942723, "grad_norm": 0.11324331164360046, "learning_rate": 0.0005, "loss": 2.0999, "step": 200050 }, { "epoch": 0.7614777372623951, "grad_norm": 0.12680920958518982, "learning_rate": 0.0005, "loss": 2.1124, "step": 200060 }, { "epoch": 0.7615157997305178, "grad_norm": 0.11807206273078918, "learning_rate": 0.0005, "loss": 2.101, "step": 200070 }, { "epoch": 0.7615538621986404, "grad_norm": 0.12784326076507568, "learning_rate": 0.0005, "loss": 2.1126, "step": 200080 }, { "epoch": 0.7615919246667631, "grad_norm": 0.12069597095251083, "learning_rate": 0.0005, "loss": 2.1191, "step": 200090 }, { "epoch": 0.7616299871348857, "grad_norm": 0.12362676113843918, "learning_rate": 0.0005, "loss": 2.1032, "step": 200100 }, { "epoch": 0.7616680496030085, "grad_norm": 0.11808501929044724, "learning_rate": 0.0005, "loss": 2.1098, "step": 200110 }, { "epoch": 0.7617061120711311, "grad_norm": 0.1286737024784088, "learning_rate": 0.0005, "loss": 2.0964, "step": 200120 }, { "epoch": 0.7617441745392538, "grad_norm": 0.1303025186061859, "learning_rate": 0.0005, "loss": 2.1031, "step": 200130 }, { "epoch": 0.7617822370073765, "grad_norm": 0.12651576101779938, "learning_rate": 0.0005, "loss": 2.0939, "step": 200140 }, { "epoch": 0.7618202994754992, "grad_norm": 0.1360395848751068, "learning_rate": 0.0005, "loss": 2.113, "step": 200150 }, { "epoch": 0.7618583619436219, "grad_norm": 0.11983554065227509, "learning_rate": 0.0005, "loss": 2.1298, "step": 200160 }, { "epoch": 0.7618964244117445, "grad_norm": 0.12265199422836304, "learning_rate": 0.0005, "loss": 2.1249, "step": 200170 }, { "epoch": 0.7619344868798672, "grad_norm": 0.12509949505329132, "learning_rate": 0.0005, "loss": 2.1171, "step": 200180 }, { "epoch": 0.76197254934799, "grad_norm": 0.11727918684482574, "learning_rate": 0.0005, "loss": 2.1039, "step": 200190 }, { "epoch": 0.7620106118161126, "grad_norm": 0.12236443907022476, "learning_rate": 0.0005, "loss": 2.1038, "step": 200200 }, { "epoch": 0.7620486742842353, "grad_norm": 0.11449822783470154, "learning_rate": 0.0005, "loss": 2.1033, "step": 200210 }, { "epoch": 0.7620867367523579, "grad_norm": 0.1347184032201767, "learning_rate": 0.0005, "loss": 2.1074, "step": 200220 }, { "epoch": 0.7621247992204806, "grad_norm": 0.12472956627607346, "learning_rate": 0.0005, "loss": 2.1097, "step": 200230 }, { "epoch": 0.7621628616886034, "grad_norm": 0.13330808281898499, "learning_rate": 0.0005, "loss": 2.1002, "step": 200240 }, { "epoch": 0.762200924156726, "grad_norm": 0.14842481911182404, "learning_rate": 0.0005, "loss": 2.1151, "step": 200250 }, { "epoch": 0.7622389866248487, "grad_norm": 0.12240416556596756, "learning_rate": 0.0005, "loss": 2.109, "step": 200260 }, { "epoch": 0.7622770490929713, "grad_norm": 0.12104672193527222, "learning_rate": 0.0005, "loss": 2.1092, "step": 200270 }, { "epoch": 0.7623151115610941, "grad_norm": 0.1383139193058014, "learning_rate": 0.0005, "loss": 2.1162, "step": 200280 }, { "epoch": 0.7623531740292168, "grad_norm": 0.12329646944999695, "learning_rate": 0.0005, "loss": 2.1168, "step": 200290 }, { "epoch": 0.7623912364973394, "grad_norm": 0.1255401223897934, "learning_rate": 0.0005, "loss": 2.0927, "step": 200300 }, { "epoch": 0.7624292989654621, "grad_norm": 0.12841983139514923, "learning_rate": 0.0005, "loss": 2.0941, "step": 200310 }, { "epoch": 0.7624673614335848, "grad_norm": 0.12036871910095215, "learning_rate": 0.0005, "loss": 2.1105, "step": 200320 }, { "epoch": 0.7625054239017075, "grad_norm": 0.11924967914819717, "learning_rate": 0.0005, "loss": 2.1089, "step": 200330 }, { "epoch": 0.7625434863698302, "grad_norm": 0.12155918776988983, "learning_rate": 0.0005, "loss": 2.1103, "step": 200340 }, { "epoch": 0.7625815488379528, "grad_norm": 0.11829260736703873, "learning_rate": 0.0005, "loss": 2.1019, "step": 200350 }, { "epoch": 0.7626196113060756, "grad_norm": 0.1354644000530243, "learning_rate": 0.0005, "loss": 2.1174, "step": 200360 }, { "epoch": 0.7626576737741982, "grad_norm": 0.1325652152299881, "learning_rate": 0.0005, "loss": 2.1152, "step": 200370 }, { "epoch": 0.7626957362423209, "grad_norm": 0.13303276896476746, "learning_rate": 0.0005, "loss": 2.1064, "step": 200380 }, { "epoch": 0.7627337987104436, "grad_norm": 0.11551357805728912, "learning_rate": 0.0005, "loss": 2.1103, "step": 200390 }, { "epoch": 0.7627718611785662, "grad_norm": 0.11996026337146759, "learning_rate": 0.0005, "loss": 2.1102, "step": 200400 }, { "epoch": 0.762809923646689, "grad_norm": 0.12938952445983887, "learning_rate": 0.0005, "loss": 2.1097, "step": 200410 }, { "epoch": 0.7628479861148116, "grad_norm": 0.13679099082946777, "learning_rate": 0.0005, "loss": 2.1092, "step": 200420 }, { "epoch": 0.7628860485829343, "grad_norm": 0.12176800519227982, "learning_rate": 0.0005, "loss": 2.1028, "step": 200430 }, { "epoch": 0.762924111051057, "grad_norm": 0.1698097288608551, "learning_rate": 0.0005, "loss": 2.0942, "step": 200440 }, { "epoch": 0.7629621735191797, "grad_norm": 0.11960088461637497, "learning_rate": 0.0005, "loss": 2.1108, "step": 200450 }, { "epoch": 0.7630002359873024, "grad_norm": 0.1185685396194458, "learning_rate": 0.0005, "loss": 2.0984, "step": 200460 }, { "epoch": 0.763038298455425, "grad_norm": 0.11566752195358276, "learning_rate": 0.0005, "loss": 2.1233, "step": 200470 }, { "epoch": 0.7630763609235477, "grad_norm": 0.12025351822376251, "learning_rate": 0.0005, "loss": 2.1089, "step": 200480 }, { "epoch": 0.7631144233916705, "grad_norm": 0.12671546638011932, "learning_rate": 0.0005, "loss": 2.1085, "step": 200490 }, { "epoch": 0.7631524858597931, "grad_norm": 0.1308915913105011, "learning_rate": 0.0005, "loss": 2.0939, "step": 200500 }, { "epoch": 0.7631905483279158, "grad_norm": 0.12134160101413727, "learning_rate": 0.0005, "loss": 2.0914, "step": 200510 }, { "epoch": 0.7632286107960384, "grad_norm": 0.11867324262857437, "learning_rate": 0.0005, "loss": 2.1218, "step": 200520 }, { "epoch": 0.7632666732641611, "grad_norm": 0.13097916543483734, "learning_rate": 0.0005, "loss": 2.1043, "step": 200530 }, { "epoch": 0.7633047357322839, "grad_norm": 0.14320102334022522, "learning_rate": 0.0005, "loss": 2.1088, "step": 200540 }, { "epoch": 0.7633427982004065, "grad_norm": 0.12619368731975555, "learning_rate": 0.0005, "loss": 2.1038, "step": 200550 }, { "epoch": 0.7633808606685292, "grad_norm": 0.1211523711681366, "learning_rate": 0.0005, "loss": 2.12, "step": 200560 }, { "epoch": 0.7634189231366518, "grad_norm": 0.12414732575416565, "learning_rate": 0.0005, "loss": 2.1117, "step": 200570 }, { "epoch": 0.7634569856047746, "grad_norm": 0.12435490638017654, "learning_rate": 0.0005, "loss": 2.109, "step": 200580 }, { "epoch": 0.7634950480728973, "grad_norm": 0.12518084049224854, "learning_rate": 0.0005, "loss": 2.1033, "step": 200590 }, { "epoch": 0.7635331105410199, "grad_norm": 0.1124156191945076, "learning_rate": 0.0005, "loss": 2.111, "step": 200600 }, { "epoch": 0.7635711730091426, "grad_norm": 0.12874804437160492, "learning_rate": 0.0005, "loss": 2.103, "step": 200610 }, { "epoch": 0.7636092354772653, "grad_norm": 0.13261353969573975, "learning_rate": 0.0005, "loss": 2.1216, "step": 200620 }, { "epoch": 0.763647297945388, "grad_norm": 0.11985619366168976, "learning_rate": 0.0005, "loss": 2.1076, "step": 200630 }, { "epoch": 0.7636853604135106, "grad_norm": 0.12247840315103531, "learning_rate": 0.0005, "loss": 2.1014, "step": 200640 }, { "epoch": 0.7637234228816333, "grad_norm": 0.1114402785897255, "learning_rate": 0.0005, "loss": 2.1108, "step": 200650 }, { "epoch": 0.763761485349756, "grad_norm": 0.1159055233001709, "learning_rate": 0.0005, "loss": 2.1006, "step": 200660 }, { "epoch": 0.7637995478178787, "grad_norm": 0.1403997242450714, "learning_rate": 0.0005, "loss": 2.1101, "step": 200670 }, { "epoch": 0.7638376102860014, "grad_norm": 0.12809248268604279, "learning_rate": 0.0005, "loss": 2.1151, "step": 200680 }, { "epoch": 0.763875672754124, "grad_norm": 0.11926622688770294, "learning_rate": 0.0005, "loss": 2.1021, "step": 200690 }, { "epoch": 0.7639137352222467, "grad_norm": 0.12996363639831543, "learning_rate": 0.0005, "loss": 2.1131, "step": 200700 }, { "epoch": 0.7639517976903695, "grad_norm": 0.12981660664081573, "learning_rate": 0.0005, "loss": 2.0952, "step": 200710 }, { "epoch": 0.7639898601584921, "grad_norm": 0.21764759719371796, "learning_rate": 0.0005, "loss": 2.1018, "step": 200720 }, { "epoch": 0.7640279226266148, "grad_norm": 0.21405267715454102, "learning_rate": 0.0005, "loss": 2.1045, "step": 200730 }, { "epoch": 0.7640659850947374, "grad_norm": 0.1331353485584259, "learning_rate": 0.0005, "loss": 2.1014, "step": 200740 }, { "epoch": 0.7641040475628602, "grad_norm": 0.12313000112771988, "learning_rate": 0.0005, "loss": 2.0937, "step": 200750 }, { "epoch": 0.7641421100309829, "grad_norm": 0.12090528011322021, "learning_rate": 0.0005, "loss": 2.1044, "step": 200760 }, { "epoch": 0.7641801724991055, "grad_norm": 0.12569141387939453, "learning_rate": 0.0005, "loss": 2.1093, "step": 200770 }, { "epoch": 0.7642182349672282, "grad_norm": 0.11512047052383423, "learning_rate": 0.0005, "loss": 2.103, "step": 200780 }, { "epoch": 0.764256297435351, "grad_norm": 0.13008204102516174, "learning_rate": 0.0005, "loss": 2.1059, "step": 200790 }, { "epoch": 0.7642943599034736, "grad_norm": 0.11905677616596222, "learning_rate": 0.0005, "loss": 2.1078, "step": 200800 }, { "epoch": 0.7643324223715963, "grad_norm": 0.12396584451198578, "learning_rate": 0.0005, "loss": 2.0924, "step": 200810 }, { "epoch": 0.7643704848397189, "grad_norm": 0.11942270398139954, "learning_rate": 0.0005, "loss": 2.0799, "step": 200820 }, { "epoch": 0.7644085473078416, "grad_norm": 0.11927188187837601, "learning_rate": 0.0005, "loss": 2.1281, "step": 200830 }, { "epoch": 0.7644466097759643, "grad_norm": 0.1283026933670044, "learning_rate": 0.0005, "loss": 2.1087, "step": 200840 }, { "epoch": 0.764484672244087, "grad_norm": 0.13145646452903748, "learning_rate": 0.0005, "loss": 2.0986, "step": 200850 }, { "epoch": 0.7645227347122097, "grad_norm": 0.12313584238290787, "learning_rate": 0.0005, "loss": 2.1137, "step": 200860 }, { "epoch": 0.7645607971803323, "grad_norm": 0.13153089582920074, "learning_rate": 0.0005, "loss": 2.107, "step": 200870 }, { "epoch": 0.7645988596484551, "grad_norm": 0.12019491195678711, "learning_rate": 0.0005, "loss": 2.0964, "step": 200880 }, { "epoch": 0.7646369221165777, "grad_norm": 0.12518441677093506, "learning_rate": 0.0005, "loss": 2.1079, "step": 200890 }, { "epoch": 0.7646749845847004, "grad_norm": 0.11960908025503159, "learning_rate": 0.0005, "loss": 2.0951, "step": 200900 }, { "epoch": 0.7647130470528231, "grad_norm": 0.1407465934753418, "learning_rate": 0.0005, "loss": 2.1137, "step": 200910 }, { "epoch": 0.7647511095209458, "grad_norm": 0.1314762830734253, "learning_rate": 0.0005, "loss": 2.1015, "step": 200920 }, { "epoch": 0.7647891719890685, "grad_norm": 0.11958575248718262, "learning_rate": 0.0005, "loss": 2.1064, "step": 200930 }, { "epoch": 0.7648272344571911, "grad_norm": 0.12189696729183197, "learning_rate": 0.0005, "loss": 2.1009, "step": 200940 }, { "epoch": 0.7648652969253138, "grad_norm": 0.15705542266368866, "learning_rate": 0.0005, "loss": 2.1095, "step": 200950 }, { "epoch": 0.7649033593934365, "grad_norm": 0.11517345160245895, "learning_rate": 0.0005, "loss": 2.0988, "step": 200960 }, { "epoch": 0.7649414218615592, "grad_norm": 0.13677245378494263, "learning_rate": 0.0005, "loss": 2.1189, "step": 200970 }, { "epoch": 0.7649794843296819, "grad_norm": 0.13079141080379486, "learning_rate": 0.0005, "loss": 2.1181, "step": 200980 }, { "epoch": 0.7650175467978045, "grad_norm": 0.12438200414180756, "learning_rate": 0.0005, "loss": 2.1206, "step": 200990 }, { "epoch": 0.7650556092659272, "grad_norm": 0.12100034207105637, "learning_rate": 0.0005, "loss": 2.109, "step": 201000 }, { "epoch": 0.76509367173405, "grad_norm": 0.1258230209350586, "learning_rate": 0.0005, "loss": 2.1065, "step": 201010 }, { "epoch": 0.7651317342021726, "grad_norm": 0.13739217817783356, "learning_rate": 0.0005, "loss": 2.1129, "step": 201020 }, { "epoch": 0.7651697966702953, "grad_norm": 0.12209373712539673, "learning_rate": 0.0005, "loss": 2.0926, "step": 201030 }, { "epoch": 0.7652078591384179, "grad_norm": 0.11340746283531189, "learning_rate": 0.0005, "loss": 2.1156, "step": 201040 }, { "epoch": 0.7652459216065407, "grad_norm": 0.14591138064861298, "learning_rate": 0.0005, "loss": 2.0934, "step": 201050 }, { "epoch": 0.7652839840746634, "grad_norm": 0.11925285309553146, "learning_rate": 0.0005, "loss": 2.1016, "step": 201060 }, { "epoch": 0.765322046542786, "grad_norm": 0.1297868937253952, "learning_rate": 0.0005, "loss": 2.1063, "step": 201070 }, { "epoch": 0.7653601090109087, "grad_norm": 0.12385216355323792, "learning_rate": 0.0005, "loss": 2.1, "step": 201080 }, { "epoch": 0.7653981714790314, "grad_norm": 0.12152455747127533, "learning_rate": 0.0005, "loss": 2.111, "step": 201090 }, { "epoch": 0.7654362339471541, "grad_norm": 0.12108520418405533, "learning_rate": 0.0005, "loss": 2.1002, "step": 201100 }, { "epoch": 0.7654742964152768, "grad_norm": 0.12145870923995972, "learning_rate": 0.0005, "loss": 2.108, "step": 201110 }, { "epoch": 0.7655123588833994, "grad_norm": 0.12569770216941833, "learning_rate": 0.0005, "loss": 2.1181, "step": 201120 }, { "epoch": 0.7655504213515221, "grad_norm": 0.12960554659366608, "learning_rate": 0.0005, "loss": 2.116, "step": 201130 }, { "epoch": 0.7655884838196448, "grad_norm": 0.1400604248046875, "learning_rate": 0.0005, "loss": 2.0998, "step": 201140 }, { "epoch": 0.7656265462877675, "grad_norm": 0.13274477422237396, "learning_rate": 0.0005, "loss": 2.0978, "step": 201150 }, { "epoch": 0.7656646087558902, "grad_norm": 0.11507438123226166, "learning_rate": 0.0005, "loss": 2.1268, "step": 201160 }, { "epoch": 0.7657026712240128, "grad_norm": 0.11901625990867615, "learning_rate": 0.0005, "loss": 2.105, "step": 201170 }, { "epoch": 0.7657407336921356, "grad_norm": 0.1272687017917633, "learning_rate": 0.0005, "loss": 2.1051, "step": 201180 }, { "epoch": 0.7657787961602582, "grad_norm": 0.13874635100364685, "learning_rate": 0.0005, "loss": 2.1062, "step": 201190 }, { "epoch": 0.7658168586283809, "grad_norm": 0.5305657982826233, "learning_rate": 0.0005, "loss": 2.1095, "step": 201200 }, { "epoch": 0.7658549210965035, "grad_norm": 0.1429230123758316, "learning_rate": 0.0005, "loss": 2.1133, "step": 201210 }, { "epoch": 0.7658929835646263, "grad_norm": 0.11990445107221603, "learning_rate": 0.0005, "loss": 2.1045, "step": 201220 }, { "epoch": 0.765931046032749, "grad_norm": 0.11194054782390594, "learning_rate": 0.0005, "loss": 2.1088, "step": 201230 }, { "epoch": 0.7659691085008716, "grad_norm": 0.12997521460056305, "learning_rate": 0.0005, "loss": 2.0912, "step": 201240 }, { "epoch": 0.7660071709689943, "grad_norm": 0.12883532047271729, "learning_rate": 0.0005, "loss": 2.1181, "step": 201250 }, { "epoch": 0.766045233437117, "grad_norm": 0.11726586520671844, "learning_rate": 0.0005, "loss": 2.1101, "step": 201260 }, { "epoch": 0.7660832959052397, "grad_norm": 0.1368391364812851, "learning_rate": 0.0005, "loss": 2.0987, "step": 201270 }, { "epoch": 0.7661213583733624, "grad_norm": 0.11998681724071503, "learning_rate": 0.0005, "loss": 2.1023, "step": 201280 }, { "epoch": 0.766159420841485, "grad_norm": 0.13713191449642181, "learning_rate": 0.0005, "loss": 2.1088, "step": 201290 }, { "epoch": 0.7661974833096077, "grad_norm": 0.14142447710037231, "learning_rate": 0.0005, "loss": 2.0997, "step": 201300 }, { "epoch": 0.7662355457777305, "grad_norm": 0.12667404115200043, "learning_rate": 0.0005, "loss": 2.1015, "step": 201310 }, { "epoch": 0.7662736082458531, "grad_norm": 0.11173027753829956, "learning_rate": 0.0005, "loss": 2.0948, "step": 201320 }, { "epoch": 0.7663116707139758, "grad_norm": 0.1272125542163849, "learning_rate": 0.0005, "loss": 2.106, "step": 201330 }, { "epoch": 0.7663497331820984, "grad_norm": 0.11534397304058075, "learning_rate": 0.0005, "loss": 2.1017, "step": 201340 }, { "epoch": 0.7663877956502212, "grad_norm": 0.12607638537883759, "learning_rate": 0.0005, "loss": 2.1095, "step": 201350 }, { "epoch": 0.7664258581183439, "grad_norm": 0.13547714054584503, "learning_rate": 0.0005, "loss": 2.1267, "step": 201360 }, { "epoch": 0.7664639205864665, "grad_norm": 0.13281914591789246, "learning_rate": 0.0005, "loss": 2.0975, "step": 201370 }, { "epoch": 0.7665019830545892, "grad_norm": 0.15444037318229675, "learning_rate": 0.0005, "loss": 2.1139, "step": 201380 }, { "epoch": 0.7665400455227118, "grad_norm": 0.14034345746040344, "learning_rate": 0.0005, "loss": 2.1115, "step": 201390 }, { "epoch": 0.7665781079908346, "grad_norm": 0.3182571530342102, "learning_rate": 0.0005, "loss": 2.1095, "step": 201400 }, { "epoch": 0.7666161704589572, "grad_norm": 0.1384763866662979, "learning_rate": 0.0005, "loss": 2.1098, "step": 201410 }, { "epoch": 0.7666542329270799, "grad_norm": 0.12868386507034302, "learning_rate": 0.0005, "loss": 2.115, "step": 201420 }, { "epoch": 0.7666922953952026, "grad_norm": 0.12463357299566269, "learning_rate": 0.0005, "loss": 2.1101, "step": 201430 }, { "epoch": 0.7667303578633253, "grad_norm": 0.12342123687267303, "learning_rate": 0.0005, "loss": 2.1095, "step": 201440 }, { "epoch": 0.766768420331448, "grad_norm": 0.13239067792892456, "learning_rate": 0.0005, "loss": 2.0958, "step": 201450 }, { "epoch": 0.7668064827995706, "grad_norm": 0.11232977360486984, "learning_rate": 0.0005, "loss": 2.1039, "step": 201460 }, { "epoch": 0.7668445452676933, "grad_norm": 0.1485995352268219, "learning_rate": 0.0005, "loss": 2.1093, "step": 201470 }, { "epoch": 0.7668826077358161, "grad_norm": 0.12763135135173798, "learning_rate": 0.0005, "loss": 2.1081, "step": 201480 }, { "epoch": 0.7669206702039387, "grad_norm": 0.13601966202259064, "learning_rate": 0.0005, "loss": 2.1077, "step": 201490 }, { "epoch": 0.7669587326720614, "grad_norm": 0.15210683643817902, "learning_rate": 0.0005, "loss": 2.1141, "step": 201500 }, { "epoch": 0.766996795140184, "grad_norm": 0.12328286468982697, "learning_rate": 0.0005, "loss": 2.1209, "step": 201510 }, { "epoch": 0.7670348576083068, "grad_norm": 0.12505197525024414, "learning_rate": 0.0005, "loss": 2.1052, "step": 201520 }, { "epoch": 0.7670729200764295, "grad_norm": 0.1358686238527298, "learning_rate": 0.0005, "loss": 2.1115, "step": 201530 }, { "epoch": 0.7671109825445521, "grad_norm": 0.13519304990768433, "learning_rate": 0.0005, "loss": 2.1004, "step": 201540 }, { "epoch": 0.7671490450126748, "grad_norm": 0.11909667402505875, "learning_rate": 0.0005, "loss": 2.0925, "step": 201550 }, { "epoch": 0.7671871074807974, "grad_norm": 0.12398222088813782, "learning_rate": 0.0005, "loss": 2.0942, "step": 201560 }, { "epoch": 0.7672251699489202, "grad_norm": 0.11791833490133286, "learning_rate": 0.0005, "loss": 2.1267, "step": 201570 }, { "epoch": 0.7672632324170429, "grad_norm": 0.12576699256896973, "learning_rate": 0.0005, "loss": 2.099, "step": 201580 }, { "epoch": 0.7673012948851655, "grad_norm": 0.12361087650060654, "learning_rate": 0.0005, "loss": 2.1091, "step": 201590 }, { "epoch": 0.7673393573532882, "grad_norm": 0.24520492553710938, "learning_rate": 0.0005, "loss": 2.1159, "step": 201600 }, { "epoch": 0.7673774198214109, "grad_norm": 0.12726250290870667, "learning_rate": 0.0005, "loss": 2.1119, "step": 201610 }, { "epoch": 0.7674154822895336, "grad_norm": 0.12805522978305817, "learning_rate": 0.0005, "loss": 2.1055, "step": 201620 }, { "epoch": 0.7674535447576563, "grad_norm": 0.24818246066570282, "learning_rate": 0.0005, "loss": 2.1014, "step": 201630 }, { "epoch": 0.7674916072257789, "grad_norm": 0.11738839745521545, "learning_rate": 0.0005, "loss": 2.0948, "step": 201640 }, { "epoch": 0.7675296696939017, "grad_norm": 0.1346309930086136, "learning_rate": 0.0005, "loss": 2.0941, "step": 201650 }, { "epoch": 0.7675677321620243, "grad_norm": 0.11849494278430939, "learning_rate": 0.0005, "loss": 2.0968, "step": 201660 }, { "epoch": 0.767605794630147, "grad_norm": 0.12269473075866699, "learning_rate": 0.0005, "loss": 2.0933, "step": 201670 }, { "epoch": 0.7676438570982697, "grad_norm": 0.13212372362613678, "learning_rate": 0.0005, "loss": 2.099, "step": 201680 }, { "epoch": 0.7676819195663923, "grad_norm": 0.1193777471780777, "learning_rate": 0.0005, "loss": 2.0979, "step": 201690 }, { "epoch": 0.7677199820345151, "grad_norm": 0.1222861036658287, "learning_rate": 0.0005, "loss": 2.1194, "step": 201700 }, { "epoch": 0.7677580445026377, "grad_norm": 0.13242977857589722, "learning_rate": 0.0005, "loss": 2.1069, "step": 201710 }, { "epoch": 0.7677961069707604, "grad_norm": 0.12794439494609833, "learning_rate": 0.0005, "loss": 2.1169, "step": 201720 }, { "epoch": 0.767834169438883, "grad_norm": 0.2587161064147949, "learning_rate": 0.0005, "loss": 2.0911, "step": 201730 }, { "epoch": 0.7678722319070058, "grad_norm": 0.12928904592990875, "learning_rate": 0.0005, "loss": 2.1073, "step": 201740 }, { "epoch": 0.7679102943751285, "grad_norm": 0.15386945009231567, "learning_rate": 0.0005, "loss": 2.1125, "step": 201750 }, { "epoch": 0.7679483568432511, "grad_norm": 0.13748303055763245, "learning_rate": 0.0005, "loss": 2.1024, "step": 201760 }, { "epoch": 0.7679864193113738, "grad_norm": 0.12886695563793182, "learning_rate": 0.0005, "loss": 2.1009, "step": 201770 }, { "epoch": 0.7680244817794966, "grad_norm": 0.12823881208896637, "learning_rate": 0.0005, "loss": 2.1299, "step": 201780 }, { "epoch": 0.7680625442476192, "grad_norm": 0.11645886301994324, "learning_rate": 0.0005, "loss": 2.1088, "step": 201790 }, { "epoch": 0.7681006067157419, "grad_norm": 0.12088745087385178, "learning_rate": 0.0005, "loss": 2.1165, "step": 201800 }, { "epoch": 0.7681386691838645, "grad_norm": 0.1247321143746376, "learning_rate": 0.0005, "loss": 2.1041, "step": 201810 }, { "epoch": 0.7681767316519872, "grad_norm": 0.21556483209133148, "learning_rate": 0.0005, "loss": 2.0999, "step": 201820 }, { "epoch": 0.76821479412011, "grad_norm": 0.15077947080135345, "learning_rate": 0.0005, "loss": 2.1107, "step": 201830 }, { "epoch": 0.7682528565882326, "grad_norm": 0.14259713888168335, "learning_rate": 0.0005, "loss": 2.1086, "step": 201840 }, { "epoch": 0.7682909190563553, "grad_norm": 0.12281884253025055, "learning_rate": 0.0005, "loss": 2.099, "step": 201850 }, { "epoch": 0.7683289815244779, "grad_norm": 0.1274409294128418, "learning_rate": 0.0005, "loss": 2.1116, "step": 201860 }, { "epoch": 0.7683670439926007, "grad_norm": 0.13390718400478363, "learning_rate": 0.0005, "loss": 2.099, "step": 201870 }, { "epoch": 0.7684051064607234, "grad_norm": 0.12256969511508942, "learning_rate": 0.0005, "loss": 2.1121, "step": 201880 }, { "epoch": 0.768443168928846, "grad_norm": 0.13756145536899567, "learning_rate": 0.0005, "loss": 2.1043, "step": 201890 }, { "epoch": 0.7684812313969687, "grad_norm": 0.12480933964252472, "learning_rate": 0.0005, "loss": 2.1207, "step": 201900 }, { "epoch": 0.7685192938650914, "grad_norm": 0.11438208073377609, "learning_rate": 0.0005, "loss": 2.1081, "step": 201910 }, { "epoch": 0.7685573563332141, "grad_norm": 0.12336448580026627, "learning_rate": 0.0005, "loss": 2.1168, "step": 201920 }, { "epoch": 0.7685954188013367, "grad_norm": 0.12992775440216064, "learning_rate": 0.0005, "loss": 2.113, "step": 201930 }, { "epoch": 0.7686334812694594, "grad_norm": 0.11728628724813461, "learning_rate": 0.0005, "loss": 2.1134, "step": 201940 }, { "epoch": 0.7686715437375822, "grad_norm": 0.12237447500228882, "learning_rate": 0.0005, "loss": 2.1213, "step": 201950 }, { "epoch": 0.7687096062057048, "grad_norm": 0.11688285320997238, "learning_rate": 0.0005, "loss": 2.1229, "step": 201960 }, { "epoch": 0.7687476686738275, "grad_norm": 0.1265353411436081, "learning_rate": 0.0005, "loss": 2.0984, "step": 201970 }, { "epoch": 0.7687857311419501, "grad_norm": 0.11828229576349258, "learning_rate": 0.0005, "loss": 2.1039, "step": 201980 }, { "epoch": 0.7688237936100728, "grad_norm": 0.1361830234527588, "learning_rate": 0.0005, "loss": 2.11, "step": 201990 }, { "epoch": 0.7688618560781956, "grad_norm": 0.12680485844612122, "learning_rate": 0.0005, "loss": 2.1065, "step": 202000 }, { "epoch": 0.7688999185463182, "grad_norm": 0.12592792510986328, "learning_rate": 0.0005, "loss": 2.1091, "step": 202010 }, { "epoch": 0.7689379810144409, "grad_norm": 0.1196167916059494, "learning_rate": 0.0005, "loss": 2.102, "step": 202020 }, { "epoch": 0.7689760434825635, "grad_norm": 0.11954740434885025, "learning_rate": 0.0005, "loss": 2.0975, "step": 202030 }, { "epoch": 0.7690141059506863, "grad_norm": 0.13095378875732422, "learning_rate": 0.0005, "loss": 2.0931, "step": 202040 }, { "epoch": 0.769052168418809, "grad_norm": 0.13523481786251068, "learning_rate": 0.0005, "loss": 2.098, "step": 202050 }, { "epoch": 0.7690902308869316, "grad_norm": 0.14069612324237823, "learning_rate": 0.0005, "loss": 2.1131, "step": 202060 }, { "epoch": 0.7691282933550543, "grad_norm": 0.11833822727203369, "learning_rate": 0.0005, "loss": 2.1175, "step": 202070 }, { "epoch": 0.769166355823177, "grad_norm": 0.13712801039218903, "learning_rate": 0.0005, "loss": 2.0987, "step": 202080 }, { "epoch": 0.7692044182912997, "grad_norm": 0.13787589967250824, "learning_rate": 0.0005, "loss": 2.1092, "step": 202090 }, { "epoch": 0.7692424807594224, "grad_norm": 0.1386062055826187, "learning_rate": 0.0005, "loss": 2.1225, "step": 202100 }, { "epoch": 0.769280543227545, "grad_norm": 0.12532974779605865, "learning_rate": 0.0005, "loss": 2.1132, "step": 202110 }, { "epoch": 0.7693186056956677, "grad_norm": 0.12713083624839783, "learning_rate": 0.0005, "loss": 2.0961, "step": 202120 }, { "epoch": 0.7693566681637904, "grad_norm": 0.12442374974489212, "learning_rate": 0.0005, "loss": 2.1098, "step": 202130 }, { "epoch": 0.7693947306319131, "grad_norm": 0.12369605898857117, "learning_rate": 0.0005, "loss": 2.0881, "step": 202140 }, { "epoch": 0.7694327931000358, "grad_norm": 0.12773537635803223, "learning_rate": 0.0005, "loss": 2.1001, "step": 202150 }, { "epoch": 0.7694708555681584, "grad_norm": 0.13192373514175415, "learning_rate": 0.0005, "loss": 2.1077, "step": 202160 }, { "epoch": 0.7695089180362812, "grad_norm": 0.1284187138080597, "learning_rate": 0.0005, "loss": 2.0935, "step": 202170 }, { "epoch": 0.7695469805044038, "grad_norm": 0.12153860926628113, "learning_rate": 0.0005, "loss": 2.0988, "step": 202180 }, { "epoch": 0.7695850429725265, "grad_norm": 0.1268356591463089, "learning_rate": 0.0005, "loss": 2.1022, "step": 202190 }, { "epoch": 0.7696231054406492, "grad_norm": 0.12742312252521515, "learning_rate": 0.0005, "loss": 2.1112, "step": 202200 }, { "epoch": 0.7696611679087719, "grad_norm": 0.11673905700445175, "learning_rate": 0.0005, "loss": 2.0851, "step": 202210 }, { "epoch": 0.7696992303768946, "grad_norm": 0.11608495563268661, "learning_rate": 0.0005, "loss": 2.1153, "step": 202220 }, { "epoch": 0.7697372928450172, "grad_norm": 0.12505364418029785, "learning_rate": 0.0005, "loss": 2.1149, "step": 202230 }, { "epoch": 0.7697753553131399, "grad_norm": 0.11059442907571793, "learning_rate": 0.0005, "loss": 2.1103, "step": 202240 }, { "epoch": 0.7698134177812626, "grad_norm": 0.13075938820838928, "learning_rate": 0.0005, "loss": 2.0916, "step": 202250 }, { "epoch": 0.7698514802493853, "grad_norm": 0.13375014066696167, "learning_rate": 0.0005, "loss": 2.102, "step": 202260 }, { "epoch": 0.769889542717508, "grad_norm": 0.1336306482553482, "learning_rate": 0.0005, "loss": 2.1078, "step": 202270 }, { "epoch": 0.7699276051856306, "grad_norm": 0.1256602555513382, "learning_rate": 0.0005, "loss": 2.1124, "step": 202280 }, { "epoch": 0.7699656676537533, "grad_norm": 0.1295190006494522, "learning_rate": 0.0005, "loss": 2.0898, "step": 202290 }, { "epoch": 0.7700037301218761, "grad_norm": 0.13925063610076904, "learning_rate": 0.0005, "loss": 2.1136, "step": 202300 }, { "epoch": 0.7700417925899987, "grad_norm": 0.13413967192173004, "learning_rate": 0.0005, "loss": 2.1068, "step": 202310 }, { "epoch": 0.7700798550581214, "grad_norm": 0.11593659222126007, "learning_rate": 0.0005, "loss": 2.1, "step": 202320 }, { "epoch": 0.770117917526244, "grad_norm": 0.12970569729804993, "learning_rate": 0.0005, "loss": 2.1156, "step": 202330 }, { "epoch": 0.7701559799943668, "grad_norm": 0.12410783767700195, "learning_rate": 0.0005, "loss": 2.0962, "step": 202340 }, { "epoch": 0.7701940424624895, "grad_norm": 0.1210460290312767, "learning_rate": 0.0005, "loss": 2.1021, "step": 202350 }, { "epoch": 0.7702321049306121, "grad_norm": 0.13017313182353973, "learning_rate": 0.0005, "loss": 2.11, "step": 202360 }, { "epoch": 0.7702701673987348, "grad_norm": 0.1134258434176445, "learning_rate": 0.0005, "loss": 2.1189, "step": 202370 }, { "epoch": 0.7703082298668575, "grad_norm": 0.13476477563381195, "learning_rate": 0.0005, "loss": 2.1003, "step": 202380 }, { "epoch": 0.7703462923349802, "grad_norm": 0.11765899509191513, "learning_rate": 0.0005, "loss": 2.0993, "step": 202390 }, { "epoch": 0.7703843548031029, "grad_norm": 0.12300023436546326, "learning_rate": 0.0005, "loss": 2.1077, "step": 202400 }, { "epoch": 0.7704224172712255, "grad_norm": 0.14942879974842072, "learning_rate": 0.0005, "loss": 2.1169, "step": 202410 }, { "epoch": 0.7704604797393482, "grad_norm": 0.12374628335237503, "learning_rate": 0.0005, "loss": 2.1059, "step": 202420 }, { "epoch": 0.7704985422074709, "grad_norm": 0.12553882598876953, "learning_rate": 0.0005, "loss": 2.1061, "step": 202430 }, { "epoch": 0.7705366046755936, "grad_norm": 0.13185171782970428, "learning_rate": 0.0005, "loss": 2.1112, "step": 202440 }, { "epoch": 0.7705746671437163, "grad_norm": 0.12644770741462708, "learning_rate": 0.0005, "loss": 2.1045, "step": 202450 }, { "epoch": 0.7706127296118389, "grad_norm": 0.13731904327869415, "learning_rate": 0.0005, "loss": 2.113, "step": 202460 }, { "epoch": 0.7706507920799617, "grad_norm": 0.1281971037387848, "learning_rate": 0.0005, "loss": 2.1106, "step": 202470 }, { "epoch": 0.7706888545480843, "grad_norm": 0.12450539320707321, "learning_rate": 0.0005, "loss": 2.105, "step": 202480 }, { "epoch": 0.770726917016207, "grad_norm": 0.11814303696155548, "learning_rate": 0.0005, "loss": 2.1049, "step": 202490 }, { "epoch": 0.7707649794843296, "grad_norm": 0.13239553570747375, "learning_rate": 0.0005, "loss": 2.1027, "step": 202500 }, { "epoch": 0.7708030419524524, "grad_norm": 0.11851152032613754, "learning_rate": 0.0005, "loss": 2.1045, "step": 202510 }, { "epoch": 0.7708411044205751, "grad_norm": 0.1262698620557785, "learning_rate": 0.0005, "loss": 2.1019, "step": 202520 }, { "epoch": 0.7708791668886977, "grad_norm": 0.13262896239757538, "learning_rate": 0.0005, "loss": 2.0998, "step": 202530 }, { "epoch": 0.7709172293568204, "grad_norm": 0.13419535756111145, "learning_rate": 0.0005, "loss": 2.1183, "step": 202540 }, { "epoch": 0.770955291824943, "grad_norm": 0.13374435901641846, "learning_rate": 0.0005, "loss": 2.1263, "step": 202550 }, { "epoch": 0.7709933542930658, "grad_norm": 0.1315140277147293, "learning_rate": 0.0005, "loss": 2.1118, "step": 202560 }, { "epoch": 0.7710314167611885, "grad_norm": 0.13533872365951538, "learning_rate": 0.0005, "loss": 2.1153, "step": 202570 }, { "epoch": 0.7710694792293111, "grad_norm": 0.12378238141536713, "learning_rate": 0.0005, "loss": 2.1044, "step": 202580 }, { "epoch": 0.7711075416974338, "grad_norm": 0.1197347342967987, "learning_rate": 0.0005, "loss": 2.1072, "step": 202590 }, { "epoch": 0.7711456041655566, "grad_norm": 0.13101764023303986, "learning_rate": 0.0005, "loss": 2.112, "step": 202600 }, { "epoch": 0.7711836666336792, "grad_norm": 0.1293804794549942, "learning_rate": 0.0005, "loss": 2.093, "step": 202610 }, { "epoch": 0.7712217291018019, "grad_norm": 0.12614557147026062, "learning_rate": 0.0005, "loss": 2.099, "step": 202620 }, { "epoch": 0.7712597915699245, "grad_norm": 0.1223040446639061, "learning_rate": 0.0005, "loss": 2.1068, "step": 202630 }, { "epoch": 0.7712978540380473, "grad_norm": 0.11462346464395523, "learning_rate": 0.0005, "loss": 2.1084, "step": 202640 }, { "epoch": 0.77133591650617, "grad_norm": 0.13106228411197662, "learning_rate": 0.0005, "loss": 2.1035, "step": 202650 }, { "epoch": 0.7713739789742926, "grad_norm": 0.12828728556632996, "learning_rate": 0.0005, "loss": 2.1022, "step": 202660 }, { "epoch": 0.7714120414424153, "grad_norm": 0.13604027032852173, "learning_rate": 0.0005, "loss": 2.0991, "step": 202670 }, { "epoch": 0.7714501039105379, "grad_norm": 0.11754266917705536, "learning_rate": 0.0005, "loss": 2.0876, "step": 202680 }, { "epoch": 0.7714881663786607, "grad_norm": 0.1196461096405983, "learning_rate": 0.0005, "loss": 2.1088, "step": 202690 }, { "epoch": 0.7715262288467833, "grad_norm": 0.14312899112701416, "learning_rate": 0.0005, "loss": 2.1072, "step": 202700 }, { "epoch": 0.771564291314906, "grad_norm": 0.11657819151878357, "learning_rate": 0.0005, "loss": 2.1035, "step": 202710 }, { "epoch": 0.7716023537830287, "grad_norm": 0.12473199516534805, "learning_rate": 0.0005, "loss": 2.1112, "step": 202720 }, { "epoch": 0.7716404162511514, "grad_norm": 0.14507940411567688, "learning_rate": 0.0005, "loss": 2.099, "step": 202730 }, { "epoch": 0.7716784787192741, "grad_norm": 0.12810270488262177, "learning_rate": 0.0005, "loss": 2.1261, "step": 202740 }, { "epoch": 0.7717165411873967, "grad_norm": 0.12481430917978287, "learning_rate": 0.0005, "loss": 2.118, "step": 202750 }, { "epoch": 0.7717546036555194, "grad_norm": 0.13375705480575562, "learning_rate": 0.0005, "loss": 2.1056, "step": 202760 }, { "epoch": 0.7717926661236422, "grad_norm": 0.14170774817466736, "learning_rate": 0.0005, "loss": 2.1006, "step": 202770 }, { "epoch": 0.7718307285917648, "grad_norm": 0.1388128399848938, "learning_rate": 0.0005, "loss": 2.1088, "step": 202780 }, { "epoch": 0.7718687910598875, "grad_norm": 0.12213142216205597, "learning_rate": 0.0005, "loss": 2.1093, "step": 202790 }, { "epoch": 0.7719068535280101, "grad_norm": 0.13339495658874512, "learning_rate": 0.0005, "loss": 2.1105, "step": 202800 }, { "epoch": 0.7719449159961329, "grad_norm": 0.11571691185235977, "learning_rate": 0.0005, "loss": 2.0938, "step": 202810 }, { "epoch": 0.7719829784642556, "grad_norm": 0.13037385046482086, "learning_rate": 0.0005, "loss": 2.108, "step": 202820 }, { "epoch": 0.7720210409323782, "grad_norm": 0.11974932998418808, "learning_rate": 0.0005, "loss": 2.1044, "step": 202830 }, { "epoch": 0.7720591034005009, "grad_norm": 0.115937739610672, "learning_rate": 0.0005, "loss": 2.1114, "step": 202840 }, { "epoch": 0.7720971658686235, "grad_norm": 0.12142065167427063, "learning_rate": 0.0005, "loss": 2.1093, "step": 202850 }, { "epoch": 0.7721352283367463, "grad_norm": 0.13116484880447388, "learning_rate": 0.0005, "loss": 2.1004, "step": 202860 }, { "epoch": 0.772173290804869, "grad_norm": 0.11825598776340485, "learning_rate": 0.0005, "loss": 2.116, "step": 202870 }, { "epoch": 0.7722113532729916, "grad_norm": 0.11560803651809692, "learning_rate": 0.0005, "loss": 2.1147, "step": 202880 }, { "epoch": 0.7722494157411143, "grad_norm": 0.12228305637836456, "learning_rate": 0.0005, "loss": 2.1101, "step": 202890 }, { "epoch": 0.772287478209237, "grad_norm": 0.1562349647283554, "learning_rate": 0.0005, "loss": 2.0923, "step": 202900 }, { "epoch": 0.7723255406773597, "grad_norm": 0.12264727801084518, "learning_rate": 0.0005, "loss": 2.0976, "step": 202910 }, { "epoch": 0.7723636031454824, "grad_norm": 0.13685239851474762, "learning_rate": 0.0005, "loss": 2.101, "step": 202920 }, { "epoch": 0.772401665613605, "grad_norm": 0.1482173651456833, "learning_rate": 0.0005, "loss": 2.1207, "step": 202930 }, { "epoch": 0.7724397280817278, "grad_norm": 0.11645165830850601, "learning_rate": 0.0005, "loss": 2.0975, "step": 202940 }, { "epoch": 0.7724777905498504, "grad_norm": 0.12072696536779404, "learning_rate": 0.0005, "loss": 2.0881, "step": 202950 }, { "epoch": 0.7725158530179731, "grad_norm": 0.13543497025966644, "learning_rate": 0.0005, "loss": 2.1097, "step": 202960 }, { "epoch": 0.7725539154860958, "grad_norm": 0.1298336237668991, "learning_rate": 0.0005, "loss": 2.0991, "step": 202970 }, { "epoch": 0.7725919779542184, "grad_norm": 0.14146368205547333, "learning_rate": 0.0005, "loss": 2.1032, "step": 202980 }, { "epoch": 0.7726300404223412, "grad_norm": 0.1257074922323227, "learning_rate": 0.0005, "loss": 2.1029, "step": 202990 }, { "epoch": 0.7726681028904638, "grad_norm": 0.14784252643585205, "learning_rate": 0.0005, "loss": 2.1157, "step": 203000 }, { "epoch": 0.7727061653585865, "grad_norm": 0.11903432011604309, "learning_rate": 0.0005, "loss": 2.1056, "step": 203010 }, { "epoch": 0.7727442278267092, "grad_norm": 0.12048203498125076, "learning_rate": 0.0005, "loss": 2.1156, "step": 203020 }, { "epoch": 0.7727822902948319, "grad_norm": 0.12632669508457184, "learning_rate": 0.0005, "loss": 2.1034, "step": 203030 }, { "epoch": 0.7728203527629546, "grad_norm": 0.11561502516269684, "learning_rate": 0.0005, "loss": 2.0985, "step": 203040 }, { "epoch": 0.7728584152310772, "grad_norm": 0.12799832224845886, "learning_rate": 0.0005, "loss": 2.0985, "step": 203050 }, { "epoch": 0.7728964776991999, "grad_norm": 0.13653217256069183, "learning_rate": 0.0005, "loss": 2.1134, "step": 203060 }, { "epoch": 0.7729345401673227, "grad_norm": 0.12045499682426453, "learning_rate": 0.0005, "loss": 2.1048, "step": 203070 }, { "epoch": 0.7729726026354453, "grad_norm": 0.12969614565372467, "learning_rate": 0.0005, "loss": 2.1105, "step": 203080 }, { "epoch": 0.773010665103568, "grad_norm": 0.12215245515108109, "learning_rate": 0.0005, "loss": 2.102, "step": 203090 }, { "epoch": 0.7730487275716906, "grad_norm": 0.13063253462314606, "learning_rate": 0.0005, "loss": 2.111, "step": 203100 }, { "epoch": 0.7730867900398133, "grad_norm": 0.12350734323263168, "learning_rate": 0.0005, "loss": 2.1029, "step": 203110 }, { "epoch": 0.7731248525079361, "grad_norm": 0.12446156144142151, "learning_rate": 0.0005, "loss": 2.1102, "step": 203120 }, { "epoch": 0.7731629149760587, "grad_norm": 0.12515167891979218, "learning_rate": 0.0005, "loss": 2.1161, "step": 203130 }, { "epoch": 0.7732009774441814, "grad_norm": 0.11699115484952927, "learning_rate": 0.0005, "loss": 2.0834, "step": 203140 }, { "epoch": 0.773239039912304, "grad_norm": 0.13818705081939697, "learning_rate": 0.0005, "loss": 2.1008, "step": 203150 }, { "epoch": 0.7732771023804268, "grad_norm": 0.12045501172542572, "learning_rate": 0.0005, "loss": 2.1127, "step": 203160 }, { "epoch": 0.7733151648485495, "grad_norm": 0.13033844530582428, "learning_rate": 0.0005, "loss": 2.113, "step": 203170 }, { "epoch": 0.7733532273166721, "grad_norm": 0.1280549317598343, "learning_rate": 0.0005, "loss": 2.1209, "step": 203180 }, { "epoch": 0.7733912897847948, "grad_norm": 0.11952662467956543, "learning_rate": 0.0005, "loss": 2.1019, "step": 203190 }, { "epoch": 0.7734293522529175, "grad_norm": 0.12275929749011993, "learning_rate": 0.0005, "loss": 2.1135, "step": 203200 }, { "epoch": 0.7734674147210402, "grad_norm": 0.12167345732450485, "learning_rate": 0.0005, "loss": 2.1058, "step": 203210 }, { "epoch": 0.7735054771891628, "grad_norm": 0.13266120851039886, "learning_rate": 0.0005, "loss": 2.1138, "step": 203220 }, { "epoch": 0.7735435396572855, "grad_norm": 0.1521034985780716, "learning_rate": 0.0005, "loss": 2.1092, "step": 203230 }, { "epoch": 0.7735816021254083, "grad_norm": 0.12634935975074768, "learning_rate": 0.0005, "loss": 2.1115, "step": 203240 }, { "epoch": 0.7736196645935309, "grad_norm": 0.11577683687210083, "learning_rate": 0.0005, "loss": 2.1308, "step": 203250 }, { "epoch": 0.7736577270616536, "grad_norm": 0.12791483104228973, "learning_rate": 0.0005, "loss": 2.1032, "step": 203260 }, { "epoch": 0.7736957895297762, "grad_norm": 0.12884661555290222, "learning_rate": 0.0005, "loss": 2.1023, "step": 203270 }, { "epoch": 0.7737338519978989, "grad_norm": 0.12367019057273865, "learning_rate": 0.0005, "loss": 2.1113, "step": 203280 }, { "epoch": 0.7737719144660217, "grad_norm": 0.14287038147449493, "learning_rate": 0.0005, "loss": 2.1044, "step": 203290 }, { "epoch": 0.7738099769341443, "grad_norm": 0.12698234617710114, "learning_rate": 0.0005, "loss": 2.0931, "step": 203300 }, { "epoch": 0.773848039402267, "grad_norm": 0.13310617208480835, "learning_rate": 0.0005, "loss": 2.108, "step": 203310 }, { "epoch": 0.7738861018703896, "grad_norm": 0.12289171665906906, "learning_rate": 0.0005, "loss": 2.1221, "step": 203320 }, { "epoch": 0.7739241643385124, "grad_norm": 0.13482849299907684, "learning_rate": 0.0005, "loss": 2.1094, "step": 203330 }, { "epoch": 0.7739622268066351, "grad_norm": 0.12322328984737396, "learning_rate": 0.0005, "loss": 2.1073, "step": 203340 }, { "epoch": 0.7740002892747577, "grad_norm": 0.12590280175209045, "learning_rate": 0.0005, "loss": 2.0959, "step": 203350 }, { "epoch": 0.7740383517428804, "grad_norm": 0.12155240774154663, "learning_rate": 0.0005, "loss": 2.1077, "step": 203360 }, { "epoch": 0.7740764142110031, "grad_norm": 0.1313089281320572, "learning_rate": 0.0005, "loss": 2.1105, "step": 203370 }, { "epoch": 0.7741144766791258, "grad_norm": 0.12607799470424652, "learning_rate": 0.0005, "loss": 2.1169, "step": 203380 }, { "epoch": 0.7741525391472485, "grad_norm": 0.13865213096141815, "learning_rate": 0.0005, "loss": 2.1109, "step": 203390 }, { "epoch": 0.7741906016153711, "grad_norm": 0.14330661296844482, "learning_rate": 0.0005, "loss": 2.1184, "step": 203400 }, { "epoch": 0.7742286640834938, "grad_norm": 0.11868394911289215, "learning_rate": 0.0005, "loss": 2.1111, "step": 203410 }, { "epoch": 0.7742667265516165, "grad_norm": 0.1340259611606598, "learning_rate": 0.0005, "loss": 2.1121, "step": 203420 }, { "epoch": 0.7743047890197392, "grad_norm": 0.12151813507080078, "learning_rate": 0.0005, "loss": 2.1159, "step": 203430 }, { "epoch": 0.7743428514878619, "grad_norm": 0.11983367055654526, "learning_rate": 0.0005, "loss": 2.104, "step": 203440 }, { "epoch": 0.7743809139559845, "grad_norm": 0.12431453168392181, "learning_rate": 0.0005, "loss": 2.1062, "step": 203450 }, { "epoch": 0.7744189764241073, "grad_norm": 0.11596907675266266, "learning_rate": 0.0005, "loss": 2.1152, "step": 203460 }, { "epoch": 0.7744570388922299, "grad_norm": 0.1255701184272766, "learning_rate": 0.0005, "loss": 2.1174, "step": 203470 }, { "epoch": 0.7744951013603526, "grad_norm": 0.126678466796875, "learning_rate": 0.0005, "loss": 2.0857, "step": 203480 }, { "epoch": 0.7745331638284753, "grad_norm": 0.12204180657863617, "learning_rate": 0.0005, "loss": 2.0935, "step": 203490 }, { "epoch": 0.774571226296598, "grad_norm": 0.13281655311584473, "learning_rate": 0.0005, "loss": 2.0969, "step": 203500 }, { "epoch": 0.7746092887647207, "grad_norm": 0.11845386028289795, "learning_rate": 0.0005, "loss": 2.1167, "step": 203510 }, { "epoch": 0.7746473512328433, "grad_norm": 0.1315074861049652, "learning_rate": 0.0005, "loss": 2.0944, "step": 203520 }, { "epoch": 0.774685413700966, "grad_norm": 0.12224984914064407, "learning_rate": 0.0005, "loss": 2.1125, "step": 203530 }, { "epoch": 0.7747234761690887, "grad_norm": 0.11303869634866714, "learning_rate": 0.0005, "loss": 2.0993, "step": 203540 }, { "epoch": 0.7747615386372114, "grad_norm": 0.12494504451751709, "learning_rate": 0.0005, "loss": 2.112, "step": 203550 }, { "epoch": 0.7747996011053341, "grad_norm": 0.1141730546951294, "learning_rate": 0.0005, "loss": 2.0917, "step": 203560 }, { "epoch": 0.7748376635734567, "grad_norm": 0.11735429614782333, "learning_rate": 0.0005, "loss": 2.1014, "step": 203570 }, { "epoch": 0.7748757260415794, "grad_norm": 0.12974528968334198, "learning_rate": 0.0005, "loss": 2.1036, "step": 203580 }, { "epoch": 0.7749137885097022, "grad_norm": 0.12256599962711334, "learning_rate": 0.0005, "loss": 2.1101, "step": 203590 }, { "epoch": 0.7749518509778248, "grad_norm": 0.12738636136054993, "learning_rate": 0.0005, "loss": 2.1064, "step": 203600 }, { "epoch": 0.7749899134459475, "grad_norm": 0.11851944774389267, "learning_rate": 0.0005, "loss": 2.1161, "step": 203610 }, { "epoch": 0.7750279759140701, "grad_norm": 0.12773127853870392, "learning_rate": 0.0005, "loss": 2.0903, "step": 203620 }, { "epoch": 0.7750660383821929, "grad_norm": 0.1170206144452095, "learning_rate": 0.0005, "loss": 2.1061, "step": 203630 }, { "epoch": 0.7751041008503156, "grad_norm": 0.1569705307483673, "learning_rate": 0.0005, "loss": 2.1018, "step": 203640 }, { "epoch": 0.7751421633184382, "grad_norm": 0.1502056121826172, "learning_rate": 0.0005, "loss": 2.1011, "step": 203650 }, { "epoch": 0.7751802257865609, "grad_norm": 0.138113334774971, "learning_rate": 0.0005, "loss": 2.1031, "step": 203660 }, { "epoch": 0.7752182882546836, "grad_norm": 0.12981128692626953, "learning_rate": 0.0005, "loss": 2.1193, "step": 203670 }, { "epoch": 0.7752563507228063, "grad_norm": 0.13203611969947815, "learning_rate": 0.0005, "loss": 2.0993, "step": 203680 }, { "epoch": 0.775294413190929, "grad_norm": 0.1312369853258133, "learning_rate": 0.0005, "loss": 2.1147, "step": 203690 }, { "epoch": 0.7753324756590516, "grad_norm": 0.137653648853302, "learning_rate": 0.0005, "loss": 2.1067, "step": 203700 }, { "epoch": 0.7753705381271743, "grad_norm": 0.13897337019443512, "learning_rate": 0.0005, "loss": 2.1111, "step": 203710 }, { "epoch": 0.775408600595297, "grad_norm": 0.11673405766487122, "learning_rate": 0.0005, "loss": 2.1165, "step": 203720 }, { "epoch": 0.7754466630634197, "grad_norm": 0.12671113014221191, "learning_rate": 0.0005, "loss": 2.125, "step": 203730 }, { "epoch": 0.7754847255315424, "grad_norm": 0.11186876893043518, "learning_rate": 0.0005, "loss": 2.1013, "step": 203740 }, { "epoch": 0.775522787999665, "grad_norm": 0.11881640553474426, "learning_rate": 0.0005, "loss": 2.0873, "step": 203750 }, { "epoch": 0.7755608504677878, "grad_norm": 0.1171368807554245, "learning_rate": 0.0005, "loss": 2.1091, "step": 203760 }, { "epoch": 0.7755989129359104, "grad_norm": 0.11364400386810303, "learning_rate": 0.0005, "loss": 2.1064, "step": 203770 }, { "epoch": 0.7756369754040331, "grad_norm": 0.1332196146249771, "learning_rate": 0.0005, "loss": 2.1062, "step": 203780 }, { "epoch": 0.7756750378721557, "grad_norm": 0.12130193412303925, "learning_rate": 0.0005, "loss": 2.1009, "step": 203790 }, { "epoch": 0.7757131003402785, "grad_norm": 0.12022104114294052, "learning_rate": 0.0005, "loss": 2.108, "step": 203800 }, { "epoch": 0.7757511628084012, "grad_norm": 0.12510570883750916, "learning_rate": 0.0005, "loss": 2.1003, "step": 203810 }, { "epoch": 0.7757892252765238, "grad_norm": 0.1242448166012764, "learning_rate": 0.0005, "loss": 2.1093, "step": 203820 }, { "epoch": 0.7758272877446465, "grad_norm": 0.12493916600942612, "learning_rate": 0.0005, "loss": 2.0886, "step": 203830 }, { "epoch": 0.7758653502127691, "grad_norm": 0.1297662854194641, "learning_rate": 0.0005, "loss": 2.1143, "step": 203840 }, { "epoch": 0.7759034126808919, "grad_norm": 0.13508228957653046, "learning_rate": 0.0005, "loss": 2.0833, "step": 203850 }, { "epoch": 0.7759414751490146, "grad_norm": 0.13191717863082886, "learning_rate": 0.0005, "loss": 2.0949, "step": 203860 }, { "epoch": 0.7759795376171372, "grad_norm": 0.13562016189098358, "learning_rate": 0.0005, "loss": 2.1019, "step": 203870 }, { "epoch": 0.7760176000852599, "grad_norm": 0.1370725929737091, "learning_rate": 0.0005, "loss": 2.1064, "step": 203880 }, { "epoch": 0.7760556625533827, "grad_norm": 0.1385413259267807, "learning_rate": 0.0005, "loss": 2.1088, "step": 203890 }, { "epoch": 0.7760937250215053, "grad_norm": 0.12840357422828674, "learning_rate": 0.0005, "loss": 2.0966, "step": 203900 }, { "epoch": 0.776131787489628, "grad_norm": 0.13046427071094513, "learning_rate": 0.0005, "loss": 2.1018, "step": 203910 }, { "epoch": 0.7761698499577506, "grad_norm": 0.11721883714199066, "learning_rate": 0.0005, "loss": 2.1141, "step": 203920 }, { "epoch": 0.7762079124258734, "grad_norm": 0.18598304688930511, "learning_rate": 0.0005, "loss": 2.1031, "step": 203930 }, { "epoch": 0.776245974893996, "grad_norm": 0.12292864173650742, "learning_rate": 0.0005, "loss": 2.1138, "step": 203940 }, { "epoch": 0.7762840373621187, "grad_norm": 0.13777364790439606, "learning_rate": 0.0005, "loss": 2.1086, "step": 203950 }, { "epoch": 0.7763220998302414, "grad_norm": 0.1157660037279129, "learning_rate": 0.0005, "loss": 2.1084, "step": 203960 }, { "epoch": 0.776360162298364, "grad_norm": 0.13139058649539948, "learning_rate": 0.0005, "loss": 2.1039, "step": 203970 }, { "epoch": 0.7763982247664868, "grad_norm": 0.1354646384716034, "learning_rate": 0.0005, "loss": 2.0906, "step": 203980 }, { "epoch": 0.7764362872346094, "grad_norm": 0.13240858912467957, "learning_rate": 0.0005, "loss": 2.1156, "step": 203990 }, { "epoch": 0.7764743497027321, "grad_norm": 0.1313706338405609, "learning_rate": 0.0005, "loss": 2.1111, "step": 204000 }, { "epoch": 0.7765124121708548, "grad_norm": 0.11614730954170227, "learning_rate": 0.0005, "loss": 2.096, "step": 204010 }, { "epoch": 0.7765504746389775, "grad_norm": 0.11456860601902008, "learning_rate": 0.0005, "loss": 2.1211, "step": 204020 }, { "epoch": 0.7765885371071002, "grad_norm": 0.1360856592655182, "learning_rate": 0.0005, "loss": 2.094, "step": 204030 }, { "epoch": 0.7766265995752228, "grad_norm": 0.1354278028011322, "learning_rate": 0.0005, "loss": 2.119, "step": 204040 }, { "epoch": 0.7766646620433455, "grad_norm": 0.11774899065494537, "learning_rate": 0.0005, "loss": 2.0985, "step": 204050 }, { "epoch": 0.7767027245114683, "grad_norm": 0.14782720804214478, "learning_rate": 0.0005, "loss": 2.1129, "step": 204060 }, { "epoch": 0.7767407869795909, "grad_norm": 0.1334516555070877, "learning_rate": 0.0005, "loss": 2.1096, "step": 204070 }, { "epoch": 0.7767788494477136, "grad_norm": 0.13219749927520752, "learning_rate": 0.0005, "loss": 2.1232, "step": 204080 }, { "epoch": 0.7768169119158362, "grad_norm": 0.12305323034524918, "learning_rate": 0.0005, "loss": 2.0999, "step": 204090 }, { "epoch": 0.776854974383959, "grad_norm": 0.1264452338218689, "learning_rate": 0.0005, "loss": 2.1098, "step": 204100 }, { "epoch": 0.7768930368520817, "grad_norm": 0.13045167922973633, "learning_rate": 0.0005, "loss": 2.1, "step": 204110 }, { "epoch": 0.7769310993202043, "grad_norm": 0.1378946453332901, "learning_rate": 0.0005, "loss": 2.1031, "step": 204120 }, { "epoch": 0.776969161788327, "grad_norm": 0.14290933310985565, "learning_rate": 0.0005, "loss": 2.1221, "step": 204130 }, { "epoch": 0.7770072242564496, "grad_norm": 0.1294529139995575, "learning_rate": 0.0005, "loss": 2.1161, "step": 204140 }, { "epoch": 0.7770452867245724, "grad_norm": 0.11729662865400314, "learning_rate": 0.0005, "loss": 2.1155, "step": 204150 }, { "epoch": 0.7770833491926951, "grad_norm": 0.13432623445987701, "learning_rate": 0.0005, "loss": 2.1106, "step": 204160 }, { "epoch": 0.7771214116608177, "grad_norm": 0.11898574978113174, "learning_rate": 0.0005, "loss": 2.1039, "step": 204170 }, { "epoch": 0.7771594741289404, "grad_norm": 0.12093368917703629, "learning_rate": 0.0005, "loss": 2.1174, "step": 204180 }, { "epoch": 0.7771975365970631, "grad_norm": 0.1334381401538849, "learning_rate": 0.0005, "loss": 2.0988, "step": 204190 }, { "epoch": 0.7772355990651858, "grad_norm": 0.12980128824710846, "learning_rate": 0.0005, "loss": 2.0964, "step": 204200 }, { "epoch": 0.7772736615333085, "grad_norm": 0.1222313717007637, "learning_rate": 0.0005, "loss": 2.0976, "step": 204210 }, { "epoch": 0.7773117240014311, "grad_norm": 0.13379375636577606, "learning_rate": 0.0005, "loss": 2.1176, "step": 204220 }, { "epoch": 0.7773497864695539, "grad_norm": 0.12348196655511856, "learning_rate": 0.0005, "loss": 2.1005, "step": 204230 }, { "epoch": 0.7773878489376765, "grad_norm": 0.1255643665790558, "learning_rate": 0.0005, "loss": 2.1, "step": 204240 }, { "epoch": 0.7774259114057992, "grad_norm": 0.1284782737493515, "learning_rate": 0.0005, "loss": 2.105, "step": 204250 }, { "epoch": 0.7774639738739219, "grad_norm": 0.12363579124212265, "learning_rate": 0.0005, "loss": 2.1113, "step": 204260 }, { "epoch": 0.7775020363420445, "grad_norm": 0.11654678732156754, "learning_rate": 0.0005, "loss": 2.1036, "step": 204270 }, { "epoch": 0.7775400988101673, "grad_norm": 0.12405503541231155, "learning_rate": 0.0005, "loss": 2.113, "step": 204280 }, { "epoch": 0.7775781612782899, "grad_norm": 0.121688112616539, "learning_rate": 0.0005, "loss": 2.1037, "step": 204290 }, { "epoch": 0.7776162237464126, "grad_norm": 0.12813690304756165, "learning_rate": 0.0005, "loss": 2.1245, "step": 204300 }, { "epoch": 0.7776542862145353, "grad_norm": 0.12217184901237488, "learning_rate": 0.0005, "loss": 2.1177, "step": 204310 }, { "epoch": 0.777692348682658, "grad_norm": 0.12200063467025757, "learning_rate": 0.0005, "loss": 2.1009, "step": 204320 }, { "epoch": 0.7777304111507807, "grad_norm": 0.12824185192584991, "learning_rate": 0.0005, "loss": 2.095, "step": 204330 }, { "epoch": 0.7777684736189033, "grad_norm": 0.131599560379982, "learning_rate": 0.0005, "loss": 2.1216, "step": 204340 }, { "epoch": 0.777806536087026, "grad_norm": 0.12848694622516632, "learning_rate": 0.0005, "loss": 2.0989, "step": 204350 }, { "epoch": 0.7778445985551488, "grad_norm": 0.13016167283058167, "learning_rate": 0.0005, "loss": 2.1165, "step": 204360 }, { "epoch": 0.7778826610232714, "grad_norm": 0.12172859907150269, "learning_rate": 0.0005, "loss": 2.1154, "step": 204370 }, { "epoch": 0.7779207234913941, "grad_norm": 0.1151774451136589, "learning_rate": 0.0005, "loss": 2.1073, "step": 204380 }, { "epoch": 0.7779587859595167, "grad_norm": 0.1309356987476349, "learning_rate": 0.0005, "loss": 2.1162, "step": 204390 }, { "epoch": 0.7779968484276394, "grad_norm": 0.1296638697385788, "learning_rate": 0.0005, "loss": 2.1122, "step": 204400 }, { "epoch": 0.7780349108957622, "grad_norm": 0.11932416260242462, "learning_rate": 0.0005, "loss": 2.0981, "step": 204410 }, { "epoch": 0.7780729733638848, "grad_norm": 0.12049002200365067, "learning_rate": 0.0005, "loss": 2.1042, "step": 204420 }, { "epoch": 0.7781110358320075, "grad_norm": 0.1300167441368103, "learning_rate": 0.0005, "loss": 2.1002, "step": 204430 }, { "epoch": 0.7781490983001301, "grad_norm": 0.12067482620477676, "learning_rate": 0.0005, "loss": 2.1151, "step": 204440 }, { "epoch": 0.7781871607682529, "grad_norm": 0.12820187211036682, "learning_rate": 0.0005, "loss": 2.102, "step": 204450 }, { "epoch": 0.7782252232363756, "grad_norm": 0.1288333237171173, "learning_rate": 0.0005, "loss": 2.0919, "step": 204460 }, { "epoch": 0.7782632857044982, "grad_norm": 0.1281125694513321, "learning_rate": 0.0005, "loss": 2.1084, "step": 204470 }, { "epoch": 0.7783013481726209, "grad_norm": 0.12810519337654114, "learning_rate": 0.0005, "loss": 2.1116, "step": 204480 }, { "epoch": 0.7783394106407436, "grad_norm": 0.11424490809440613, "learning_rate": 0.0005, "loss": 2.1078, "step": 204490 }, { "epoch": 0.7783774731088663, "grad_norm": 0.12385211139917374, "learning_rate": 0.0005, "loss": 2.1037, "step": 204500 }, { "epoch": 0.778415535576989, "grad_norm": 0.13899962604045868, "learning_rate": 0.0005, "loss": 2.1092, "step": 204510 }, { "epoch": 0.7784535980451116, "grad_norm": 0.13048166036605835, "learning_rate": 0.0005, "loss": 2.0961, "step": 204520 }, { "epoch": 0.7784916605132344, "grad_norm": 0.12829838693141937, "learning_rate": 0.0005, "loss": 2.0889, "step": 204530 }, { "epoch": 0.778529722981357, "grad_norm": 0.11860474944114685, "learning_rate": 0.0005, "loss": 2.1158, "step": 204540 }, { "epoch": 0.7785677854494797, "grad_norm": 0.12046080082654953, "learning_rate": 0.0005, "loss": 2.11, "step": 204550 }, { "epoch": 0.7786058479176023, "grad_norm": 0.11780071258544922, "learning_rate": 0.0005, "loss": 2.1007, "step": 204560 }, { "epoch": 0.778643910385725, "grad_norm": 0.1262700855731964, "learning_rate": 0.0005, "loss": 2.1383, "step": 204570 }, { "epoch": 0.7786819728538478, "grad_norm": 0.14198298752307892, "learning_rate": 0.0005, "loss": 2.1055, "step": 204580 }, { "epoch": 0.7787200353219704, "grad_norm": 0.13928957283496857, "learning_rate": 0.0005, "loss": 2.1085, "step": 204590 }, { "epoch": 0.7787580977900931, "grad_norm": 0.12705102562904358, "learning_rate": 0.0005, "loss": 2.1069, "step": 204600 }, { "epoch": 0.7787961602582157, "grad_norm": 0.1210823580622673, "learning_rate": 0.0005, "loss": 2.1038, "step": 204610 }, { "epoch": 0.7788342227263385, "grad_norm": 0.12910155951976776, "learning_rate": 0.0005, "loss": 2.1093, "step": 204620 }, { "epoch": 0.7788722851944612, "grad_norm": 0.13223743438720703, "learning_rate": 0.0005, "loss": 2.1137, "step": 204630 }, { "epoch": 0.7789103476625838, "grad_norm": 0.1313503235578537, "learning_rate": 0.0005, "loss": 2.1063, "step": 204640 }, { "epoch": 0.7789484101307065, "grad_norm": 0.13831321895122528, "learning_rate": 0.0005, "loss": 2.1119, "step": 204650 }, { "epoch": 0.7789864725988292, "grad_norm": 0.12389397621154785, "learning_rate": 0.0005, "loss": 2.1098, "step": 204660 }, { "epoch": 0.7790245350669519, "grad_norm": 0.12389600276947021, "learning_rate": 0.0005, "loss": 2.1049, "step": 204670 }, { "epoch": 0.7790625975350746, "grad_norm": 0.12264768034219742, "learning_rate": 0.0005, "loss": 2.1157, "step": 204680 }, { "epoch": 0.7791006600031972, "grad_norm": 0.1387430876493454, "learning_rate": 0.0005, "loss": 2.1243, "step": 204690 }, { "epoch": 0.7791387224713199, "grad_norm": 0.129319965839386, "learning_rate": 0.0005, "loss": 2.1134, "step": 204700 }, { "epoch": 0.7791767849394426, "grad_norm": 0.12320511043071747, "learning_rate": 0.0005, "loss": 2.1163, "step": 204710 }, { "epoch": 0.7792148474075653, "grad_norm": 0.11558236181735992, "learning_rate": 0.0005, "loss": 2.0915, "step": 204720 }, { "epoch": 0.779252909875688, "grad_norm": 0.12244782596826553, "learning_rate": 0.0005, "loss": 2.1156, "step": 204730 }, { "epoch": 0.7792909723438106, "grad_norm": 0.13204488158226013, "learning_rate": 0.0005, "loss": 2.1138, "step": 204740 }, { "epoch": 0.7793290348119334, "grad_norm": 0.1383647918701172, "learning_rate": 0.0005, "loss": 2.0981, "step": 204750 }, { "epoch": 0.779367097280056, "grad_norm": 0.11390195786952972, "learning_rate": 0.0005, "loss": 2.1185, "step": 204760 }, { "epoch": 0.7794051597481787, "grad_norm": 0.10992393642663956, "learning_rate": 0.0005, "loss": 2.0976, "step": 204770 }, { "epoch": 0.7794432222163014, "grad_norm": 0.12079618126153946, "learning_rate": 0.0005, "loss": 2.1002, "step": 204780 }, { "epoch": 0.7794812846844241, "grad_norm": 0.13436715304851532, "learning_rate": 0.0005, "loss": 2.1007, "step": 204790 }, { "epoch": 0.7795193471525468, "grad_norm": 0.12580615282058716, "learning_rate": 0.0005, "loss": 2.0882, "step": 204800 }, { "epoch": 0.7795574096206694, "grad_norm": 0.12018632143735886, "learning_rate": 0.0005, "loss": 2.1041, "step": 204810 }, { "epoch": 0.7795954720887921, "grad_norm": 0.1208050474524498, "learning_rate": 0.0005, "loss": 2.089, "step": 204820 }, { "epoch": 0.7796335345569148, "grad_norm": 0.12312716245651245, "learning_rate": 0.0005, "loss": 2.0985, "step": 204830 }, { "epoch": 0.7796715970250375, "grad_norm": 0.14190800487995148, "learning_rate": 0.0005, "loss": 2.1007, "step": 204840 }, { "epoch": 0.7797096594931602, "grad_norm": 0.14695966243743896, "learning_rate": 0.0005, "loss": 2.0925, "step": 204850 }, { "epoch": 0.7797477219612828, "grad_norm": 0.12959155440330505, "learning_rate": 0.0005, "loss": 2.0896, "step": 204860 }, { "epoch": 0.7797857844294055, "grad_norm": 0.12169928848743439, "learning_rate": 0.0005, "loss": 2.1201, "step": 204870 }, { "epoch": 0.7798238468975283, "grad_norm": 0.13090549409389496, "learning_rate": 0.0005, "loss": 2.1039, "step": 204880 }, { "epoch": 0.7798619093656509, "grad_norm": 0.12255292385816574, "learning_rate": 0.0005, "loss": 2.1095, "step": 204890 }, { "epoch": 0.7798999718337736, "grad_norm": 0.1383610963821411, "learning_rate": 0.0005, "loss": 2.1088, "step": 204900 }, { "epoch": 0.7799380343018962, "grad_norm": 0.12175476551055908, "learning_rate": 0.0005, "loss": 2.0998, "step": 204910 }, { "epoch": 0.779976096770019, "grad_norm": 0.1260368973016739, "learning_rate": 0.0005, "loss": 2.1006, "step": 204920 }, { "epoch": 0.7800141592381417, "grad_norm": 0.12578240036964417, "learning_rate": 0.0005, "loss": 2.0863, "step": 204930 }, { "epoch": 0.7800522217062643, "grad_norm": 0.1140437051653862, "learning_rate": 0.0005, "loss": 2.1075, "step": 204940 }, { "epoch": 0.780090284174387, "grad_norm": 0.13163577020168304, "learning_rate": 0.0005, "loss": 2.1092, "step": 204950 }, { "epoch": 0.7801283466425097, "grad_norm": 0.11661121249198914, "learning_rate": 0.0005, "loss": 2.1179, "step": 204960 }, { "epoch": 0.7801664091106324, "grad_norm": 0.12443295121192932, "learning_rate": 0.0005, "loss": 2.1064, "step": 204970 }, { "epoch": 0.780204471578755, "grad_norm": 0.13133709132671356, "learning_rate": 0.0005, "loss": 2.106, "step": 204980 }, { "epoch": 0.7802425340468777, "grad_norm": 0.12514156103134155, "learning_rate": 0.0005, "loss": 2.1135, "step": 204990 }, { "epoch": 0.7802805965150004, "grad_norm": 0.13035859167575836, "learning_rate": 0.0005, "loss": 2.1084, "step": 205000 }, { "epoch": 0.7803186589831231, "grad_norm": 0.12508799135684967, "learning_rate": 0.0005, "loss": 2.1023, "step": 205010 }, { "epoch": 0.7803567214512458, "grad_norm": 0.12291455268859863, "learning_rate": 0.0005, "loss": 2.1123, "step": 205020 }, { "epoch": 0.7803947839193685, "grad_norm": 0.12133867293596268, "learning_rate": 0.0005, "loss": 2.1245, "step": 205030 }, { "epoch": 0.7804328463874911, "grad_norm": 0.11362049728631973, "learning_rate": 0.0005, "loss": 2.102, "step": 205040 }, { "epoch": 0.7804709088556139, "grad_norm": 0.132710263133049, "learning_rate": 0.0005, "loss": 2.1191, "step": 205050 }, { "epoch": 0.7805089713237365, "grad_norm": 0.1385025829076767, "learning_rate": 0.0005, "loss": 2.1142, "step": 205060 }, { "epoch": 0.7805470337918592, "grad_norm": 0.13487720489501953, "learning_rate": 0.0005, "loss": 2.1216, "step": 205070 }, { "epoch": 0.7805850962599818, "grad_norm": 0.11675132066011429, "learning_rate": 0.0005, "loss": 2.1149, "step": 205080 }, { "epoch": 0.7806231587281046, "grad_norm": 0.1177266389131546, "learning_rate": 0.0005, "loss": 2.1143, "step": 205090 }, { "epoch": 0.7806612211962273, "grad_norm": 0.12549777328968048, "learning_rate": 0.0005, "loss": 2.1081, "step": 205100 }, { "epoch": 0.7806992836643499, "grad_norm": 0.12713997066020966, "learning_rate": 0.0005, "loss": 2.1087, "step": 205110 }, { "epoch": 0.7807373461324726, "grad_norm": 0.1339418888092041, "learning_rate": 0.0005, "loss": 2.122, "step": 205120 }, { "epoch": 0.7807754086005952, "grad_norm": 0.12972566485404968, "learning_rate": 0.0005, "loss": 2.0958, "step": 205130 }, { "epoch": 0.780813471068718, "grad_norm": 0.12319502234458923, "learning_rate": 0.0005, "loss": 2.0862, "step": 205140 }, { "epoch": 0.7808515335368407, "grad_norm": 0.11615166813135147, "learning_rate": 0.0005, "loss": 2.1114, "step": 205150 }, { "epoch": 0.7808895960049633, "grad_norm": 0.11988827586174011, "learning_rate": 0.0005, "loss": 2.1261, "step": 205160 }, { "epoch": 0.780927658473086, "grad_norm": 0.1277739554643631, "learning_rate": 0.0005, "loss": 2.1102, "step": 205170 }, { "epoch": 0.7809657209412088, "grad_norm": 0.12377830594778061, "learning_rate": 0.0005, "loss": 2.1053, "step": 205180 }, { "epoch": 0.7810037834093314, "grad_norm": 0.11800064146518707, "learning_rate": 0.0005, "loss": 2.1158, "step": 205190 }, { "epoch": 0.7810418458774541, "grad_norm": 0.12103444337844849, "learning_rate": 0.0005, "loss": 2.1099, "step": 205200 }, { "epoch": 0.7810799083455767, "grad_norm": 0.13712644577026367, "learning_rate": 0.0005, "loss": 2.1046, "step": 205210 }, { "epoch": 0.7811179708136995, "grad_norm": 0.1243886649608612, "learning_rate": 0.0005, "loss": 2.1165, "step": 205220 }, { "epoch": 0.7811560332818221, "grad_norm": 0.12707261741161346, "learning_rate": 0.0005, "loss": 2.1102, "step": 205230 }, { "epoch": 0.7811940957499448, "grad_norm": 0.1293025016784668, "learning_rate": 0.0005, "loss": 2.1073, "step": 205240 }, { "epoch": 0.7812321582180675, "grad_norm": 0.11886018514633179, "learning_rate": 0.0005, "loss": 2.113, "step": 205250 }, { "epoch": 0.7812702206861901, "grad_norm": 0.1304692178964615, "learning_rate": 0.0005, "loss": 2.1145, "step": 205260 }, { "epoch": 0.7813082831543129, "grad_norm": 0.13967598974704742, "learning_rate": 0.0005, "loss": 2.1097, "step": 205270 }, { "epoch": 0.7813463456224355, "grad_norm": 0.14922238886356354, "learning_rate": 0.0005, "loss": 2.1159, "step": 205280 }, { "epoch": 0.7813844080905582, "grad_norm": 0.15449193120002747, "learning_rate": 0.0005, "loss": 2.1173, "step": 205290 }, { "epoch": 0.7814224705586809, "grad_norm": 0.13307182490825653, "learning_rate": 0.0005, "loss": 2.0898, "step": 205300 }, { "epoch": 0.7814605330268036, "grad_norm": 0.1463232934474945, "learning_rate": 0.0005, "loss": 2.1101, "step": 205310 }, { "epoch": 0.7814985954949263, "grad_norm": 0.12033987790346146, "learning_rate": 0.0005, "loss": 2.1162, "step": 205320 }, { "epoch": 0.7815366579630489, "grad_norm": 0.12558874487876892, "learning_rate": 0.0005, "loss": 2.0963, "step": 205330 }, { "epoch": 0.7815747204311716, "grad_norm": 0.1404847800731659, "learning_rate": 0.0005, "loss": 2.1007, "step": 205340 }, { "epoch": 0.7816127828992944, "grad_norm": 0.14405734837055206, "learning_rate": 0.0005, "loss": 2.1066, "step": 205350 }, { "epoch": 0.781650845367417, "grad_norm": 0.11858896166086197, "learning_rate": 0.0005, "loss": 2.1158, "step": 205360 }, { "epoch": 0.7816889078355397, "grad_norm": 0.12527897953987122, "learning_rate": 0.0005, "loss": 2.1011, "step": 205370 }, { "epoch": 0.7817269703036623, "grad_norm": 0.14698106050491333, "learning_rate": 0.0005, "loss": 2.0952, "step": 205380 }, { "epoch": 0.7817650327717851, "grad_norm": 0.13634824752807617, "learning_rate": 0.0005, "loss": 2.1003, "step": 205390 }, { "epoch": 0.7818030952399078, "grad_norm": 0.11414165049791336, "learning_rate": 0.0005, "loss": 2.1065, "step": 205400 }, { "epoch": 0.7818411577080304, "grad_norm": 0.13669812679290771, "learning_rate": 0.0005, "loss": 2.1015, "step": 205410 }, { "epoch": 0.7818792201761531, "grad_norm": 0.1324278861284256, "learning_rate": 0.0005, "loss": 2.0997, "step": 205420 }, { "epoch": 0.7819172826442757, "grad_norm": 0.13586843013763428, "learning_rate": 0.0005, "loss": 2.1058, "step": 205430 }, { "epoch": 0.7819553451123985, "grad_norm": 0.12450312823057175, "learning_rate": 0.0005, "loss": 2.0972, "step": 205440 }, { "epoch": 0.7819934075805212, "grad_norm": 0.12089171260595322, "learning_rate": 0.0005, "loss": 2.118, "step": 205450 }, { "epoch": 0.7820314700486438, "grad_norm": 0.11433006823062897, "learning_rate": 0.0005, "loss": 2.1157, "step": 205460 }, { "epoch": 0.7820695325167665, "grad_norm": 0.12635235488414764, "learning_rate": 0.0005, "loss": 2.0977, "step": 205470 }, { "epoch": 0.7821075949848892, "grad_norm": 0.12928998470306396, "learning_rate": 0.0005, "loss": 2.0966, "step": 205480 }, { "epoch": 0.7821456574530119, "grad_norm": 0.116602323949337, "learning_rate": 0.0005, "loss": 2.1029, "step": 205490 }, { "epoch": 0.7821837199211346, "grad_norm": 0.1305515021085739, "learning_rate": 0.0005, "loss": 2.1098, "step": 205500 }, { "epoch": 0.7822217823892572, "grad_norm": 0.12704770267009735, "learning_rate": 0.0005, "loss": 2.1036, "step": 205510 }, { "epoch": 0.78225984485738, "grad_norm": 0.1347886472940445, "learning_rate": 0.0005, "loss": 2.1005, "step": 205520 }, { "epoch": 0.7822979073255026, "grad_norm": 0.12773995101451874, "learning_rate": 0.0005, "loss": 2.1114, "step": 205530 }, { "epoch": 0.7823359697936253, "grad_norm": 0.12586063146591187, "learning_rate": 0.0005, "loss": 2.1187, "step": 205540 }, { "epoch": 0.782374032261748, "grad_norm": 0.12715336680412292, "learning_rate": 0.0005, "loss": 2.0949, "step": 205550 }, { "epoch": 0.7824120947298706, "grad_norm": 0.1147814616560936, "learning_rate": 0.0005, "loss": 2.1044, "step": 205560 }, { "epoch": 0.7824501571979934, "grad_norm": 0.1331065148115158, "learning_rate": 0.0005, "loss": 2.0975, "step": 205570 }, { "epoch": 0.782488219666116, "grad_norm": 0.129253551363945, "learning_rate": 0.0005, "loss": 2.1004, "step": 205580 }, { "epoch": 0.7825262821342387, "grad_norm": 0.13816684484481812, "learning_rate": 0.0005, "loss": 2.1155, "step": 205590 }, { "epoch": 0.7825643446023614, "grad_norm": 0.13440492749214172, "learning_rate": 0.0005, "loss": 2.1168, "step": 205600 }, { "epoch": 0.7826024070704841, "grad_norm": 0.1320820450782776, "learning_rate": 0.0005, "loss": 2.0969, "step": 205610 }, { "epoch": 0.7826404695386068, "grad_norm": 0.11831782013177872, "learning_rate": 0.0005, "loss": 2.1082, "step": 205620 }, { "epoch": 0.7826785320067294, "grad_norm": 0.11591319739818573, "learning_rate": 0.0005, "loss": 2.1108, "step": 205630 }, { "epoch": 0.7827165944748521, "grad_norm": 0.11812224239110947, "learning_rate": 0.0005, "loss": 2.1065, "step": 205640 }, { "epoch": 0.7827546569429749, "grad_norm": 0.12301401793956757, "learning_rate": 0.0005, "loss": 2.1015, "step": 205650 }, { "epoch": 0.7827927194110975, "grad_norm": 0.12146145850419998, "learning_rate": 0.0005, "loss": 2.1183, "step": 205660 }, { "epoch": 0.7828307818792202, "grad_norm": 0.13071522116661072, "learning_rate": 0.0005, "loss": 2.117, "step": 205670 }, { "epoch": 0.7828688443473428, "grad_norm": 0.12665431201457977, "learning_rate": 0.0005, "loss": 2.0957, "step": 205680 }, { "epoch": 0.7829069068154656, "grad_norm": 0.12369175255298615, "learning_rate": 0.0005, "loss": 2.1187, "step": 205690 }, { "epoch": 0.7829449692835883, "grad_norm": 0.14192961156368256, "learning_rate": 0.0005, "loss": 2.1105, "step": 205700 }, { "epoch": 0.7829830317517109, "grad_norm": 0.1278933882713318, "learning_rate": 0.0005, "loss": 2.1056, "step": 205710 }, { "epoch": 0.7830210942198336, "grad_norm": 0.12124519050121307, "learning_rate": 0.0005, "loss": 2.0972, "step": 205720 }, { "epoch": 0.7830591566879562, "grad_norm": 0.13264502584934235, "learning_rate": 0.0005, "loss": 2.0932, "step": 205730 }, { "epoch": 0.783097219156079, "grad_norm": 0.1269812136888504, "learning_rate": 0.0005, "loss": 2.0937, "step": 205740 }, { "epoch": 0.7831352816242017, "grad_norm": 0.1288755089044571, "learning_rate": 0.0005, "loss": 2.1071, "step": 205750 }, { "epoch": 0.7831733440923243, "grad_norm": 0.13168203830718994, "learning_rate": 0.0005, "loss": 2.1004, "step": 205760 }, { "epoch": 0.783211406560447, "grad_norm": 0.11975059658288956, "learning_rate": 0.0005, "loss": 2.1008, "step": 205770 }, { "epoch": 0.7832494690285697, "grad_norm": 0.1149614229798317, "learning_rate": 0.0005, "loss": 2.1152, "step": 205780 }, { "epoch": 0.7832875314966924, "grad_norm": 0.11844199150800705, "learning_rate": 0.0005, "loss": 2.1116, "step": 205790 }, { "epoch": 0.783325593964815, "grad_norm": 0.12237623333930969, "learning_rate": 0.0005, "loss": 2.0955, "step": 205800 }, { "epoch": 0.7833636564329377, "grad_norm": 0.12408531457185745, "learning_rate": 0.0005, "loss": 2.1099, "step": 205810 }, { "epoch": 0.7834017189010605, "grad_norm": 0.11463352292776108, "learning_rate": 0.0005, "loss": 2.1304, "step": 205820 }, { "epoch": 0.7834397813691831, "grad_norm": 0.1344767063856125, "learning_rate": 0.0005, "loss": 2.1236, "step": 205830 }, { "epoch": 0.7834778438373058, "grad_norm": 0.14116428792476654, "learning_rate": 0.0005, "loss": 2.1114, "step": 205840 }, { "epoch": 0.7835159063054284, "grad_norm": 0.14916980266571045, "learning_rate": 0.0005, "loss": 2.1218, "step": 205850 }, { "epoch": 0.7835539687735511, "grad_norm": 0.1306719183921814, "learning_rate": 0.0005, "loss": 2.1003, "step": 205860 }, { "epoch": 0.7835920312416739, "grad_norm": 0.13697031140327454, "learning_rate": 0.0005, "loss": 2.1082, "step": 205870 }, { "epoch": 0.7836300937097965, "grad_norm": 0.13301123678684235, "learning_rate": 0.0005, "loss": 2.1206, "step": 205880 }, { "epoch": 0.7836681561779192, "grad_norm": 0.1257372945547104, "learning_rate": 0.0005, "loss": 2.0985, "step": 205890 }, { "epoch": 0.7837062186460418, "grad_norm": 0.138628751039505, "learning_rate": 0.0005, "loss": 2.1107, "step": 205900 }, { "epoch": 0.7837442811141646, "grad_norm": 0.11938808113336563, "learning_rate": 0.0005, "loss": 2.1161, "step": 205910 }, { "epoch": 0.7837823435822873, "grad_norm": 0.1246340274810791, "learning_rate": 0.0005, "loss": 2.1052, "step": 205920 }, { "epoch": 0.7838204060504099, "grad_norm": 0.1270332634449005, "learning_rate": 0.0005, "loss": 2.1131, "step": 205930 }, { "epoch": 0.7838584685185326, "grad_norm": 0.11850014328956604, "learning_rate": 0.0005, "loss": 2.0882, "step": 205940 }, { "epoch": 0.7838965309866553, "grad_norm": 0.1358293741941452, "learning_rate": 0.0005, "loss": 2.1184, "step": 205950 }, { "epoch": 0.783934593454778, "grad_norm": 0.11286400258541107, "learning_rate": 0.0005, "loss": 2.1189, "step": 205960 }, { "epoch": 0.7839726559229007, "grad_norm": 0.11473195999860764, "learning_rate": 0.0005, "loss": 2.1052, "step": 205970 }, { "epoch": 0.7840107183910233, "grad_norm": 0.12464620918035507, "learning_rate": 0.0005, "loss": 2.1031, "step": 205980 }, { "epoch": 0.784048780859146, "grad_norm": 0.128046914935112, "learning_rate": 0.0005, "loss": 2.1177, "step": 205990 }, { "epoch": 0.7840868433272687, "grad_norm": 0.13824665546417236, "learning_rate": 0.0005, "loss": 2.0938, "step": 206000 }, { "epoch": 0.7841249057953914, "grad_norm": 0.12734805047512054, "learning_rate": 0.0005, "loss": 2.1123, "step": 206010 }, { "epoch": 0.7841629682635141, "grad_norm": 0.11570902168750763, "learning_rate": 0.0005, "loss": 2.1077, "step": 206020 }, { "epoch": 0.7842010307316367, "grad_norm": 0.13064268231391907, "learning_rate": 0.0005, "loss": 2.104, "step": 206030 }, { "epoch": 0.7842390931997595, "grad_norm": 0.1355222761631012, "learning_rate": 0.0005, "loss": 2.0937, "step": 206040 }, { "epoch": 0.7842771556678821, "grad_norm": 0.15214493870735168, "learning_rate": 0.0005, "loss": 2.1122, "step": 206050 }, { "epoch": 0.7843152181360048, "grad_norm": 0.12032835930585861, "learning_rate": 0.0005, "loss": 2.094, "step": 206060 }, { "epoch": 0.7843532806041275, "grad_norm": 0.12022040039300919, "learning_rate": 0.0005, "loss": 2.1011, "step": 206070 }, { "epoch": 0.7843913430722502, "grad_norm": 0.12360106408596039, "learning_rate": 0.0005, "loss": 2.1078, "step": 206080 }, { "epoch": 0.7844294055403729, "grad_norm": 0.12494411319494247, "learning_rate": 0.0005, "loss": 2.1002, "step": 206090 }, { "epoch": 0.7844674680084955, "grad_norm": 0.12421322613954544, "learning_rate": 0.0005, "loss": 2.1108, "step": 206100 }, { "epoch": 0.7845055304766182, "grad_norm": 0.12146128714084625, "learning_rate": 0.0005, "loss": 2.0991, "step": 206110 }, { "epoch": 0.784543592944741, "grad_norm": 0.13392406702041626, "learning_rate": 0.0005, "loss": 2.0923, "step": 206120 }, { "epoch": 0.7845816554128636, "grad_norm": 0.1384228765964508, "learning_rate": 0.0005, "loss": 2.1081, "step": 206130 }, { "epoch": 0.7846197178809863, "grad_norm": 0.13139431178569794, "learning_rate": 0.0005, "loss": 2.1104, "step": 206140 }, { "epoch": 0.7846577803491089, "grad_norm": 0.1144791916012764, "learning_rate": 0.0005, "loss": 2.0991, "step": 206150 }, { "epoch": 0.7846958428172316, "grad_norm": 0.14469364285469055, "learning_rate": 0.0005, "loss": 2.1064, "step": 206160 }, { "epoch": 0.7847339052853544, "grad_norm": 0.13301540911197662, "learning_rate": 0.0005, "loss": 2.1041, "step": 206170 }, { "epoch": 0.784771967753477, "grad_norm": 0.1295434534549713, "learning_rate": 0.0005, "loss": 2.1104, "step": 206180 }, { "epoch": 0.7848100302215997, "grad_norm": 0.12937068939208984, "learning_rate": 0.0005, "loss": 2.108, "step": 206190 }, { "epoch": 0.7848480926897223, "grad_norm": 0.12335211783647537, "learning_rate": 0.0005, "loss": 2.1038, "step": 206200 }, { "epoch": 0.7848861551578451, "grad_norm": 0.1225755363702774, "learning_rate": 0.0005, "loss": 2.1006, "step": 206210 }, { "epoch": 0.7849242176259678, "grad_norm": 0.11895408481359482, "learning_rate": 0.0005, "loss": 2.1073, "step": 206220 }, { "epoch": 0.7849622800940904, "grad_norm": 0.12693949043750763, "learning_rate": 0.0005, "loss": 2.1079, "step": 206230 }, { "epoch": 0.7850003425622131, "grad_norm": 0.12901674211025238, "learning_rate": 0.0005, "loss": 2.0974, "step": 206240 }, { "epoch": 0.7850384050303358, "grad_norm": 0.11677834391593933, "learning_rate": 0.0005, "loss": 2.1115, "step": 206250 }, { "epoch": 0.7850764674984585, "grad_norm": 0.12869200110435486, "learning_rate": 0.0005, "loss": 2.1026, "step": 206260 }, { "epoch": 0.7851145299665812, "grad_norm": 0.2570255398750305, "learning_rate": 0.0005, "loss": 2.0962, "step": 206270 }, { "epoch": 0.7851525924347038, "grad_norm": 0.12122949957847595, "learning_rate": 0.0005, "loss": 2.0991, "step": 206280 }, { "epoch": 0.7851906549028265, "grad_norm": 0.11915149539709091, "learning_rate": 0.0005, "loss": 2.1194, "step": 206290 }, { "epoch": 0.7852287173709492, "grad_norm": 0.12245742976665497, "learning_rate": 0.0005, "loss": 2.1078, "step": 206300 }, { "epoch": 0.7852667798390719, "grad_norm": 0.1253976821899414, "learning_rate": 0.0005, "loss": 2.0965, "step": 206310 }, { "epoch": 0.7853048423071946, "grad_norm": 0.12328154593706131, "learning_rate": 0.0005, "loss": 2.0952, "step": 206320 }, { "epoch": 0.7853429047753172, "grad_norm": 0.1260962337255478, "learning_rate": 0.0005, "loss": 2.0922, "step": 206330 }, { "epoch": 0.78538096724344, "grad_norm": 0.12645284831523895, "learning_rate": 0.0005, "loss": 2.1049, "step": 206340 }, { "epoch": 0.7854190297115626, "grad_norm": 0.1292407214641571, "learning_rate": 0.0005, "loss": 2.1132, "step": 206350 }, { "epoch": 0.7854570921796853, "grad_norm": 0.12239421904087067, "learning_rate": 0.0005, "loss": 2.1071, "step": 206360 }, { "epoch": 0.785495154647808, "grad_norm": 0.1213301345705986, "learning_rate": 0.0005, "loss": 2.1028, "step": 206370 }, { "epoch": 0.7855332171159307, "grad_norm": 0.12082770466804504, "learning_rate": 0.0005, "loss": 2.114, "step": 206380 }, { "epoch": 0.7855712795840534, "grad_norm": 0.12510155141353607, "learning_rate": 0.0005, "loss": 2.117, "step": 206390 }, { "epoch": 0.785609342052176, "grad_norm": 0.1296463906764984, "learning_rate": 0.0005, "loss": 2.1201, "step": 206400 }, { "epoch": 0.7856474045202987, "grad_norm": 0.11684957891702652, "learning_rate": 0.0005, "loss": 2.1084, "step": 206410 }, { "epoch": 0.7856854669884213, "grad_norm": 0.121466264128685, "learning_rate": 0.0005, "loss": 2.1088, "step": 206420 }, { "epoch": 0.7857235294565441, "grad_norm": 0.11626929044723511, "learning_rate": 0.0005, "loss": 2.1176, "step": 206430 }, { "epoch": 0.7857615919246668, "grad_norm": 0.12083613127470016, "learning_rate": 0.0005, "loss": 2.0951, "step": 206440 }, { "epoch": 0.7857996543927894, "grad_norm": 0.1161016970872879, "learning_rate": 0.0005, "loss": 2.0856, "step": 206450 }, { "epoch": 0.7858377168609121, "grad_norm": 0.12474708259105682, "learning_rate": 0.0005, "loss": 2.102, "step": 206460 }, { "epoch": 0.7858757793290349, "grad_norm": 0.12049251049757004, "learning_rate": 0.0005, "loss": 2.103, "step": 206470 }, { "epoch": 0.7859138417971575, "grad_norm": 0.12773816287517548, "learning_rate": 0.0005, "loss": 2.1135, "step": 206480 }, { "epoch": 0.7859519042652802, "grad_norm": 0.12669214606285095, "learning_rate": 0.0005, "loss": 2.1111, "step": 206490 }, { "epoch": 0.7859899667334028, "grad_norm": 0.12713594734668732, "learning_rate": 0.0005, "loss": 2.1037, "step": 206500 }, { "epoch": 0.7860280292015256, "grad_norm": 0.1267489492893219, "learning_rate": 0.0005, "loss": 2.1135, "step": 206510 }, { "epoch": 0.7860660916696482, "grad_norm": 0.12541113793849945, "learning_rate": 0.0005, "loss": 2.1107, "step": 206520 }, { "epoch": 0.7861041541377709, "grad_norm": 0.12049649655818939, "learning_rate": 0.0005, "loss": 2.1116, "step": 206530 }, { "epoch": 0.7861422166058936, "grad_norm": 0.12635451555252075, "learning_rate": 0.0005, "loss": 2.1043, "step": 206540 }, { "epoch": 0.7861802790740163, "grad_norm": 0.11797259002923965, "learning_rate": 0.0005, "loss": 2.1133, "step": 206550 }, { "epoch": 0.786218341542139, "grad_norm": 0.12539760768413544, "learning_rate": 0.0005, "loss": 2.1085, "step": 206560 }, { "epoch": 0.7862564040102616, "grad_norm": 0.12407641857862473, "learning_rate": 0.0005, "loss": 2.1096, "step": 206570 }, { "epoch": 0.7862944664783843, "grad_norm": 0.12364064157009125, "learning_rate": 0.0005, "loss": 2.1078, "step": 206580 }, { "epoch": 0.786332528946507, "grad_norm": 0.13597682118415833, "learning_rate": 0.0005, "loss": 2.1061, "step": 206590 }, { "epoch": 0.7863705914146297, "grad_norm": 0.12736718356609344, "learning_rate": 0.0005, "loss": 2.1082, "step": 206600 }, { "epoch": 0.7864086538827524, "grad_norm": 0.1300850659608841, "learning_rate": 0.0005, "loss": 2.1053, "step": 206610 }, { "epoch": 0.786446716350875, "grad_norm": 0.1280023455619812, "learning_rate": 0.0005, "loss": 2.1156, "step": 206620 }, { "epoch": 0.7864847788189977, "grad_norm": 0.1388653814792633, "learning_rate": 0.0005, "loss": 2.094, "step": 206630 }, { "epoch": 0.7865228412871205, "grad_norm": 0.12239661067724228, "learning_rate": 0.0005, "loss": 2.1054, "step": 206640 }, { "epoch": 0.7865609037552431, "grad_norm": 0.12179192900657654, "learning_rate": 0.0005, "loss": 2.1066, "step": 206650 }, { "epoch": 0.7865989662233658, "grad_norm": 0.1308254599571228, "learning_rate": 0.0005, "loss": 2.1027, "step": 206660 }, { "epoch": 0.7866370286914884, "grad_norm": 0.12290980666875839, "learning_rate": 0.0005, "loss": 2.0996, "step": 206670 }, { "epoch": 0.7866750911596112, "grad_norm": 0.12178878486156464, "learning_rate": 0.0005, "loss": 2.1061, "step": 206680 }, { "epoch": 0.7867131536277339, "grad_norm": 0.11778522282838821, "learning_rate": 0.0005, "loss": 2.1115, "step": 206690 }, { "epoch": 0.7867512160958565, "grad_norm": 0.12160991132259369, "learning_rate": 0.0005, "loss": 2.1167, "step": 206700 }, { "epoch": 0.7867892785639792, "grad_norm": 0.11683660745620728, "learning_rate": 0.0005, "loss": 2.1034, "step": 206710 }, { "epoch": 0.7868273410321018, "grad_norm": 0.1343355029821396, "learning_rate": 0.0005, "loss": 2.1009, "step": 206720 }, { "epoch": 0.7868654035002246, "grad_norm": 0.12453968822956085, "learning_rate": 0.0005, "loss": 2.1071, "step": 206730 }, { "epoch": 0.7869034659683473, "grad_norm": 0.1248086541891098, "learning_rate": 0.0005, "loss": 2.1193, "step": 206740 }, { "epoch": 0.7869415284364699, "grad_norm": 0.12308883666992188, "learning_rate": 0.0005, "loss": 2.0996, "step": 206750 }, { "epoch": 0.7869795909045926, "grad_norm": 0.12302669137716293, "learning_rate": 0.0005, "loss": 2.1109, "step": 206760 }, { "epoch": 0.7870176533727153, "grad_norm": 0.1207866296172142, "learning_rate": 0.0005, "loss": 2.1047, "step": 206770 }, { "epoch": 0.787055715840838, "grad_norm": 0.13234055042266846, "learning_rate": 0.0005, "loss": 2.1103, "step": 206780 }, { "epoch": 0.7870937783089607, "grad_norm": 0.1425267904996872, "learning_rate": 0.0005, "loss": 2.1046, "step": 206790 }, { "epoch": 0.7871318407770833, "grad_norm": 0.12068969756364822, "learning_rate": 0.0005, "loss": 2.1092, "step": 206800 }, { "epoch": 0.7871699032452061, "grad_norm": 0.11635356396436691, "learning_rate": 0.0005, "loss": 2.115, "step": 206810 }, { "epoch": 0.7872079657133287, "grad_norm": 0.13005107641220093, "learning_rate": 0.0005, "loss": 2.1067, "step": 206820 }, { "epoch": 0.7872460281814514, "grad_norm": 0.13159166276454926, "learning_rate": 0.0005, "loss": 2.1014, "step": 206830 }, { "epoch": 0.787284090649574, "grad_norm": 0.1244141086935997, "learning_rate": 0.0005, "loss": 2.1163, "step": 206840 }, { "epoch": 0.7873221531176967, "grad_norm": 0.12854860723018646, "learning_rate": 0.0005, "loss": 2.0951, "step": 206850 }, { "epoch": 0.7873602155858195, "grad_norm": 0.12042578309774399, "learning_rate": 0.0005, "loss": 2.0953, "step": 206860 }, { "epoch": 0.7873982780539421, "grad_norm": 0.12977582216262817, "learning_rate": 0.0005, "loss": 2.0942, "step": 206870 }, { "epoch": 0.7874363405220648, "grad_norm": 0.11893955618143082, "learning_rate": 0.0005, "loss": 2.1019, "step": 206880 }, { "epoch": 0.7874744029901874, "grad_norm": 0.13247361779212952, "learning_rate": 0.0005, "loss": 2.0895, "step": 206890 }, { "epoch": 0.7875124654583102, "grad_norm": 0.12134560942649841, "learning_rate": 0.0005, "loss": 2.111, "step": 206900 }, { "epoch": 0.7875505279264329, "grad_norm": 0.12910045683383942, "learning_rate": 0.0005, "loss": 2.1295, "step": 206910 }, { "epoch": 0.7875885903945555, "grad_norm": 0.11358480155467987, "learning_rate": 0.0005, "loss": 2.1134, "step": 206920 }, { "epoch": 0.7876266528626782, "grad_norm": 0.13005992770195007, "learning_rate": 0.0005, "loss": 2.1004, "step": 206930 }, { "epoch": 0.787664715330801, "grad_norm": 0.12794363498687744, "learning_rate": 0.0005, "loss": 2.0932, "step": 206940 }, { "epoch": 0.7877027777989236, "grad_norm": 0.1298464983701706, "learning_rate": 0.0005, "loss": 2.1196, "step": 206950 }, { "epoch": 0.7877408402670463, "grad_norm": 0.12766271829605103, "learning_rate": 0.0005, "loss": 2.0947, "step": 206960 }, { "epoch": 0.7877789027351689, "grad_norm": 0.1273168921470642, "learning_rate": 0.0005, "loss": 2.1155, "step": 206970 }, { "epoch": 0.7878169652032917, "grad_norm": 0.12702283263206482, "learning_rate": 0.0005, "loss": 2.1065, "step": 206980 }, { "epoch": 0.7878550276714144, "grad_norm": 0.13174878060817719, "learning_rate": 0.0005, "loss": 2.1027, "step": 206990 }, { "epoch": 0.787893090139537, "grad_norm": 0.12054309993982315, "learning_rate": 0.0005, "loss": 2.1044, "step": 207000 }, { "epoch": 0.7879311526076597, "grad_norm": 0.12238851934671402, "learning_rate": 0.0005, "loss": 2.1149, "step": 207010 }, { "epoch": 0.7879692150757823, "grad_norm": 0.14216649532318115, "learning_rate": 0.0005, "loss": 2.0952, "step": 207020 }, { "epoch": 0.7880072775439051, "grad_norm": 0.14556671679019928, "learning_rate": 0.0005, "loss": 2.1081, "step": 207030 }, { "epoch": 0.7880453400120278, "grad_norm": 0.13143803179264069, "learning_rate": 0.0005, "loss": 2.0974, "step": 207040 }, { "epoch": 0.7880834024801504, "grad_norm": 0.12600690126419067, "learning_rate": 0.0005, "loss": 2.0934, "step": 207050 }, { "epoch": 0.7881214649482731, "grad_norm": 0.12676817178726196, "learning_rate": 0.0005, "loss": 2.1047, "step": 207060 }, { "epoch": 0.7881595274163958, "grad_norm": 0.12112493813037872, "learning_rate": 0.0005, "loss": 2.1083, "step": 207070 }, { "epoch": 0.7881975898845185, "grad_norm": 0.12040665000677109, "learning_rate": 0.0005, "loss": 2.1242, "step": 207080 }, { "epoch": 0.7882356523526411, "grad_norm": 0.12102606147527695, "learning_rate": 0.0005, "loss": 2.1064, "step": 207090 }, { "epoch": 0.7882737148207638, "grad_norm": 0.1392698436975479, "learning_rate": 0.0005, "loss": 2.0991, "step": 207100 }, { "epoch": 0.7883117772888866, "grad_norm": 0.13138611614704132, "learning_rate": 0.0005, "loss": 2.1114, "step": 207110 }, { "epoch": 0.7883498397570092, "grad_norm": 0.1238655149936676, "learning_rate": 0.0005, "loss": 2.0922, "step": 207120 }, { "epoch": 0.7883879022251319, "grad_norm": 0.13656646013259888, "learning_rate": 0.0005, "loss": 2.114, "step": 207130 }, { "epoch": 0.7884259646932545, "grad_norm": 0.12678642570972443, "learning_rate": 0.0005, "loss": 2.1143, "step": 207140 }, { "epoch": 0.7884640271613772, "grad_norm": 0.12136158347129822, "learning_rate": 0.0005, "loss": 2.1072, "step": 207150 }, { "epoch": 0.7885020896295, "grad_norm": 0.14471644163131714, "learning_rate": 0.0005, "loss": 2.1, "step": 207160 }, { "epoch": 0.7885401520976226, "grad_norm": 0.12014409154653549, "learning_rate": 0.0005, "loss": 2.1035, "step": 207170 }, { "epoch": 0.7885782145657453, "grad_norm": 0.11891982704401016, "learning_rate": 0.0005, "loss": 2.1175, "step": 207180 }, { "epoch": 0.7886162770338679, "grad_norm": 0.12429668009281158, "learning_rate": 0.0005, "loss": 2.1145, "step": 207190 }, { "epoch": 0.7886543395019907, "grad_norm": 0.12298885732889175, "learning_rate": 0.0005, "loss": 2.1125, "step": 207200 }, { "epoch": 0.7886924019701134, "grad_norm": 0.1149677112698555, "learning_rate": 0.0005, "loss": 2.1018, "step": 207210 }, { "epoch": 0.788730464438236, "grad_norm": 0.11625348776578903, "learning_rate": 0.0005, "loss": 2.0962, "step": 207220 }, { "epoch": 0.7887685269063587, "grad_norm": 0.14361906051635742, "learning_rate": 0.0005, "loss": 2.098, "step": 207230 }, { "epoch": 0.7888065893744814, "grad_norm": 0.14315269887447357, "learning_rate": 0.0005, "loss": 2.0979, "step": 207240 }, { "epoch": 0.7888446518426041, "grad_norm": 0.11848867684602737, "learning_rate": 0.0005, "loss": 2.1125, "step": 207250 }, { "epoch": 0.7888827143107268, "grad_norm": 0.12466177344322205, "learning_rate": 0.0005, "loss": 2.1108, "step": 207260 }, { "epoch": 0.7889207767788494, "grad_norm": 0.12067979574203491, "learning_rate": 0.0005, "loss": 2.0958, "step": 207270 }, { "epoch": 0.7889588392469721, "grad_norm": 0.15696817636489868, "learning_rate": 0.0005, "loss": 2.0891, "step": 207280 }, { "epoch": 0.7889969017150948, "grad_norm": 0.12369900941848755, "learning_rate": 0.0005, "loss": 2.1185, "step": 207290 }, { "epoch": 0.7890349641832175, "grad_norm": 0.1119314655661583, "learning_rate": 0.0005, "loss": 2.1181, "step": 207300 }, { "epoch": 0.7890730266513402, "grad_norm": 0.11977098882198334, "learning_rate": 0.0005, "loss": 2.0966, "step": 207310 }, { "epoch": 0.7891110891194628, "grad_norm": 0.12511758506298065, "learning_rate": 0.0005, "loss": 2.0865, "step": 207320 }, { "epoch": 0.7891491515875856, "grad_norm": 0.12501899898052216, "learning_rate": 0.0005, "loss": 2.1186, "step": 207330 }, { "epoch": 0.7891872140557082, "grad_norm": 0.12755510210990906, "learning_rate": 0.0005, "loss": 2.1231, "step": 207340 }, { "epoch": 0.7892252765238309, "grad_norm": 0.13348741829395294, "learning_rate": 0.0005, "loss": 2.1204, "step": 207350 }, { "epoch": 0.7892633389919536, "grad_norm": 0.13311418890953064, "learning_rate": 0.0005, "loss": 2.1234, "step": 207360 }, { "epoch": 0.7893014014600763, "grad_norm": 0.12708371877670288, "learning_rate": 0.0005, "loss": 2.1065, "step": 207370 }, { "epoch": 0.789339463928199, "grad_norm": 0.1293056756258011, "learning_rate": 0.0005, "loss": 2.106, "step": 207380 }, { "epoch": 0.7893775263963216, "grad_norm": 0.1280953288078308, "learning_rate": 0.0005, "loss": 2.1174, "step": 207390 }, { "epoch": 0.7894155888644443, "grad_norm": 0.1261862814426422, "learning_rate": 0.0005, "loss": 2.0965, "step": 207400 }, { "epoch": 0.7894536513325671, "grad_norm": 0.13907188177108765, "learning_rate": 0.0005, "loss": 2.1073, "step": 207410 }, { "epoch": 0.7894917138006897, "grad_norm": 0.11450821906328201, "learning_rate": 0.0005, "loss": 2.1064, "step": 207420 }, { "epoch": 0.7895297762688124, "grad_norm": 0.12620657682418823, "learning_rate": 0.0005, "loss": 2.0947, "step": 207430 }, { "epoch": 0.789567838736935, "grad_norm": 0.12586279213428497, "learning_rate": 0.0005, "loss": 2.1038, "step": 207440 }, { "epoch": 0.7896059012050577, "grad_norm": 0.12371962517499924, "learning_rate": 0.0005, "loss": 2.11, "step": 207450 }, { "epoch": 0.7896439636731805, "grad_norm": 0.11716640740633011, "learning_rate": 0.0005, "loss": 2.0993, "step": 207460 }, { "epoch": 0.7896820261413031, "grad_norm": 0.12114844471216202, "learning_rate": 0.0005, "loss": 2.0976, "step": 207470 }, { "epoch": 0.7897200886094258, "grad_norm": 0.11807013303041458, "learning_rate": 0.0005, "loss": 2.1032, "step": 207480 }, { "epoch": 0.7897581510775484, "grad_norm": 0.13748803734779358, "learning_rate": 0.0005, "loss": 2.1054, "step": 207490 }, { "epoch": 0.7897962135456712, "grad_norm": 0.1313491016626358, "learning_rate": 0.0005, "loss": 2.0936, "step": 207500 }, { "epoch": 0.7898342760137939, "grad_norm": 0.1214139312505722, "learning_rate": 0.0005, "loss": 2.1161, "step": 207510 }, { "epoch": 0.7898723384819165, "grad_norm": 0.12973102927207947, "learning_rate": 0.0005, "loss": 2.1056, "step": 207520 }, { "epoch": 0.7899104009500392, "grad_norm": 0.12636278569698334, "learning_rate": 0.0005, "loss": 2.1054, "step": 207530 }, { "epoch": 0.7899484634181619, "grad_norm": 0.12249350547790527, "learning_rate": 0.0005, "loss": 2.1085, "step": 207540 }, { "epoch": 0.7899865258862846, "grad_norm": 0.1312398761510849, "learning_rate": 0.0005, "loss": 2.1146, "step": 207550 }, { "epoch": 0.7900245883544073, "grad_norm": 0.12716726958751678, "learning_rate": 0.0005, "loss": 2.1096, "step": 207560 }, { "epoch": 0.7900626508225299, "grad_norm": 0.11871406435966492, "learning_rate": 0.0005, "loss": 2.1147, "step": 207570 }, { "epoch": 0.7901007132906526, "grad_norm": 0.129543274641037, "learning_rate": 0.0005, "loss": 2.0934, "step": 207580 }, { "epoch": 0.7901387757587753, "grad_norm": 0.12391343712806702, "learning_rate": 0.0005, "loss": 2.1164, "step": 207590 }, { "epoch": 0.790176838226898, "grad_norm": 0.12911324203014374, "learning_rate": 0.0005, "loss": 2.1094, "step": 207600 }, { "epoch": 0.7902149006950207, "grad_norm": 0.12524741888046265, "learning_rate": 0.0005, "loss": 2.1147, "step": 207610 }, { "epoch": 0.7902529631631433, "grad_norm": 0.12118924409151077, "learning_rate": 0.0005, "loss": 2.1016, "step": 207620 }, { "epoch": 0.7902910256312661, "grad_norm": 0.1353355050086975, "learning_rate": 0.0005, "loss": 2.0991, "step": 207630 }, { "epoch": 0.7903290880993887, "grad_norm": 0.12381858378648758, "learning_rate": 0.0005, "loss": 2.1067, "step": 207640 }, { "epoch": 0.7903671505675114, "grad_norm": 0.13531967997550964, "learning_rate": 0.0005, "loss": 2.1022, "step": 207650 }, { "epoch": 0.790405213035634, "grad_norm": 0.12401936948299408, "learning_rate": 0.0005, "loss": 2.1108, "step": 207660 }, { "epoch": 0.7904432755037568, "grad_norm": 0.1160985603928566, "learning_rate": 0.0005, "loss": 2.1142, "step": 207670 }, { "epoch": 0.7904813379718795, "grad_norm": 0.14534474909305573, "learning_rate": 0.0005, "loss": 2.0966, "step": 207680 }, { "epoch": 0.7905194004400021, "grad_norm": 0.13224861025810242, "learning_rate": 0.0005, "loss": 2.1137, "step": 207690 }, { "epoch": 0.7905574629081248, "grad_norm": 0.1339549869298935, "learning_rate": 0.0005, "loss": 2.1168, "step": 207700 }, { "epoch": 0.7905955253762474, "grad_norm": 0.14083094894886017, "learning_rate": 0.0005, "loss": 2.1066, "step": 207710 }, { "epoch": 0.7906335878443702, "grad_norm": 0.11693526059389114, "learning_rate": 0.0005, "loss": 2.0983, "step": 207720 }, { "epoch": 0.7906716503124929, "grad_norm": 0.12638410925865173, "learning_rate": 0.0005, "loss": 2.1178, "step": 207730 }, { "epoch": 0.7907097127806155, "grad_norm": 0.13595831394195557, "learning_rate": 0.0005, "loss": 2.0955, "step": 207740 }, { "epoch": 0.7907477752487382, "grad_norm": 0.12722502648830414, "learning_rate": 0.0005, "loss": 2.0989, "step": 207750 }, { "epoch": 0.790785837716861, "grad_norm": 0.15252414345741272, "learning_rate": 0.0005, "loss": 2.1103, "step": 207760 }, { "epoch": 0.7908239001849836, "grad_norm": 0.12534798681735992, "learning_rate": 0.0005, "loss": 2.1015, "step": 207770 }, { "epoch": 0.7908619626531063, "grad_norm": 0.12866796553134918, "learning_rate": 0.0005, "loss": 2.1139, "step": 207780 }, { "epoch": 0.7909000251212289, "grad_norm": 0.12148977071046829, "learning_rate": 0.0005, "loss": 2.0836, "step": 207790 }, { "epoch": 0.7909380875893517, "grad_norm": 0.13029548525810242, "learning_rate": 0.0005, "loss": 2.1073, "step": 207800 }, { "epoch": 0.7909761500574743, "grad_norm": 0.125240296125412, "learning_rate": 0.0005, "loss": 2.0913, "step": 207810 }, { "epoch": 0.791014212525597, "grad_norm": 0.1148528978228569, "learning_rate": 0.0005, "loss": 2.1074, "step": 207820 }, { "epoch": 0.7910522749937197, "grad_norm": 0.11937754601240158, "learning_rate": 0.0005, "loss": 2.0951, "step": 207830 }, { "epoch": 0.7910903374618424, "grad_norm": 0.1433212012052536, "learning_rate": 0.0005, "loss": 2.1023, "step": 207840 }, { "epoch": 0.7911283999299651, "grad_norm": 0.12246479839086533, "learning_rate": 0.0005, "loss": 2.0938, "step": 207850 }, { "epoch": 0.7911664623980877, "grad_norm": 0.12843631207942963, "learning_rate": 0.0005, "loss": 2.1021, "step": 207860 }, { "epoch": 0.7912045248662104, "grad_norm": 0.13483227789402008, "learning_rate": 0.0005, "loss": 2.1025, "step": 207870 }, { "epoch": 0.7912425873343331, "grad_norm": 0.11968355625867844, "learning_rate": 0.0005, "loss": 2.1007, "step": 207880 }, { "epoch": 0.7912806498024558, "grad_norm": 0.13005025684833527, "learning_rate": 0.0005, "loss": 2.1061, "step": 207890 }, { "epoch": 0.7913187122705785, "grad_norm": 0.11541354656219482, "learning_rate": 0.0005, "loss": 2.1045, "step": 207900 }, { "epoch": 0.7913567747387011, "grad_norm": 0.14411550760269165, "learning_rate": 0.0005, "loss": 2.101, "step": 207910 }, { "epoch": 0.7913948372068238, "grad_norm": 0.1196213811635971, "learning_rate": 0.0005, "loss": 2.1095, "step": 207920 }, { "epoch": 0.7914328996749466, "grad_norm": 0.13634739816188812, "learning_rate": 0.0005, "loss": 2.0959, "step": 207930 }, { "epoch": 0.7914709621430692, "grad_norm": 0.1274816393852234, "learning_rate": 0.0005, "loss": 2.0932, "step": 207940 }, { "epoch": 0.7915090246111919, "grad_norm": 0.11603652685880661, "learning_rate": 0.0005, "loss": 2.107, "step": 207950 }, { "epoch": 0.7915470870793145, "grad_norm": 0.11945189535617828, "learning_rate": 0.0005, "loss": 2.0871, "step": 207960 }, { "epoch": 0.7915851495474373, "grad_norm": 0.12008696794509888, "learning_rate": 0.0005, "loss": 2.1011, "step": 207970 }, { "epoch": 0.79162321201556, "grad_norm": 0.12966056168079376, "learning_rate": 0.0005, "loss": 2.1058, "step": 207980 }, { "epoch": 0.7916612744836826, "grad_norm": 0.11781937628984451, "learning_rate": 0.0005, "loss": 2.1033, "step": 207990 }, { "epoch": 0.7916993369518053, "grad_norm": 0.1327858716249466, "learning_rate": 0.0005, "loss": 2.1111, "step": 208000 }, { "epoch": 0.7917373994199279, "grad_norm": 0.16530166566371918, "learning_rate": 0.0005, "loss": 2.1069, "step": 208010 }, { "epoch": 0.7917754618880507, "grad_norm": 0.12523028254508972, "learning_rate": 0.0005, "loss": 2.1094, "step": 208020 }, { "epoch": 0.7918135243561734, "grad_norm": 0.11686685681343079, "learning_rate": 0.0005, "loss": 2.1067, "step": 208030 }, { "epoch": 0.791851586824296, "grad_norm": 0.11279519647359848, "learning_rate": 0.0005, "loss": 2.1022, "step": 208040 }, { "epoch": 0.7918896492924187, "grad_norm": 0.14504992961883545, "learning_rate": 0.0005, "loss": 2.1102, "step": 208050 }, { "epoch": 0.7919277117605414, "grad_norm": 0.1297408491373062, "learning_rate": 0.0005, "loss": 2.089, "step": 208060 }, { "epoch": 0.7919657742286641, "grad_norm": 0.1253969669342041, "learning_rate": 0.0005, "loss": 2.098, "step": 208070 }, { "epoch": 0.7920038366967868, "grad_norm": 0.11713798344135284, "learning_rate": 0.0005, "loss": 2.1167, "step": 208080 }, { "epoch": 0.7920418991649094, "grad_norm": 0.12483670562505722, "learning_rate": 0.0005, "loss": 2.1052, "step": 208090 }, { "epoch": 0.7920799616330322, "grad_norm": 0.13903804123401642, "learning_rate": 0.0005, "loss": 2.114, "step": 208100 }, { "epoch": 0.7921180241011548, "grad_norm": 0.13183006644248962, "learning_rate": 0.0005, "loss": 2.1069, "step": 208110 }, { "epoch": 0.7921560865692775, "grad_norm": 0.12532487511634827, "learning_rate": 0.0005, "loss": 2.0982, "step": 208120 }, { "epoch": 0.7921941490374002, "grad_norm": 0.12564100325107574, "learning_rate": 0.0005, "loss": 2.1287, "step": 208130 }, { "epoch": 0.7922322115055228, "grad_norm": 0.12045514583587646, "learning_rate": 0.0005, "loss": 2.0955, "step": 208140 }, { "epoch": 0.7922702739736456, "grad_norm": 0.11653630435466766, "learning_rate": 0.0005, "loss": 2.1023, "step": 208150 }, { "epoch": 0.7923083364417682, "grad_norm": 0.12388094514608383, "learning_rate": 0.0005, "loss": 2.1095, "step": 208160 }, { "epoch": 0.7923463989098909, "grad_norm": 0.12007354944944382, "learning_rate": 0.0005, "loss": 2.1074, "step": 208170 }, { "epoch": 0.7923844613780135, "grad_norm": 0.1252797544002533, "learning_rate": 0.0005, "loss": 2.1015, "step": 208180 }, { "epoch": 0.7924225238461363, "grad_norm": 0.11857400834560394, "learning_rate": 0.0005, "loss": 2.0968, "step": 208190 }, { "epoch": 0.792460586314259, "grad_norm": 0.19271457195281982, "learning_rate": 0.0005, "loss": 2.1, "step": 208200 }, { "epoch": 0.7924986487823816, "grad_norm": 0.13805493712425232, "learning_rate": 0.0005, "loss": 2.1121, "step": 208210 }, { "epoch": 0.7925367112505043, "grad_norm": 0.13163357973098755, "learning_rate": 0.0005, "loss": 2.1062, "step": 208220 }, { "epoch": 0.7925747737186271, "grad_norm": 0.1201360672712326, "learning_rate": 0.0005, "loss": 2.1089, "step": 208230 }, { "epoch": 0.7926128361867497, "grad_norm": 0.14493165910243988, "learning_rate": 0.0005, "loss": 2.1093, "step": 208240 }, { "epoch": 0.7926508986548724, "grad_norm": 0.1183045506477356, "learning_rate": 0.0005, "loss": 2.1069, "step": 208250 }, { "epoch": 0.792688961122995, "grad_norm": 0.14185409247875214, "learning_rate": 0.0005, "loss": 2.1066, "step": 208260 }, { "epoch": 0.7927270235911178, "grad_norm": 0.13562677800655365, "learning_rate": 0.0005, "loss": 2.1005, "step": 208270 }, { "epoch": 0.7927650860592405, "grad_norm": 0.13558503985404968, "learning_rate": 0.0005, "loss": 2.1018, "step": 208280 }, { "epoch": 0.7928031485273631, "grad_norm": 0.11868877708911896, "learning_rate": 0.0005, "loss": 2.0877, "step": 208290 }, { "epoch": 0.7928412109954858, "grad_norm": 0.1223066970705986, "learning_rate": 0.0005, "loss": 2.1116, "step": 208300 }, { "epoch": 0.7928792734636084, "grad_norm": 0.12212125211954117, "learning_rate": 0.0005, "loss": 2.1094, "step": 208310 }, { "epoch": 0.7929173359317312, "grad_norm": 0.12405651807785034, "learning_rate": 0.0005, "loss": 2.1072, "step": 208320 }, { "epoch": 0.7929553983998539, "grad_norm": 0.11782582849264145, "learning_rate": 0.0005, "loss": 2.1198, "step": 208330 }, { "epoch": 0.7929934608679765, "grad_norm": 0.15370948612689972, "learning_rate": 0.0005, "loss": 2.087, "step": 208340 }, { "epoch": 0.7930315233360992, "grad_norm": 0.13403061032295227, "learning_rate": 0.0005, "loss": 2.1237, "step": 208350 }, { "epoch": 0.7930695858042219, "grad_norm": 0.130899116396904, "learning_rate": 0.0005, "loss": 2.1127, "step": 208360 }, { "epoch": 0.7931076482723446, "grad_norm": 0.119450643658638, "learning_rate": 0.0005, "loss": 2.1077, "step": 208370 }, { "epoch": 0.7931457107404672, "grad_norm": 0.11362778395414352, "learning_rate": 0.0005, "loss": 2.1043, "step": 208380 }, { "epoch": 0.7931837732085899, "grad_norm": 0.12749043107032776, "learning_rate": 0.0005, "loss": 2.1093, "step": 208390 }, { "epoch": 0.7932218356767127, "grad_norm": 0.11601119488477707, "learning_rate": 0.0005, "loss": 2.0986, "step": 208400 }, { "epoch": 0.7932598981448353, "grad_norm": 0.1223764568567276, "learning_rate": 0.0005, "loss": 2.1052, "step": 208410 }, { "epoch": 0.793297960612958, "grad_norm": 0.1120501160621643, "learning_rate": 0.0005, "loss": 2.1291, "step": 208420 }, { "epoch": 0.7933360230810806, "grad_norm": 0.12687718868255615, "learning_rate": 0.0005, "loss": 2.1023, "step": 208430 }, { "epoch": 0.7933740855492033, "grad_norm": 0.13210518658161163, "learning_rate": 0.0005, "loss": 2.1, "step": 208440 }, { "epoch": 0.7934121480173261, "grad_norm": 0.12925389409065247, "learning_rate": 0.0005, "loss": 2.1145, "step": 208450 }, { "epoch": 0.7934502104854487, "grad_norm": 0.13438375294208527, "learning_rate": 0.0005, "loss": 2.1062, "step": 208460 }, { "epoch": 0.7934882729535714, "grad_norm": 0.12347417324781418, "learning_rate": 0.0005, "loss": 2.0848, "step": 208470 }, { "epoch": 0.793526335421694, "grad_norm": 0.11717919260263443, "learning_rate": 0.0005, "loss": 2.1025, "step": 208480 }, { "epoch": 0.7935643978898168, "grad_norm": 0.12019776552915573, "learning_rate": 0.0005, "loss": 2.1123, "step": 208490 }, { "epoch": 0.7936024603579395, "grad_norm": 0.15380197763442993, "learning_rate": 0.0005, "loss": 2.1061, "step": 208500 }, { "epoch": 0.7936405228260621, "grad_norm": 0.12014266103506088, "learning_rate": 0.0005, "loss": 2.1, "step": 208510 }, { "epoch": 0.7936785852941848, "grad_norm": 0.1318885236978531, "learning_rate": 0.0005, "loss": 2.1092, "step": 208520 }, { "epoch": 0.7937166477623075, "grad_norm": 0.12295283377170563, "learning_rate": 0.0005, "loss": 2.1045, "step": 208530 }, { "epoch": 0.7937547102304302, "grad_norm": 0.1341993659734726, "learning_rate": 0.0005, "loss": 2.1032, "step": 208540 }, { "epoch": 0.7937927726985529, "grad_norm": 0.1195361539721489, "learning_rate": 0.0005, "loss": 2.1011, "step": 208550 }, { "epoch": 0.7938308351666755, "grad_norm": 0.12576062977313995, "learning_rate": 0.0005, "loss": 2.0984, "step": 208560 }, { "epoch": 0.7938688976347982, "grad_norm": 0.12561865150928497, "learning_rate": 0.0005, "loss": 2.0903, "step": 208570 }, { "epoch": 0.7939069601029209, "grad_norm": 0.1389131397008896, "learning_rate": 0.0005, "loss": 2.0906, "step": 208580 }, { "epoch": 0.7939450225710436, "grad_norm": 0.11707048863172531, "learning_rate": 0.0005, "loss": 2.1015, "step": 208590 }, { "epoch": 0.7939830850391663, "grad_norm": 0.1230204775929451, "learning_rate": 0.0005, "loss": 2.1134, "step": 208600 }, { "epoch": 0.7940211475072889, "grad_norm": 0.12738430500030518, "learning_rate": 0.0005, "loss": 2.1062, "step": 208610 }, { "epoch": 0.7940592099754117, "grad_norm": 0.1250593066215515, "learning_rate": 0.0005, "loss": 2.1059, "step": 208620 }, { "epoch": 0.7940972724435343, "grad_norm": 0.1447353959083557, "learning_rate": 0.0005, "loss": 2.1152, "step": 208630 }, { "epoch": 0.794135334911657, "grad_norm": 0.1509043425321579, "learning_rate": 0.0005, "loss": 2.1057, "step": 208640 }, { "epoch": 0.7941733973797797, "grad_norm": 0.12834376096725464, "learning_rate": 0.0005, "loss": 2.1153, "step": 208650 }, { "epoch": 0.7942114598479024, "grad_norm": 0.11595456302165985, "learning_rate": 0.0005, "loss": 2.0962, "step": 208660 }, { "epoch": 0.7942495223160251, "grad_norm": 0.1368151754140854, "learning_rate": 0.0005, "loss": 2.1055, "step": 208670 }, { "epoch": 0.7942875847841477, "grad_norm": 0.12382905185222626, "learning_rate": 0.0005, "loss": 2.1084, "step": 208680 }, { "epoch": 0.7943256472522704, "grad_norm": 0.11919035017490387, "learning_rate": 0.0005, "loss": 2.0879, "step": 208690 }, { "epoch": 0.7943637097203932, "grad_norm": 0.1275068074464798, "learning_rate": 0.0005, "loss": 2.1028, "step": 208700 }, { "epoch": 0.7944017721885158, "grad_norm": 0.12816788256168365, "learning_rate": 0.0005, "loss": 2.1058, "step": 208710 }, { "epoch": 0.7944398346566385, "grad_norm": 0.2003813534975052, "learning_rate": 0.0005, "loss": 2.101, "step": 208720 }, { "epoch": 0.7944778971247611, "grad_norm": 0.11509547382593155, "learning_rate": 0.0005, "loss": 2.0979, "step": 208730 }, { "epoch": 0.7945159595928838, "grad_norm": 0.13221478462219238, "learning_rate": 0.0005, "loss": 2.1086, "step": 208740 }, { "epoch": 0.7945540220610066, "grad_norm": 0.11940953880548477, "learning_rate": 0.0005, "loss": 2.095, "step": 208750 }, { "epoch": 0.7945920845291292, "grad_norm": 0.13081574440002441, "learning_rate": 0.0005, "loss": 2.1171, "step": 208760 }, { "epoch": 0.7946301469972519, "grad_norm": 0.11604013293981552, "learning_rate": 0.0005, "loss": 2.0976, "step": 208770 }, { "epoch": 0.7946682094653745, "grad_norm": 0.13396811485290527, "learning_rate": 0.0005, "loss": 2.1059, "step": 208780 }, { "epoch": 0.7947062719334973, "grad_norm": 0.14683446288108826, "learning_rate": 0.0005, "loss": 2.1295, "step": 208790 }, { "epoch": 0.79474433440162, "grad_norm": 0.13151712715625763, "learning_rate": 0.0005, "loss": 2.1029, "step": 208800 }, { "epoch": 0.7947823968697426, "grad_norm": 0.13657481968402863, "learning_rate": 0.0005, "loss": 2.1027, "step": 208810 }, { "epoch": 0.7948204593378653, "grad_norm": 0.12927566468715668, "learning_rate": 0.0005, "loss": 2.1123, "step": 208820 }, { "epoch": 0.794858521805988, "grad_norm": 0.11834193766117096, "learning_rate": 0.0005, "loss": 2.0974, "step": 208830 }, { "epoch": 0.7948965842741107, "grad_norm": 0.12424030900001526, "learning_rate": 0.0005, "loss": 2.1195, "step": 208840 }, { "epoch": 0.7949346467422334, "grad_norm": 0.12404995411634445, "learning_rate": 0.0005, "loss": 2.1099, "step": 208850 }, { "epoch": 0.794972709210356, "grad_norm": 0.12764133512973785, "learning_rate": 0.0005, "loss": 2.1195, "step": 208860 }, { "epoch": 0.7950107716784787, "grad_norm": 0.1281396746635437, "learning_rate": 0.0005, "loss": 2.1113, "step": 208870 }, { "epoch": 0.7950488341466014, "grad_norm": 0.13411007821559906, "learning_rate": 0.0005, "loss": 2.1099, "step": 208880 }, { "epoch": 0.7950868966147241, "grad_norm": 0.12388242781162262, "learning_rate": 0.0005, "loss": 2.0904, "step": 208890 }, { "epoch": 0.7951249590828467, "grad_norm": 0.15650135278701782, "learning_rate": 0.0005, "loss": 2.1068, "step": 208900 }, { "epoch": 0.7951630215509694, "grad_norm": 0.11632000654935837, "learning_rate": 0.0005, "loss": 2.0931, "step": 208910 }, { "epoch": 0.7952010840190922, "grad_norm": 0.1179938018321991, "learning_rate": 0.0005, "loss": 2.1034, "step": 208920 }, { "epoch": 0.7952391464872148, "grad_norm": 0.12669743597507477, "learning_rate": 0.0005, "loss": 2.1209, "step": 208930 }, { "epoch": 0.7952772089553375, "grad_norm": 0.13332943618297577, "learning_rate": 0.0005, "loss": 2.0969, "step": 208940 }, { "epoch": 0.7953152714234601, "grad_norm": 0.1121673732995987, "learning_rate": 0.0005, "loss": 2.0955, "step": 208950 }, { "epoch": 0.7953533338915829, "grad_norm": 0.11808997392654419, "learning_rate": 0.0005, "loss": 2.1124, "step": 208960 }, { "epoch": 0.7953913963597056, "grad_norm": 0.1382598578929901, "learning_rate": 0.0005, "loss": 2.0944, "step": 208970 }, { "epoch": 0.7954294588278282, "grad_norm": 0.1268141269683838, "learning_rate": 0.0005, "loss": 2.1008, "step": 208980 }, { "epoch": 0.7954675212959509, "grad_norm": 0.13062426447868347, "learning_rate": 0.0005, "loss": 2.1078, "step": 208990 }, { "epoch": 0.7955055837640735, "grad_norm": 0.12167482823133469, "learning_rate": 0.0005, "loss": 2.102, "step": 209000 }, { "epoch": 0.7955436462321963, "grad_norm": 0.12169355899095535, "learning_rate": 0.0005, "loss": 2.0866, "step": 209010 }, { "epoch": 0.795581708700319, "grad_norm": 0.12138015776872635, "learning_rate": 0.0005, "loss": 2.1096, "step": 209020 }, { "epoch": 0.7956197711684416, "grad_norm": 0.16487336158752441, "learning_rate": 0.0005, "loss": 2.1031, "step": 209030 }, { "epoch": 0.7956578336365643, "grad_norm": 0.1197362020611763, "learning_rate": 0.0005, "loss": 2.1127, "step": 209040 }, { "epoch": 0.795695896104687, "grad_norm": 0.1222614049911499, "learning_rate": 0.0005, "loss": 2.0989, "step": 209050 }, { "epoch": 0.7957339585728097, "grad_norm": 0.13899779319763184, "learning_rate": 0.0005, "loss": 2.1131, "step": 209060 }, { "epoch": 0.7957720210409324, "grad_norm": 0.12792204320430756, "learning_rate": 0.0005, "loss": 2.1127, "step": 209070 }, { "epoch": 0.795810083509055, "grad_norm": 0.12948037683963776, "learning_rate": 0.0005, "loss": 2.1028, "step": 209080 }, { "epoch": 0.7958481459771778, "grad_norm": 0.11667436361312866, "learning_rate": 0.0005, "loss": 2.0971, "step": 209090 }, { "epoch": 0.7958862084453004, "grad_norm": 0.12511169910430908, "learning_rate": 0.0005, "loss": 2.0989, "step": 209100 }, { "epoch": 0.7959242709134231, "grad_norm": 0.12640981376171112, "learning_rate": 0.0005, "loss": 2.0958, "step": 209110 }, { "epoch": 0.7959623333815458, "grad_norm": 0.13863001763820648, "learning_rate": 0.0005, "loss": 2.1037, "step": 209120 }, { "epoch": 0.7960003958496685, "grad_norm": 0.12970925867557526, "learning_rate": 0.0005, "loss": 2.1019, "step": 209130 }, { "epoch": 0.7960384583177912, "grad_norm": 0.12926508486270905, "learning_rate": 0.0005, "loss": 2.1059, "step": 209140 }, { "epoch": 0.7960765207859138, "grad_norm": 0.139383926987648, "learning_rate": 0.0005, "loss": 2.1056, "step": 209150 }, { "epoch": 0.7961145832540365, "grad_norm": 0.17026956379413605, "learning_rate": 0.0005, "loss": 2.0958, "step": 209160 }, { "epoch": 0.7961526457221592, "grad_norm": 0.12663201987743378, "learning_rate": 0.0005, "loss": 2.0865, "step": 209170 }, { "epoch": 0.7961907081902819, "grad_norm": 0.1172584593296051, "learning_rate": 0.0005, "loss": 2.1212, "step": 209180 }, { "epoch": 0.7962287706584046, "grad_norm": 0.11819595843553543, "learning_rate": 0.0005, "loss": 2.108, "step": 209190 }, { "epoch": 0.7962668331265272, "grad_norm": 0.13188645243644714, "learning_rate": 0.0005, "loss": 2.116, "step": 209200 }, { "epoch": 0.7963048955946499, "grad_norm": 0.1291705071926117, "learning_rate": 0.0005, "loss": 2.1202, "step": 209210 }, { "epoch": 0.7963429580627727, "grad_norm": 0.13012315332889557, "learning_rate": 0.0005, "loss": 2.112, "step": 209220 }, { "epoch": 0.7963810205308953, "grad_norm": 0.1288910061120987, "learning_rate": 0.0005, "loss": 2.1027, "step": 209230 }, { "epoch": 0.796419082999018, "grad_norm": 0.12641553580760956, "learning_rate": 0.0005, "loss": 2.1066, "step": 209240 }, { "epoch": 0.7964571454671406, "grad_norm": 0.12806424498558044, "learning_rate": 0.0005, "loss": 2.1051, "step": 209250 }, { "epoch": 0.7964952079352634, "grad_norm": 0.1583370417356491, "learning_rate": 0.0005, "loss": 2.0912, "step": 209260 }, { "epoch": 0.7965332704033861, "grad_norm": 0.1468966156244278, "learning_rate": 0.0005, "loss": 2.1097, "step": 209270 }, { "epoch": 0.7965713328715087, "grad_norm": 0.11811856180429459, "learning_rate": 0.0005, "loss": 2.1102, "step": 209280 }, { "epoch": 0.7966093953396314, "grad_norm": 0.12157811969518661, "learning_rate": 0.0005, "loss": 2.1057, "step": 209290 }, { "epoch": 0.796647457807754, "grad_norm": 0.13439470529556274, "learning_rate": 0.0005, "loss": 2.1199, "step": 209300 }, { "epoch": 0.7966855202758768, "grad_norm": 0.12155460566282272, "learning_rate": 0.0005, "loss": 2.1176, "step": 209310 }, { "epoch": 0.7967235827439995, "grad_norm": 0.12126308679580688, "learning_rate": 0.0005, "loss": 2.1025, "step": 209320 }, { "epoch": 0.7967616452121221, "grad_norm": 0.11862965673208237, "learning_rate": 0.0005, "loss": 2.1169, "step": 209330 }, { "epoch": 0.7967997076802448, "grad_norm": 0.12274859100580215, "learning_rate": 0.0005, "loss": 2.1059, "step": 209340 }, { "epoch": 0.7968377701483675, "grad_norm": 0.12550798058509827, "learning_rate": 0.0005, "loss": 2.1084, "step": 209350 }, { "epoch": 0.7968758326164902, "grad_norm": 0.14728228747844696, "learning_rate": 0.0005, "loss": 2.1084, "step": 209360 }, { "epoch": 0.7969138950846129, "grad_norm": 0.11769311875104904, "learning_rate": 0.0005, "loss": 2.1213, "step": 209370 }, { "epoch": 0.7969519575527355, "grad_norm": 0.13724328577518463, "learning_rate": 0.0005, "loss": 2.0971, "step": 209380 }, { "epoch": 0.7969900200208583, "grad_norm": 0.1331387758255005, "learning_rate": 0.0005, "loss": 2.1054, "step": 209390 }, { "epoch": 0.7970280824889809, "grad_norm": 0.12574204802513123, "learning_rate": 0.0005, "loss": 2.103, "step": 209400 }, { "epoch": 0.7970661449571036, "grad_norm": 0.12337487936019897, "learning_rate": 0.0005, "loss": 2.1043, "step": 209410 }, { "epoch": 0.7971042074252263, "grad_norm": 0.1246960312128067, "learning_rate": 0.0005, "loss": 2.1069, "step": 209420 }, { "epoch": 0.7971422698933489, "grad_norm": 0.12354270368814468, "learning_rate": 0.0005, "loss": 2.1083, "step": 209430 }, { "epoch": 0.7971803323614717, "grad_norm": 0.13482049107551575, "learning_rate": 0.0005, "loss": 2.106, "step": 209440 }, { "epoch": 0.7972183948295943, "grad_norm": 0.11957293748855591, "learning_rate": 0.0005, "loss": 2.0908, "step": 209450 }, { "epoch": 0.797256457297717, "grad_norm": 0.11577066034078598, "learning_rate": 0.0005, "loss": 2.1104, "step": 209460 }, { "epoch": 0.7972945197658396, "grad_norm": 0.12052362412214279, "learning_rate": 0.0005, "loss": 2.1076, "step": 209470 }, { "epoch": 0.7973325822339624, "grad_norm": 0.12633970379829407, "learning_rate": 0.0005, "loss": 2.1141, "step": 209480 }, { "epoch": 0.7973706447020851, "grad_norm": 0.12112203985452652, "learning_rate": 0.0005, "loss": 2.1047, "step": 209490 }, { "epoch": 0.7974087071702077, "grad_norm": 0.1264239400625229, "learning_rate": 0.0005, "loss": 2.1077, "step": 209500 }, { "epoch": 0.7974467696383304, "grad_norm": 0.11804235726594925, "learning_rate": 0.0005, "loss": 2.1085, "step": 209510 }, { "epoch": 0.7974848321064532, "grad_norm": 0.12418785691261292, "learning_rate": 0.0005, "loss": 2.1066, "step": 209520 }, { "epoch": 0.7975228945745758, "grad_norm": 0.11656662076711655, "learning_rate": 0.0005, "loss": 2.1169, "step": 209530 }, { "epoch": 0.7975609570426985, "grad_norm": 0.12503372132778168, "learning_rate": 0.0005, "loss": 2.1039, "step": 209540 }, { "epoch": 0.7975990195108211, "grad_norm": 0.12820298969745636, "learning_rate": 0.0005, "loss": 2.1181, "step": 209550 }, { "epoch": 0.7976370819789439, "grad_norm": 0.13916277885437012, "learning_rate": 0.0005, "loss": 2.1127, "step": 209560 }, { "epoch": 0.7976751444470666, "grad_norm": 0.13408006727695465, "learning_rate": 0.0005, "loss": 2.1014, "step": 209570 }, { "epoch": 0.7977132069151892, "grad_norm": 0.12541238963603973, "learning_rate": 0.0005, "loss": 2.0987, "step": 209580 }, { "epoch": 0.7977512693833119, "grad_norm": 0.11731202900409698, "learning_rate": 0.0005, "loss": 2.0898, "step": 209590 }, { "epoch": 0.7977893318514345, "grad_norm": 0.12128131836652756, "learning_rate": 0.0005, "loss": 2.1056, "step": 209600 }, { "epoch": 0.7978273943195573, "grad_norm": 0.12297343462705612, "learning_rate": 0.0005, "loss": 2.102, "step": 209610 }, { "epoch": 0.79786545678768, "grad_norm": 0.13395269215106964, "learning_rate": 0.0005, "loss": 2.0958, "step": 209620 }, { "epoch": 0.7979035192558026, "grad_norm": 0.13858111202716827, "learning_rate": 0.0005, "loss": 2.0983, "step": 209630 }, { "epoch": 0.7979415817239253, "grad_norm": 0.12004192918539047, "learning_rate": 0.0005, "loss": 2.1101, "step": 209640 }, { "epoch": 0.797979644192048, "grad_norm": 0.12386671453714371, "learning_rate": 0.0005, "loss": 2.0935, "step": 209650 }, { "epoch": 0.7980177066601707, "grad_norm": 0.13386517763137817, "learning_rate": 0.0005, "loss": 2.1033, "step": 209660 }, { "epoch": 0.7980557691282933, "grad_norm": 0.1264449954032898, "learning_rate": 0.0005, "loss": 2.113, "step": 209670 }, { "epoch": 0.798093831596416, "grad_norm": 0.1198006197810173, "learning_rate": 0.0005, "loss": 2.0796, "step": 209680 }, { "epoch": 0.7981318940645388, "grad_norm": 0.13347522914409637, "learning_rate": 0.0005, "loss": 2.1117, "step": 209690 }, { "epoch": 0.7981699565326614, "grad_norm": 0.13666976988315582, "learning_rate": 0.0005, "loss": 2.1015, "step": 209700 }, { "epoch": 0.7982080190007841, "grad_norm": 0.12404890358448029, "learning_rate": 0.0005, "loss": 2.1083, "step": 209710 }, { "epoch": 0.7982460814689067, "grad_norm": 0.1316789984703064, "learning_rate": 0.0005, "loss": 2.1106, "step": 209720 }, { "epoch": 0.7982841439370294, "grad_norm": 0.13406074047088623, "learning_rate": 0.0005, "loss": 2.0984, "step": 209730 }, { "epoch": 0.7983222064051522, "grad_norm": 0.13223187625408173, "learning_rate": 0.0005, "loss": 2.1092, "step": 209740 }, { "epoch": 0.7983602688732748, "grad_norm": 0.12085633724927902, "learning_rate": 0.0005, "loss": 2.1028, "step": 209750 }, { "epoch": 0.7983983313413975, "grad_norm": 0.12477198988199234, "learning_rate": 0.0005, "loss": 2.0986, "step": 209760 }, { "epoch": 0.7984363938095201, "grad_norm": 0.12964096665382385, "learning_rate": 0.0005, "loss": 2.1109, "step": 209770 }, { "epoch": 0.7984744562776429, "grad_norm": 0.12128846347332001, "learning_rate": 0.0005, "loss": 2.1116, "step": 209780 }, { "epoch": 0.7985125187457656, "grad_norm": 0.11454357951879501, "learning_rate": 0.0005, "loss": 2.0966, "step": 209790 }, { "epoch": 0.7985505812138882, "grad_norm": 0.12122207880020142, "learning_rate": 0.0005, "loss": 2.104, "step": 209800 }, { "epoch": 0.7985886436820109, "grad_norm": 0.12776987254619598, "learning_rate": 0.0005, "loss": 2.1053, "step": 209810 }, { "epoch": 0.7986267061501336, "grad_norm": 0.11924677342176437, "learning_rate": 0.0005, "loss": 2.1034, "step": 209820 }, { "epoch": 0.7986647686182563, "grad_norm": 0.125722736120224, "learning_rate": 0.0005, "loss": 2.1173, "step": 209830 }, { "epoch": 0.798702831086379, "grad_norm": 0.12875454127788544, "learning_rate": 0.0005, "loss": 2.1007, "step": 209840 }, { "epoch": 0.7987408935545016, "grad_norm": 0.129396453499794, "learning_rate": 0.0005, "loss": 2.1092, "step": 209850 }, { "epoch": 0.7987789560226243, "grad_norm": 0.13016273081302643, "learning_rate": 0.0005, "loss": 2.106, "step": 209860 }, { "epoch": 0.798817018490747, "grad_norm": 0.1285935938358307, "learning_rate": 0.0005, "loss": 2.1246, "step": 209870 }, { "epoch": 0.7988550809588697, "grad_norm": 0.14093440771102905, "learning_rate": 0.0005, "loss": 2.1076, "step": 209880 }, { "epoch": 0.7988931434269924, "grad_norm": 0.1263287216424942, "learning_rate": 0.0005, "loss": 2.1111, "step": 209890 }, { "epoch": 0.798931205895115, "grad_norm": 0.1366177499294281, "learning_rate": 0.0005, "loss": 2.1029, "step": 209900 }, { "epoch": 0.7989692683632378, "grad_norm": 0.11795708537101746, "learning_rate": 0.0005, "loss": 2.1055, "step": 209910 }, { "epoch": 0.7990073308313604, "grad_norm": 0.1111733689904213, "learning_rate": 0.0005, "loss": 2.0974, "step": 209920 }, { "epoch": 0.7990453932994831, "grad_norm": 0.13642236590385437, "learning_rate": 0.0005, "loss": 2.1087, "step": 209930 }, { "epoch": 0.7990834557676058, "grad_norm": 0.12918546795845032, "learning_rate": 0.0005, "loss": 2.0907, "step": 209940 }, { "epoch": 0.7991215182357285, "grad_norm": 0.1210419088602066, "learning_rate": 0.0005, "loss": 2.1084, "step": 209950 }, { "epoch": 0.7991595807038512, "grad_norm": 0.13431444764137268, "learning_rate": 0.0005, "loss": 2.1132, "step": 209960 }, { "epoch": 0.7991976431719738, "grad_norm": 0.1349552571773529, "learning_rate": 0.0005, "loss": 2.1109, "step": 209970 }, { "epoch": 0.7992357056400965, "grad_norm": 0.1264919936656952, "learning_rate": 0.0005, "loss": 2.1114, "step": 209980 }, { "epoch": 0.7992737681082193, "grad_norm": 0.12178479880094528, "learning_rate": 0.0005, "loss": 2.1214, "step": 209990 }, { "epoch": 0.7993118305763419, "grad_norm": 0.13122200965881348, "learning_rate": 0.0005, "loss": 2.0908, "step": 210000 }, { "epoch": 0.7993498930444646, "grad_norm": 0.12984102964401245, "learning_rate": 0.0005, "loss": 2.1166, "step": 210010 }, { "epoch": 0.7993879555125872, "grad_norm": 0.11454375833272934, "learning_rate": 0.0005, "loss": 2.0861, "step": 210020 }, { "epoch": 0.7994260179807099, "grad_norm": 0.1272624135017395, "learning_rate": 0.0005, "loss": 2.1162, "step": 210030 }, { "epoch": 0.7994640804488327, "grad_norm": 0.12016329169273376, "learning_rate": 0.0005, "loss": 2.097, "step": 210040 }, { "epoch": 0.7995021429169553, "grad_norm": 0.13649529218673706, "learning_rate": 0.0005, "loss": 2.0994, "step": 210050 }, { "epoch": 0.799540205385078, "grad_norm": 0.1260644644498825, "learning_rate": 0.0005, "loss": 2.1027, "step": 210060 }, { "epoch": 0.7995782678532006, "grad_norm": 0.1434025764465332, "learning_rate": 0.0005, "loss": 2.1043, "step": 210070 }, { "epoch": 0.7996163303213234, "grad_norm": 0.1485346555709839, "learning_rate": 0.0005, "loss": 2.1054, "step": 210080 }, { "epoch": 0.7996543927894461, "grad_norm": 0.12919403612613678, "learning_rate": 0.0005, "loss": 2.1137, "step": 210090 }, { "epoch": 0.7996924552575687, "grad_norm": 0.13078376650810242, "learning_rate": 0.0005, "loss": 2.1043, "step": 210100 }, { "epoch": 0.7997305177256914, "grad_norm": 0.13152313232421875, "learning_rate": 0.0005, "loss": 2.1221, "step": 210110 }, { "epoch": 0.7997685801938141, "grad_norm": 0.12501022219657898, "learning_rate": 0.0005, "loss": 2.1144, "step": 210120 }, { "epoch": 0.7998066426619368, "grad_norm": 0.12021830677986145, "learning_rate": 0.0005, "loss": 2.1188, "step": 210130 }, { "epoch": 0.7998447051300595, "grad_norm": 0.11749450862407684, "learning_rate": 0.0005, "loss": 2.1282, "step": 210140 }, { "epoch": 0.7998827675981821, "grad_norm": 0.12211523205041885, "learning_rate": 0.0005, "loss": 2.102, "step": 210150 }, { "epoch": 0.7999208300663048, "grad_norm": 0.12200842797756195, "learning_rate": 0.0005, "loss": 2.1105, "step": 210160 }, { "epoch": 0.7999588925344275, "grad_norm": 0.11807631701231003, "learning_rate": 0.0005, "loss": 2.1017, "step": 210170 }, { "epoch": 0.7999969550025502, "grad_norm": 0.11688867211341858, "learning_rate": 0.0005, "loss": 2.1063, "step": 210180 }, { "epoch": 0.8000350174706728, "grad_norm": 0.12884896993637085, "learning_rate": 0.0005, "loss": 2.104, "step": 210190 }, { "epoch": 0.8000730799387955, "grad_norm": 0.11895192414522171, "learning_rate": 0.0005, "loss": 2.1072, "step": 210200 }, { "epoch": 0.8001111424069183, "grad_norm": 0.13126152753829956, "learning_rate": 0.0005, "loss": 2.0943, "step": 210210 }, { "epoch": 0.8001492048750409, "grad_norm": 0.1251167356967926, "learning_rate": 0.0005, "loss": 2.0996, "step": 210220 }, { "epoch": 0.8001872673431636, "grad_norm": 0.11805843561887741, "learning_rate": 0.0005, "loss": 2.1167, "step": 210230 }, { "epoch": 0.8002253298112862, "grad_norm": 0.12272538989782333, "learning_rate": 0.0005, "loss": 2.103, "step": 210240 }, { "epoch": 0.800263392279409, "grad_norm": 0.1270427703857422, "learning_rate": 0.0005, "loss": 2.1023, "step": 210250 }, { "epoch": 0.8003014547475317, "grad_norm": 0.11693274229764938, "learning_rate": 0.0005, "loss": 2.0947, "step": 210260 }, { "epoch": 0.8003395172156543, "grad_norm": 0.12159300595521927, "learning_rate": 0.0005, "loss": 2.0994, "step": 210270 }, { "epoch": 0.800377579683777, "grad_norm": 0.12339331954717636, "learning_rate": 0.0005, "loss": 2.1051, "step": 210280 }, { "epoch": 0.8004156421518998, "grad_norm": 0.11767217516899109, "learning_rate": 0.0005, "loss": 2.1083, "step": 210290 }, { "epoch": 0.8004537046200224, "grad_norm": 0.12712466716766357, "learning_rate": 0.0005, "loss": 2.1133, "step": 210300 }, { "epoch": 0.8004917670881451, "grad_norm": 0.11670083552598953, "learning_rate": 0.0005, "loss": 2.1201, "step": 210310 }, { "epoch": 0.8005298295562677, "grad_norm": 0.13408468663692474, "learning_rate": 0.0005, "loss": 2.0973, "step": 210320 }, { "epoch": 0.8005678920243904, "grad_norm": 0.12165634334087372, "learning_rate": 0.0005, "loss": 2.1116, "step": 210330 }, { "epoch": 0.8006059544925131, "grad_norm": 0.14448975026607513, "learning_rate": 0.0005, "loss": 2.0914, "step": 210340 }, { "epoch": 0.8006440169606358, "grad_norm": 0.12268371880054474, "learning_rate": 0.0005, "loss": 2.1049, "step": 210350 }, { "epoch": 0.8006820794287585, "grad_norm": 0.14124977588653564, "learning_rate": 0.0005, "loss": 2.1013, "step": 210360 }, { "epoch": 0.8007201418968811, "grad_norm": 0.12242026627063751, "learning_rate": 0.0005, "loss": 2.1145, "step": 210370 }, { "epoch": 0.8007582043650039, "grad_norm": 0.1343546211719513, "learning_rate": 0.0005, "loss": 2.103, "step": 210380 }, { "epoch": 0.8007962668331265, "grad_norm": 0.13093207776546478, "learning_rate": 0.0005, "loss": 2.1014, "step": 210390 }, { "epoch": 0.8008343293012492, "grad_norm": 0.11842503398656845, "learning_rate": 0.0005, "loss": 2.1081, "step": 210400 }, { "epoch": 0.8008723917693719, "grad_norm": 0.12339796870946884, "learning_rate": 0.0005, "loss": 2.1042, "step": 210410 }, { "epoch": 0.8009104542374946, "grad_norm": 0.11186055094003677, "learning_rate": 0.0005, "loss": 2.1006, "step": 210420 }, { "epoch": 0.8009485167056173, "grad_norm": 0.12415434420108795, "learning_rate": 0.0005, "loss": 2.116, "step": 210430 }, { "epoch": 0.8009865791737399, "grad_norm": 0.121949702501297, "learning_rate": 0.0005, "loss": 2.0952, "step": 210440 }, { "epoch": 0.8010246416418626, "grad_norm": 0.14509126543998718, "learning_rate": 0.0005, "loss": 2.113, "step": 210450 }, { "epoch": 0.8010627041099853, "grad_norm": 0.12276475131511688, "learning_rate": 0.0005, "loss": 2.1043, "step": 210460 }, { "epoch": 0.801100766578108, "grad_norm": 0.12225574254989624, "learning_rate": 0.0005, "loss": 2.1159, "step": 210470 }, { "epoch": 0.8011388290462307, "grad_norm": 0.12323661148548126, "learning_rate": 0.0005, "loss": 2.1095, "step": 210480 }, { "epoch": 0.8011768915143533, "grad_norm": 0.12105714529752731, "learning_rate": 0.0005, "loss": 2.1167, "step": 210490 }, { "epoch": 0.801214953982476, "grad_norm": 0.1379852592945099, "learning_rate": 0.0005, "loss": 2.1239, "step": 210500 }, { "epoch": 0.8012530164505988, "grad_norm": 0.1205686628818512, "learning_rate": 0.0005, "loss": 2.1134, "step": 210510 }, { "epoch": 0.8012910789187214, "grad_norm": 0.1286148726940155, "learning_rate": 0.0005, "loss": 2.1162, "step": 210520 }, { "epoch": 0.8013291413868441, "grad_norm": 0.1285942792892456, "learning_rate": 0.0005, "loss": 2.0926, "step": 210530 }, { "epoch": 0.8013672038549667, "grad_norm": 0.13062357902526855, "learning_rate": 0.0005, "loss": 2.103, "step": 210540 }, { "epoch": 0.8014052663230895, "grad_norm": 0.12718930840492249, "learning_rate": 0.0005, "loss": 2.0935, "step": 210550 }, { "epoch": 0.8014433287912122, "grad_norm": 0.134733647108078, "learning_rate": 0.0005, "loss": 2.116, "step": 210560 }, { "epoch": 0.8014813912593348, "grad_norm": 0.12464946508407593, "learning_rate": 0.0005, "loss": 2.1078, "step": 210570 }, { "epoch": 0.8015194537274575, "grad_norm": 0.1174856424331665, "learning_rate": 0.0005, "loss": 2.1019, "step": 210580 }, { "epoch": 0.8015575161955801, "grad_norm": 0.12474462389945984, "learning_rate": 0.0005, "loss": 2.1162, "step": 210590 }, { "epoch": 0.8015955786637029, "grad_norm": 0.12405211478471756, "learning_rate": 0.0005, "loss": 2.1045, "step": 210600 }, { "epoch": 0.8016336411318256, "grad_norm": 0.13704289495944977, "learning_rate": 0.0005, "loss": 2.1104, "step": 210610 }, { "epoch": 0.8016717035999482, "grad_norm": 0.1354646533727646, "learning_rate": 0.0005, "loss": 2.0865, "step": 210620 }, { "epoch": 0.8017097660680709, "grad_norm": 0.12755626440048218, "learning_rate": 0.0005, "loss": 2.1062, "step": 210630 }, { "epoch": 0.8017478285361936, "grad_norm": 0.12112542241811752, "learning_rate": 0.0005, "loss": 2.0911, "step": 210640 }, { "epoch": 0.8017858910043163, "grad_norm": 0.12758193910121918, "learning_rate": 0.0005, "loss": 2.1164, "step": 210650 }, { "epoch": 0.801823953472439, "grad_norm": 0.11487865447998047, "learning_rate": 0.0005, "loss": 2.1063, "step": 210660 }, { "epoch": 0.8018620159405616, "grad_norm": 0.1294761449098587, "learning_rate": 0.0005, "loss": 2.1169, "step": 210670 }, { "epoch": 0.8019000784086844, "grad_norm": 0.11908960342407227, "learning_rate": 0.0005, "loss": 2.0932, "step": 210680 }, { "epoch": 0.801938140876807, "grad_norm": 0.12050192803144455, "learning_rate": 0.0005, "loss": 2.1161, "step": 210690 }, { "epoch": 0.8019762033449297, "grad_norm": 0.13563457131385803, "learning_rate": 0.0005, "loss": 2.1191, "step": 210700 }, { "epoch": 0.8020142658130524, "grad_norm": 0.19248554110527039, "learning_rate": 0.0005, "loss": 2.1101, "step": 210710 }, { "epoch": 0.8020523282811751, "grad_norm": 0.11567103862762451, "learning_rate": 0.0005, "loss": 2.1071, "step": 210720 }, { "epoch": 0.8020903907492978, "grad_norm": 0.13898178935050964, "learning_rate": 0.0005, "loss": 2.1044, "step": 210730 }, { "epoch": 0.8021284532174204, "grad_norm": 0.1283826380968094, "learning_rate": 0.0005, "loss": 2.1073, "step": 210740 }, { "epoch": 0.8021665156855431, "grad_norm": 0.12306949496269226, "learning_rate": 0.0005, "loss": 2.1063, "step": 210750 }, { "epoch": 0.8022045781536657, "grad_norm": 0.13112527132034302, "learning_rate": 0.0005, "loss": 2.1024, "step": 210760 }, { "epoch": 0.8022426406217885, "grad_norm": 0.11857830733060837, "learning_rate": 0.0005, "loss": 2.1033, "step": 210770 }, { "epoch": 0.8022807030899112, "grad_norm": 0.12262977659702301, "learning_rate": 0.0005, "loss": 2.1182, "step": 210780 }, { "epoch": 0.8023187655580338, "grad_norm": 0.14496393501758575, "learning_rate": 0.0005, "loss": 2.0992, "step": 210790 }, { "epoch": 0.8023568280261565, "grad_norm": 0.12095699459314346, "learning_rate": 0.0005, "loss": 2.1042, "step": 210800 }, { "epoch": 0.8023948904942793, "grad_norm": 0.12212936580181122, "learning_rate": 0.0005, "loss": 2.1127, "step": 210810 }, { "epoch": 0.8024329529624019, "grad_norm": 0.1293233036994934, "learning_rate": 0.0005, "loss": 2.0964, "step": 210820 }, { "epoch": 0.8024710154305246, "grad_norm": 0.12779028713703156, "learning_rate": 0.0005, "loss": 2.1036, "step": 210830 }, { "epoch": 0.8025090778986472, "grad_norm": 0.12210148572921753, "learning_rate": 0.0005, "loss": 2.1072, "step": 210840 }, { "epoch": 0.80254714036677, "grad_norm": 0.13600996136665344, "learning_rate": 0.0005, "loss": 2.1059, "step": 210850 }, { "epoch": 0.8025852028348927, "grad_norm": 0.11877809464931488, "learning_rate": 0.0005, "loss": 2.1009, "step": 210860 }, { "epoch": 0.8026232653030153, "grad_norm": 0.1277652084827423, "learning_rate": 0.0005, "loss": 2.1206, "step": 210870 }, { "epoch": 0.802661327771138, "grad_norm": 0.12291332334280014, "learning_rate": 0.0005, "loss": 2.0952, "step": 210880 }, { "epoch": 0.8026993902392606, "grad_norm": 0.12449786812067032, "learning_rate": 0.0005, "loss": 2.1117, "step": 210890 }, { "epoch": 0.8027374527073834, "grad_norm": 0.11884238570928574, "learning_rate": 0.0005, "loss": 2.1025, "step": 210900 }, { "epoch": 0.802775515175506, "grad_norm": 0.1160527914762497, "learning_rate": 0.0005, "loss": 2.1083, "step": 210910 }, { "epoch": 0.8028135776436287, "grad_norm": 0.11842243373394012, "learning_rate": 0.0005, "loss": 2.0906, "step": 210920 }, { "epoch": 0.8028516401117514, "grad_norm": 0.13278530538082123, "learning_rate": 0.0005, "loss": 2.101, "step": 210930 }, { "epoch": 0.8028897025798741, "grad_norm": 0.13262298703193665, "learning_rate": 0.0005, "loss": 2.1057, "step": 210940 }, { "epoch": 0.8029277650479968, "grad_norm": 0.1319943219423294, "learning_rate": 0.0005, "loss": 2.0988, "step": 210950 }, { "epoch": 0.8029658275161194, "grad_norm": 0.12588515877723694, "learning_rate": 0.0005, "loss": 2.089, "step": 210960 }, { "epoch": 0.8030038899842421, "grad_norm": 0.1147276908159256, "learning_rate": 0.0005, "loss": 2.0954, "step": 210970 }, { "epoch": 0.8030419524523649, "grad_norm": 0.11792852729558945, "learning_rate": 0.0005, "loss": 2.1047, "step": 210980 }, { "epoch": 0.8030800149204875, "grad_norm": 0.12042088061571121, "learning_rate": 0.0005, "loss": 2.0852, "step": 210990 }, { "epoch": 0.8031180773886102, "grad_norm": 0.11919981986284256, "learning_rate": 0.0005, "loss": 2.1062, "step": 211000 }, { "epoch": 0.8031561398567328, "grad_norm": 0.13294468820095062, "learning_rate": 0.0005, "loss": 2.1001, "step": 211010 }, { "epoch": 0.8031942023248555, "grad_norm": 0.13578881323337555, "learning_rate": 0.0005, "loss": 2.1182, "step": 211020 }, { "epoch": 0.8032322647929783, "grad_norm": 0.13631047308444977, "learning_rate": 0.0005, "loss": 2.119, "step": 211030 }, { "epoch": 0.8032703272611009, "grad_norm": 0.12270741909742355, "learning_rate": 0.0005, "loss": 2.1056, "step": 211040 }, { "epoch": 0.8033083897292236, "grad_norm": 0.14174945652484894, "learning_rate": 0.0005, "loss": 2.0858, "step": 211050 }, { "epoch": 0.8033464521973462, "grad_norm": 0.12768588960170746, "learning_rate": 0.0005, "loss": 2.0999, "step": 211060 }, { "epoch": 0.803384514665469, "grad_norm": 0.1305711418390274, "learning_rate": 0.0005, "loss": 2.1247, "step": 211070 }, { "epoch": 0.8034225771335917, "grad_norm": 0.1230345070362091, "learning_rate": 0.0005, "loss": 2.1189, "step": 211080 }, { "epoch": 0.8034606396017143, "grad_norm": 0.12930433452129364, "learning_rate": 0.0005, "loss": 2.0919, "step": 211090 }, { "epoch": 0.803498702069837, "grad_norm": 0.14357741177082062, "learning_rate": 0.0005, "loss": 2.1062, "step": 211100 }, { "epoch": 0.8035367645379597, "grad_norm": 0.12526747584342957, "learning_rate": 0.0005, "loss": 2.1065, "step": 211110 }, { "epoch": 0.8035748270060824, "grad_norm": 0.15204723179340363, "learning_rate": 0.0005, "loss": 2.1067, "step": 211120 }, { "epoch": 0.8036128894742051, "grad_norm": 0.13927868008613586, "learning_rate": 0.0005, "loss": 2.0982, "step": 211130 }, { "epoch": 0.8036509519423277, "grad_norm": 0.12264429032802582, "learning_rate": 0.0005, "loss": 2.1013, "step": 211140 }, { "epoch": 0.8036890144104505, "grad_norm": 0.12423133850097656, "learning_rate": 0.0005, "loss": 2.1091, "step": 211150 }, { "epoch": 0.8037270768785731, "grad_norm": 0.1225658655166626, "learning_rate": 0.0005, "loss": 2.1022, "step": 211160 }, { "epoch": 0.8037651393466958, "grad_norm": 0.12389955669641495, "learning_rate": 0.0005, "loss": 2.0979, "step": 211170 }, { "epoch": 0.8038032018148185, "grad_norm": 0.1286647617816925, "learning_rate": 0.0005, "loss": 2.1083, "step": 211180 }, { "epoch": 0.8038412642829411, "grad_norm": 0.12770266830921173, "learning_rate": 0.0005, "loss": 2.1053, "step": 211190 }, { "epoch": 0.8038793267510639, "grad_norm": 0.1357760727405548, "learning_rate": 0.0005, "loss": 2.1101, "step": 211200 }, { "epoch": 0.8039173892191865, "grad_norm": 0.12599506974220276, "learning_rate": 0.0005, "loss": 2.1189, "step": 211210 }, { "epoch": 0.8039554516873092, "grad_norm": 0.11944464594125748, "learning_rate": 0.0005, "loss": 2.1171, "step": 211220 }, { "epoch": 0.8039935141554319, "grad_norm": 0.12599098682403564, "learning_rate": 0.0005, "loss": 2.1205, "step": 211230 }, { "epoch": 0.8040315766235546, "grad_norm": 0.13443703949451447, "learning_rate": 0.0005, "loss": 2.0954, "step": 211240 }, { "epoch": 0.8040696390916773, "grad_norm": 0.138460174202919, "learning_rate": 0.0005, "loss": 2.1104, "step": 211250 }, { "epoch": 0.8041077015597999, "grad_norm": 0.12518736720085144, "learning_rate": 0.0005, "loss": 2.1053, "step": 211260 }, { "epoch": 0.8041457640279226, "grad_norm": 0.12137263268232346, "learning_rate": 0.0005, "loss": 2.1038, "step": 211270 }, { "epoch": 0.8041838264960454, "grad_norm": 0.12049419432878494, "learning_rate": 0.0005, "loss": 2.09, "step": 211280 }, { "epoch": 0.804221888964168, "grad_norm": 0.12654536962509155, "learning_rate": 0.0005, "loss": 2.1023, "step": 211290 }, { "epoch": 0.8042599514322907, "grad_norm": 0.12805117666721344, "learning_rate": 0.0005, "loss": 2.1004, "step": 211300 }, { "epoch": 0.8042980139004133, "grad_norm": 0.12165186554193497, "learning_rate": 0.0005, "loss": 2.1034, "step": 211310 }, { "epoch": 0.804336076368536, "grad_norm": 0.13391226530075073, "learning_rate": 0.0005, "loss": 2.1087, "step": 211320 }, { "epoch": 0.8043741388366588, "grad_norm": 0.12267400324344635, "learning_rate": 0.0005, "loss": 2.1112, "step": 211330 }, { "epoch": 0.8044122013047814, "grad_norm": 0.123236745595932, "learning_rate": 0.0005, "loss": 2.1023, "step": 211340 }, { "epoch": 0.8044502637729041, "grad_norm": 0.12270749360322952, "learning_rate": 0.0005, "loss": 2.1041, "step": 211350 }, { "epoch": 0.8044883262410267, "grad_norm": 0.11430325359106064, "learning_rate": 0.0005, "loss": 2.1104, "step": 211360 }, { "epoch": 0.8045263887091495, "grad_norm": 0.12952934205532074, "learning_rate": 0.0005, "loss": 2.1192, "step": 211370 }, { "epoch": 0.8045644511772722, "grad_norm": 0.12921790778636932, "learning_rate": 0.0005, "loss": 2.1252, "step": 211380 }, { "epoch": 0.8046025136453948, "grad_norm": 0.13494722545146942, "learning_rate": 0.0005, "loss": 2.1035, "step": 211390 }, { "epoch": 0.8046405761135175, "grad_norm": 0.13030272722244263, "learning_rate": 0.0005, "loss": 2.1133, "step": 211400 }, { "epoch": 0.8046786385816402, "grad_norm": 0.1260061413049698, "learning_rate": 0.0005, "loss": 2.1153, "step": 211410 }, { "epoch": 0.8047167010497629, "grad_norm": 0.1300874650478363, "learning_rate": 0.0005, "loss": 2.0976, "step": 211420 }, { "epoch": 0.8047547635178856, "grad_norm": 0.11533726006746292, "learning_rate": 0.0005, "loss": 2.1045, "step": 211430 }, { "epoch": 0.8047928259860082, "grad_norm": 0.12814481556415558, "learning_rate": 0.0005, "loss": 2.1037, "step": 211440 }, { "epoch": 0.8048308884541309, "grad_norm": 0.11733753979206085, "learning_rate": 0.0005, "loss": 2.1153, "step": 211450 }, { "epoch": 0.8048689509222536, "grad_norm": 0.14942340552806854, "learning_rate": 0.0005, "loss": 2.0851, "step": 211460 }, { "epoch": 0.8049070133903763, "grad_norm": 0.11945247650146484, "learning_rate": 0.0005, "loss": 2.1026, "step": 211470 }, { "epoch": 0.804945075858499, "grad_norm": 0.13306763768196106, "learning_rate": 0.0005, "loss": 2.1101, "step": 211480 }, { "epoch": 0.8049831383266216, "grad_norm": 0.12273822724819183, "learning_rate": 0.0005, "loss": 2.1039, "step": 211490 }, { "epoch": 0.8050212007947444, "grad_norm": 0.1341172754764557, "learning_rate": 0.0005, "loss": 2.0872, "step": 211500 }, { "epoch": 0.805059263262867, "grad_norm": 0.118862085044384, "learning_rate": 0.0005, "loss": 2.1011, "step": 211510 }, { "epoch": 0.8050973257309897, "grad_norm": 0.1313682496547699, "learning_rate": 0.0005, "loss": 2.0974, "step": 211520 }, { "epoch": 0.8051353881991123, "grad_norm": 0.11379699409008026, "learning_rate": 0.0005, "loss": 2.102, "step": 211530 }, { "epoch": 0.8051734506672351, "grad_norm": 0.12442870438098907, "learning_rate": 0.0005, "loss": 2.1095, "step": 211540 }, { "epoch": 0.8052115131353578, "grad_norm": 0.1156822144985199, "learning_rate": 0.0005, "loss": 2.1092, "step": 211550 }, { "epoch": 0.8052495756034804, "grad_norm": 0.12452563643455505, "learning_rate": 0.0005, "loss": 2.1161, "step": 211560 }, { "epoch": 0.8052876380716031, "grad_norm": 0.13099755346775055, "learning_rate": 0.0005, "loss": 2.1256, "step": 211570 }, { "epoch": 0.8053257005397259, "grad_norm": 0.1255125254392624, "learning_rate": 0.0005, "loss": 2.1049, "step": 211580 }, { "epoch": 0.8053637630078485, "grad_norm": 0.12978138029575348, "learning_rate": 0.0005, "loss": 2.124, "step": 211590 }, { "epoch": 0.8054018254759712, "grad_norm": 0.1305938959121704, "learning_rate": 0.0005, "loss": 2.1147, "step": 211600 }, { "epoch": 0.8054398879440938, "grad_norm": 0.11762437969446182, "learning_rate": 0.0005, "loss": 2.0957, "step": 211610 }, { "epoch": 0.8054779504122165, "grad_norm": 0.13285085558891296, "learning_rate": 0.0005, "loss": 2.1017, "step": 211620 }, { "epoch": 0.8055160128803392, "grad_norm": 0.11013451218605042, "learning_rate": 0.0005, "loss": 2.1063, "step": 211630 }, { "epoch": 0.8055540753484619, "grad_norm": 0.11815615743398666, "learning_rate": 0.0005, "loss": 2.0987, "step": 211640 }, { "epoch": 0.8055921378165846, "grad_norm": 0.12628640234470367, "learning_rate": 0.0005, "loss": 2.0902, "step": 211650 }, { "epoch": 0.8056302002847072, "grad_norm": 0.12378709763288498, "learning_rate": 0.0005, "loss": 2.1071, "step": 211660 }, { "epoch": 0.80566826275283, "grad_norm": 0.13026103377342224, "learning_rate": 0.0005, "loss": 2.11, "step": 211670 }, { "epoch": 0.8057063252209526, "grad_norm": 0.13067007064819336, "learning_rate": 0.0005, "loss": 2.1156, "step": 211680 }, { "epoch": 0.8057443876890753, "grad_norm": 0.13219596445560455, "learning_rate": 0.0005, "loss": 2.1157, "step": 211690 }, { "epoch": 0.805782450157198, "grad_norm": 0.12373536080121994, "learning_rate": 0.0005, "loss": 2.1077, "step": 211700 }, { "epoch": 0.8058205126253207, "grad_norm": 0.12243331968784332, "learning_rate": 0.0005, "loss": 2.1111, "step": 211710 }, { "epoch": 0.8058585750934434, "grad_norm": 0.12543143332004547, "learning_rate": 0.0005, "loss": 2.1066, "step": 211720 }, { "epoch": 0.805896637561566, "grad_norm": 0.12280245870351791, "learning_rate": 0.0005, "loss": 2.1015, "step": 211730 }, { "epoch": 0.8059347000296887, "grad_norm": 0.12935133278369904, "learning_rate": 0.0005, "loss": 2.1081, "step": 211740 }, { "epoch": 0.8059727624978114, "grad_norm": 0.12362467497587204, "learning_rate": 0.0005, "loss": 2.1012, "step": 211750 }, { "epoch": 0.8060108249659341, "grad_norm": 0.12046709656715393, "learning_rate": 0.0005, "loss": 2.0875, "step": 211760 }, { "epoch": 0.8060488874340568, "grad_norm": 0.12583065032958984, "learning_rate": 0.0005, "loss": 2.0924, "step": 211770 }, { "epoch": 0.8060869499021794, "grad_norm": 0.13486357033252716, "learning_rate": 0.0005, "loss": 2.1171, "step": 211780 }, { "epoch": 0.8061250123703021, "grad_norm": 0.11814618855714798, "learning_rate": 0.0005, "loss": 2.0969, "step": 211790 }, { "epoch": 0.8061630748384249, "grad_norm": 0.12391425669193268, "learning_rate": 0.0005, "loss": 2.0947, "step": 211800 }, { "epoch": 0.8062011373065475, "grad_norm": 0.12215106189250946, "learning_rate": 0.0005, "loss": 2.1033, "step": 211810 }, { "epoch": 0.8062391997746702, "grad_norm": 0.13146349787712097, "learning_rate": 0.0005, "loss": 2.1121, "step": 211820 }, { "epoch": 0.8062772622427928, "grad_norm": 0.12323244661092758, "learning_rate": 0.0005, "loss": 2.1081, "step": 211830 }, { "epoch": 0.8063153247109156, "grad_norm": 0.11947532743215561, "learning_rate": 0.0005, "loss": 2.097, "step": 211840 }, { "epoch": 0.8063533871790383, "grad_norm": 0.13553132116794586, "learning_rate": 0.0005, "loss": 2.1048, "step": 211850 }, { "epoch": 0.8063914496471609, "grad_norm": 0.1189856007695198, "learning_rate": 0.0005, "loss": 2.097, "step": 211860 }, { "epoch": 0.8064295121152836, "grad_norm": 0.12500151991844177, "learning_rate": 0.0005, "loss": 2.0889, "step": 211870 }, { "epoch": 0.8064675745834062, "grad_norm": 0.11782065778970718, "learning_rate": 0.0005, "loss": 2.1116, "step": 211880 }, { "epoch": 0.806505637051529, "grad_norm": 0.1276840716600418, "learning_rate": 0.0005, "loss": 2.1048, "step": 211890 }, { "epoch": 0.8065436995196517, "grad_norm": 0.13151061534881592, "learning_rate": 0.0005, "loss": 2.1126, "step": 211900 }, { "epoch": 0.8065817619877743, "grad_norm": 0.12591581046581268, "learning_rate": 0.0005, "loss": 2.1025, "step": 211910 }, { "epoch": 0.806619824455897, "grad_norm": 0.13186302781105042, "learning_rate": 0.0005, "loss": 2.1061, "step": 211920 }, { "epoch": 0.8066578869240197, "grad_norm": 0.1300540715456009, "learning_rate": 0.0005, "loss": 2.118, "step": 211930 }, { "epoch": 0.8066959493921424, "grad_norm": 0.12366016209125519, "learning_rate": 0.0005, "loss": 2.1054, "step": 211940 }, { "epoch": 0.806734011860265, "grad_norm": 0.13649655878543854, "learning_rate": 0.0005, "loss": 2.0949, "step": 211950 }, { "epoch": 0.8067720743283877, "grad_norm": 0.12333094328641891, "learning_rate": 0.0005, "loss": 2.0925, "step": 211960 }, { "epoch": 0.8068101367965105, "grad_norm": 0.12637990713119507, "learning_rate": 0.0005, "loss": 2.1144, "step": 211970 }, { "epoch": 0.8068481992646331, "grad_norm": 0.123201884329319, "learning_rate": 0.0005, "loss": 2.1, "step": 211980 }, { "epoch": 0.8068862617327558, "grad_norm": 0.14880193769931793, "learning_rate": 0.0005, "loss": 2.124, "step": 211990 }, { "epoch": 0.8069243242008785, "grad_norm": 0.11887034773826599, "learning_rate": 0.0005, "loss": 2.1108, "step": 212000 }, { "epoch": 0.8069623866690012, "grad_norm": 0.11663667112588882, "learning_rate": 0.0005, "loss": 2.1042, "step": 212010 }, { "epoch": 0.8070004491371239, "grad_norm": 0.1256750077009201, "learning_rate": 0.0005, "loss": 2.1058, "step": 212020 }, { "epoch": 0.8070385116052465, "grad_norm": 0.12154050171375275, "learning_rate": 0.0005, "loss": 2.0944, "step": 212030 }, { "epoch": 0.8070765740733692, "grad_norm": 0.12251798063516617, "learning_rate": 0.0005, "loss": 2.1007, "step": 212040 }, { "epoch": 0.8071146365414918, "grad_norm": 0.12491164356470108, "learning_rate": 0.0005, "loss": 2.097, "step": 212050 }, { "epoch": 0.8071526990096146, "grad_norm": 0.11804839223623276, "learning_rate": 0.0005, "loss": 2.0889, "step": 212060 }, { "epoch": 0.8071907614777373, "grad_norm": 0.11745022982358932, "learning_rate": 0.0005, "loss": 2.1143, "step": 212070 }, { "epoch": 0.8072288239458599, "grad_norm": 0.12897460162639618, "learning_rate": 0.0005, "loss": 2.1169, "step": 212080 }, { "epoch": 0.8072668864139826, "grad_norm": 0.12131928652524948, "learning_rate": 0.0005, "loss": 2.0943, "step": 212090 }, { "epoch": 0.8073049488821054, "grad_norm": 0.12376783043146133, "learning_rate": 0.0005, "loss": 2.1104, "step": 212100 }, { "epoch": 0.807343011350228, "grad_norm": 0.11986812204122543, "learning_rate": 0.0005, "loss": 2.1006, "step": 212110 }, { "epoch": 0.8073810738183507, "grad_norm": 0.1264466643333435, "learning_rate": 0.0005, "loss": 2.0998, "step": 212120 }, { "epoch": 0.8074191362864733, "grad_norm": 0.11758770048618317, "learning_rate": 0.0005, "loss": 2.1194, "step": 212130 }, { "epoch": 0.8074571987545961, "grad_norm": 0.12155801057815552, "learning_rate": 0.0005, "loss": 2.0934, "step": 212140 }, { "epoch": 0.8074952612227188, "grad_norm": 0.13400864601135254, "learning_rate": 0.0005, "loss": 2.1064, "step": 212150 }, { "epoch": 0.8075333236908414, "grad_norm": 0.14201660454273224, "learning_rate": 0.0005, "loss": 2.1192, "step": 212160 }, { "epoch": 0.8075713861589641, "grad_norm": 0.12833969295024872, "learning_rate": 0.0005, "loss": 2.1119, "step": 212170 }, { "epoch": 0.8076094486270867, "grad_norm": 0.12091749161481857, "learning_rate": 0.0005, "loss": 2.0984, "step": 212180 }, { "epoch": 0.8076475110952095, "grad_norm": 0.12184452265501022, "learning_rate": 0.0005, "loss": 2.097, "step": 212190 }, { "epoch": 0.8076855735633321, "grad_norm": 0.12771421670913696, "learning_rate": 0.0005, "loss": 2.105, "step": 212200 }, { "epoch": 0.8077236360314548, "grad_norm": 0.12905149161815643, "learning_rate": 0.0005, "loss": 2.0958, "step": 212210 }, { "epoch": 0.8077616984995775, "grad_norm": 0.11964689195156097, "learning_rate": 0.0005, "loss": 2.1096, "step": 212220 }, { "epoch": 0.8077997609677002, "grad_norm": 0.12076413631439209, "learning_rate": 0.0005, "loss": 2.1105, "step": 212230 }, { "epoch": 0.8078378234358229, "grad_norm": 0.12492384761571884, "learning_rate": 0.0005, "loss": 2.1182, "step": 212240 }, { "epoch": 0.8078758859039455, "grad_norm": 0.1287720799446106, "learning_rate": 0.0005, "loss": 2.0967, "step": 212250 }, { "epoch": 0.8079139483720682, "grad_norm": 0.12003912776708603, "learning_rate": 0.0005, "loss": 2.0943, "step": 212260 }, { "epoch": 0.807952010840191, "grad_norm": 0.12055928260087967, "learning_rate": 0.0005, "loss": 2.1098, "step": 212270 }, { "epoch": 0.8079900733083136, "grad_norm": 0.12833811342716217, "learning_rate": 0.0005, "loss": 2.0976, "step": 212280 }, { "epoch": 0.8080281357764363, "grad_norm": 0.13042432069778442, "learning_rate": 0.0005, "loss": 2.0982, "step": 212290 }, { "epoch": 0.8080661982445589, "grad_norm": 0.13135427236557007, "learning_rate": 0.0005, "loss": 2.1157, "step": 212300 }, { "epoch": 0.8081042607126816, "grad_norm": 0.13378044962882996, "learning_rate": 0.0005, "loss": 2.0999, "step": 212310 }, { "epoch": 0.8081423231808044, "grad_norm": 0.13855749368667603, "learning_rate": 0.0005, "loss": 2.1077, "step": 212320 }, { "epoch": 0.808180385648927, "grad_norm": 0.1329144835472107, "learning_rate": 0.0005, "loss": 2.0941, "step": 212330 }, { "epoch": 0.8082184481170497, "grad_norm": 0.11958435922861099, "learning_rate": 0.0005, "loss": 2.1136, "step": 212340 }, { "epoch": 0.8082565105851723, "grad_norm": 0.12032509595155716, "learning_rate": 0.0005, "loss": 2.1001, "step": 212350 }, { "epoch": 0.8082945730532951, "grad_norm": 0.12137807160615921, "learning_rate": 0.0005, "loss": 2.1111, "step": 212360 }, { "epoch": 0.8083326355214178, "grad_norm": 0.11983451247215271, "learning_rate": 0.0005, "loss": 2.0996, "step": 212370 }, { "epoch": 0.8083706979895404, "grad_norm": 0.1348404586315155, "learning_rate": 0.0005, "loss": 2.1061, "step": 212380 }, { "epoch": 0.8084087604576631, "grad_norm": 0.12229456007480621, "learning_rate": 0.0005, "loss": 2.1111, "step": 212390 }, { "epoch": 0.8084468229257858, "grad_norm": 0.13219401240348816, "learning_rate": 0.0005, "loss": 2.0988, "step": 212400 }, { "epoch": 0.8084848853939085, "grad_norm": 0.12037495523691177, "learning_rate": 0.0005, "loss": 2.1001, "step": 212410 }, { "epoch": 0.8085229478620312, "grad_norm": 0.1404789388179779, "learning_rate": 0.0005, "loss": 2.1054, "step": 212420 }, { "epoch": 0.8085610103301538, "grad_norm": 0.12103266268968582, "learning_rate": 0.0005, "loss": 2.0872, "step": 212430 }, { "epoch": 0.8085990727982766, "grad_norm": 0.1356445997953415, "learning_rate": 0.0005, "loss": 2.1078, "step": 212440 }, { "epoch": 0.8086371352663992, "grad_norm": 0.14060430228710175, "learning_rate": 0.0005, "loss": 2.1022, "step": 212450 }, { "epoch": 0.8086751977345219, "grad_norm": 0.12964075803756714, "learning_rate": 0.0005, "loss": 2.1045, "step": 212460 }, { "epoch": 0.8087132602026446, "grad_norm": 0.12052503228187561, "learning_rate": 0.0005, "loss": 2.1291, "step": 212470 }, { "epoch": 0.8087513226707672, "grad_norm": 0.13579916954040527, "learning_rate": 0.0005, "loss": 2.1044, "step": 212480 }, { "epoch": 0.80878938513889, "grad_norm": 0.12856526672840118, "learning_rate": 0.0005, "loss": 2.1074, "step": 212490 }, { "epoch": 0.8088274476070126, "grad_norm": 0.12353648245334625, "learning_rate": 0.0005, "loss": 2.0864, "step": 212500 }, { "epoch": 0.8088655100751353, "grad_norm": 0.128973588347435, "learning_rate": 0.0005, "loss": 2.1001, "step": 212510 }, { "epoch": 0.808903572543258, "grad_norm": 0.12027554214000702, "learning_rate": 0.0005, "loss": 2.1219, "step": 212520 }, { "epoch": 0.8089416350113807, "grad_norm": 0.1417223960161209, "learning_rate": 0.0005, "loss": 2.1031, "step": 212530 }, { "epoch": 0.8089796974795034, "grad_norm": 0.11463826149702072, "learning_rate": 0.0005, "loss": 2.0996, "step": 212540 }, { "epoch": 0.809017759947626, "grad_norm": 0.11450222134590149, "learning_rate": 0.0005, "loss": 2.1003, "step": 212550 }, { "epoch": 0.8090558224157487, "grad_norm": 0.11749234795570374, "learning_rate": 0.0005, "loss": 2.0984, "step": 212560 }, { "epoch": 0.8090938848838715, "grad_norm": 0.12792567908763885, "learning_rate": 0.0005, "loss": 2.0904, "step": 212570 }, { "epoch": 0.8091319473519941, "grad_norm": 0.12287959456443787, "learning_rate": 0.0005, "loss": 2.1119, "step": 212580 }, { "epoch": 0.8091700098201168, "grad_norm": 0.12542614340782166, "learning_rate": 0.0005, "loss": 2.1048, "step": 212590 }, { "epoch": 0.8092080722882394, "grad_norm": 0.12899567186832428, "learning_rate": 0.0005, "loss": 2.0897, "step": 212600 }, { "epoch": 0.8092461347563621, "grad_norm": 0.12323947995901108, "learning_rate": 0.0005, "loss": 2.0954, "step": 212610 }, { "epoch": 0.8092841972244849, "grad_norm": 0.13235338032245636, "learning_rate": 0.0005, "loss": 2.1079, "step": 212620 }, { "epoch": 0.8093222596926075, "grad_norm": 0.12860196828842163, "learning_rate": 0.0005, "loss": 2.101, "step": 212630 }, { "epoch": 0.8093603221607302, "grad_norm": 0.13267847895622253, "learning_rate": 0.0005, "loss": 2.0847, "step": 212640 }, { "epoch": 0.8093983846288528, "grad_norm": 0.14531466364860535, "learning_rate": 0.0005, "loss": 2.0922, "step": 212650 }, { "epoch": 0.8094364470969756, "grad_norm": 0.12676188349723816, "learning_rate": 0.0005, "loss": 2.1126, "step": 212660 }, { "epoch": 0.8094745095650983, "grad_norm": 0.11910112202167511, "learning_rate": 0.0005, "loss": 2.0994, "step": 212670 }, { "epoch": 0.8095125720332209, "grad_norm": 0.12504889070987701, "learning_rate": 0.0005, "loss": 2.1114, "step": 212680 }, { "epoch": 0.8095506345013436, "grad_norm": 0.12771740555763245, "learning_rate": 0.0005, "loss": 2.1158, "step": 212690 }, { "epoch": 0.8095886969694663, "grad_norm": 0.12403963506221771, "learning_rate": 0.0005, "loss": 2.1316, "step": 212700 }, { "epoch": 0.809626759437589, "grad_norm": 0.11381793767213821, "learning_rate": 0.0005, "loss": 2.1117, "step": 212710 }, { "epoch": 0.8096648219057117, "grad_norm": 0.13926367461681366, "learning_rate": 0.0005, "loss": 2.1178, "step": 212720 }, { "epoch": 0.8097028843738343, "grad_norm": 0.14142242074012756, "learning_rate": 0.0005, "loss": 2.1118, "step": 212730 }, { "epoch": 0.809740946841957, "grad_norm": 0.12207679450511932, "learning_rate": 0.0005, "loss": 2.0933, "step": 212740 }, { "epoch": 0.8097790093100797, "grad_norm": 0.11419576406478882, "learning_rate": 0.0005, "loss": 2.0969, "step": 212750 }, { "epoch": 0.8098170717782024, "grad_norm": 0.11438912153244019, "learning_rate": 0.0005, "loss": 2.107, "step": 212760 }, { "epoch": 0.809855134246325, "grad_norm": 0.12209003418684006, "learning_rate": 0.0005, "loss": 2.0927, "step": 212770 }, { "epoch": 0.8098931967144477, "grad_norm": 0.1303257942199707, "learning_rate": 0.0005, "loss": 2.1026, "step": 212780 }, { "epoch": 0.8099312591825705, "grad_norm": 0.12480229139328003, "learning_rate": 0.0005, "loss": 2.1083, "step": 212790 }, { "epoch": 0.8099693216506931, "grad_norm": 0.13041506707668304, "learning_rate": 0.0005, "loss": 2.1208, "step": 212800 }, { "epoch": 0.8100073841188158, "grad_norm": 0.1102275401353836, "learning_rate": 0.0005, "loss": 2.0972, "step": 212810 }, { "epoch": 0.8100454465869384, "grad_norm": 0.11377348005771637, "learning_rate": 0.0005, "loss": 2.1084, "step": 212820 }, { "epoch": 0.8100835090550612, "grad_norm": 0.13421009480953217, "learning_rate": 0.0005, "loss": 2.0827, "step": 212830 }, { "epoch": 0.8101215715231839, "grad_norm": 0.13049618899822235, "learning_rate": 0.0005, "loss": 2.098, "step": 212840 }, { "epoch": 0.8101596339913065, "grad_norm": 0.1221061423420906, "learning_rate": 0.0005, "loss": 2.1186, "step": 212850 }, { "epoch": 0.8101976964594292, "grad_norm": 0.13240858912467957, "learning_rate": 0.0005, "loss": 2.0877, "step": 212860 }, { "epoch": 0.810235758927552, "grad_norm": 0.1204882338643074, "learning_rate": 0.0005, "loss": 2.1004, "step": 212870 }, { "epoch": 0.8102738213956746, "grad_norm": 0.14405842125415802, "learning_rate": 0.0005, "loss": 2.1112, "step": 212880 }, { "epoch": 0.8103118838637973, "grad_norm": 0.12688469886779785, "learning_rate": 0.0005, "loss": 2.0946, "step": 212890 }, { "epoch": 0.8103499463319199, "grad_norm": 0.12993687391281128, "learning_rate": 0.0005, "loss": 2.1004, "step": 212900 }, { "epoch": 0.8103880088000426, "grad_norm": 0.12682536244392395, "learning_rate": 0.0005, "loss": 2.0976, "step": 212910 }, { "epoch": 0.8104260712681653, "grad_norm": 0.12129734456539154, "learning_rate": 0.0005, "loss": 2.1171, "step": 212920 }, { "epoch": 0.810464133736288, "grad_norm": 0.11905720084905624, "learning_rate": 0.0005, "loss": 2.1055, "step": 212930 }, { "epoch": 0.8105021962044107, "grad_norm": 0.1375799924135208, "learning_rate": 0.0005, "loss": 2.1071, "step": 212940 }, { "epoch": 0.8105402586725333, "grad_norm": 0.13128814101219177, "learning_rate": 0.0005, "loss": 2.0914, "step": 212950 }, { "epoch": 0.8105783211406561, "grad_norm": 0.12356577068567276, "learning_rate": 0.0005, "loss": 2.0964, "step": 212960 }, { "epoch": 0.8106163836087787, "grad_norm": 0.12033846974372864, "learning_rate": 0.0005, "loss": 2.1168, "step": 212970 }, { "epoch": 0.8106544460769014, "grad_norm": 0.12359870970249176, "learning_rate": 0.0005, "loss": 2.0967, "step": 212980 }, { "epoch": 0.8106925085450241, "grad_norm": 0.12279484421014786, "learning_rate": 0.0005, "loss": 2.1203, "step": 212990 }, { "epoch": 0.8107305710131468, "grad_norm": 0.11939160525798798, "learning_rate": 0.0005, "loss": 2.105, "step": 213000 }, { "epoch": 0.8107686334812695, "grad_norm": 0.12679126858711243, "learning_rate": 0.0005, "loss": 2.093, "step": 213010 }, { "epoch": 0.8108066959493921, "grad_norm": 0.12365018576383591, "learning_rate": 0.0005, "loss": 2.1022, "step": 213020 }, { "epoch": 0.8108447584175148, "grad_norm": 0.12529915571212769, "learning_rate": 0.0005, "loss": 2.0942, "step": 213030 }, { "epoch": 0.8108828208856375, "grad_norm": 0.13124340772628784, "learning_rate": 0.0005, "loss": 2.1099, "step": 213040 }, { "epoch": 0.8109208833537602, "grad_norm": 0.1141396164894104, "learning_rate": 0.0005, "loss": 2.1053, "step": 213050 }, { "epoch": 0.8109589458218829, "grad_norm": 0.12517715990543365, "learning_rate": 0.0005, "loss": 2.1037, "step": 213060 }, { "epoch": 0.8109970082900055, "grad_norm": 0.1294000744819641, "learning_rate": 0.0005, "loss": 2.1068, "step": 213070 }, { "epoch": 0.8110350707581282, "grad_norm": 0.13378405570983887, "learning_rate": 0.0005, "loss": 2.1125, "step": 213080 }, { "epoch": 0.811073133226251, "grad_norm": 0.12976671755313873, "learning_rate": 0.0005, "loss": 2.0988, "step": 213090 }, { "epoch": 0.8111111956943736, "grad_norm": 0.132253035902977, "learning_rate": 0.0005, "loss": 2.1053, "step": 213100 }, { "epoch": 0.8111492581624963, "grad_norm": 0.13804452121257782, "learning_rate": 0.0005, "loss": 2.1075, "step": 213110 }, { "epoch": 0.8111873206306189, "grad_norm": 0.12440338730812073, "learning_rate": 0.0005, "loss": 2.1251, "step": 213120 }, { "epoch": 0.8112253830987417, "grad_norm": 0.14301490783691406, "learning_rate": 0.0005, "loss": 2.1171, "step": 213130 }, { "epoch": 0.8112634455668644, "grad_norm": 0.13201947510242462, "learning_rate": 0.0005, "loss": 2.1054, "step": 213140 }, { "epoch": 0.811301508034987, "grad_norm": 0.12042871117591858, "learning_rate": 0.0005, "loss": 2.1182, "step": 213150 }, { "epoch": 0.8113395705031097, "grad_norm": 0.12254253029823303, "learning_rate": 0.0005, "loss": 2.11, "step": 213160 }, { "epoch": 0.8113776329712323, "grad_norm": 0.1616806983947754, "learning_rate": 0.0005, "loss": 2.0858, "step": 213170 }, { "epoch": 0.8114156954393551, "grad_norm": 0.14708156883716583, "learning_rate": 0.0005, "loss": 2.1036, "step": 213180 }, { "epoch": 0.8114537579074778, "grad_norm": 0.11895033717155457, "learning_rate": 0.0005, "loss": 2.1066, "step": 213190 }, { "epoch": 0.8114918203756004, "grad_norm": 0.11877939850091934, "learning_rate": 0.0005, "loss": 2.0986, "step": 213200 }, { "epoch": 0.8115298828437231, "grad_norm": 0.11637184768915176, "learning_rate": 0.0005, "loss": 2.092, "step": 213210 }, { "epoch": 0.8115679453118458, "grad_norm": 0.12218356132507324, "learning_rate": 0.0005, "loss": 2.1144, "step": 213220 }, { "epoch": 0.8116060077799685, "grad_norm": 0.1255423128604889, "learning_rate": 0.0005, "loss": 2.1108, "step": 213230 }, { "epoch": 0.8116440702480912, "grad_norm": 0.14560627937316895, "learning_rate": 0.0005, "loss": 2.1262, "step": 213240 }, { "epoch": 0.8116821327162138, "grad_norm": 0.12048596888780594, "learning_rate": 0.0005, "loss": 2.1081, "step": 213250 }, { "epoch": 0.8117201951843366, "grad_norm": 0.12892501056194305, "learning_rate": 0.0005, "loss": 2.0956, "step": 213260 }, { "epoch": 0.8117582576524592, "grad_norm": 0.12159749120473862, "learning_rate": 0.0005, "loss": 2.1064, "step": 213270 }, { "epoch": 0.8117963201205819, "grad_norm": 0.13641926646232605, "learning_rate": 0.0005, "loss": 2.0846, "step": 213280 }, { "epoch": 0.8118343825887046, "grad_norm": 0.13524945080280304, "learning_rate": 0.0005, "loss": 2.1058, "step": 213290 }, { "epoch": 0.8118724450568273, "grad_norm": 0.1168695017695427, "learning_rate": 0.0005, "loss": 2.0976, "step": 213300 }, { "epoch": 0.81191050752495, "grad_norm": 0.1244073137640953, "learning_rate": 0.0005, "loss": 2.1068, "step": 213310 }, { "epoch": 0.8119485699930726, "grad_norm": 0.11838249862194061, "learning_rate": 0.0005, "loss": 2.0964, "step": 213320 }, { "epoch": 0.8119866324611953, "grad_norm": 0.11801798641681671, "learning_rate": 0.0005, "loss": 2.0991, "step": 213330 }, { "epoch": 0.812024694929318, "grad_norm": 0.12498720735311508, "learning_rate": 0.0005, "loss": 2.0983, "step": 213340 }, { "epoch": 0.8120627573974407, "grad_norm": 0.12367401272058487, "learning_rate": 0.0005, "loss": 2.1025, "step": 213350 }, { "epoch": 0.8121008198655634, "grad_norm": 0.12564094364643097, "learning_rate": 0.0005, "loss": 2.108, "step": 213360 }, { "epoch": 0.812138882333686, "grad_norm": 0.12600673735141754, "learning_rate": 0.0005, "loss": 2.0857, "step": 213370 }, { "epoch": 0.8121769448018087, "grad_norm": 0.13035088777542114, "learning_rate": 0.0005, "loss": 2.1144, "step": 213380 }, { "epoch": 0.8122150072699315, "grad_norm": 0.1314198225736618, "learning_rate": 0.0005, "loss": 2.0898, "step": 213390 }, { "epoch": 0.8122530697380541, "grad_norm": 0.1195560097694397, "learning_rate": 0.0005, "loss": 2.1133, "step": 213400 }, { "epoch": 0.8122911322061768, "grad_norm": 0.12452965974807739, "learning_rate": 0.0005, "loss": 2.0986, "step": 213410 }, { "epoch": 0.8123291946742994, "grad_norm": 0.7531483769416809, "learning_rate": 0.0005, "loss": 2.1064, "step": 213420 }, { "epoch": 0.8123672571424222, "grad_norm": 0.1292552500963211, "learning_rate": 0.0005, "loss": 2.1101, "step": 213430 }, { "epoch": 0.8124053196105449, "grad_norm": 0.1480943262577057, "learning_rate": 0.0005, "loss": 2.1094, "step": 213440 }, { "epoch": 0.8124433820786675, "grad_norm": 0.12335444241762161, "learning_rate": 0.0005, "loss": 2.102, "step": 213450 }, { "epoch": 0.8124814445467902, "grad_norm": 0.13456949591636658, "learning_rate": 0.0005, "loss": 2.1212, "step": 213460 }, { "epoch": 0.8125195070149128, "grad_norm": 0.12705455720424652, "learning_rate": 0.0005, "loss": 2.0931, "step": 213470 }, { "epoch": 0.8125575694830356, "grad_norm": 0.1286783516407013, "learning_rate": 0.0005, "loss": 2.109, "step": 213480 }, { "epoch": 0.8125956319511582, "grad_norm": 0.12179796397686005, "learning_rate": 0.0005, "loss": 2.1028, "step": 213490 }, { "epoch": 0.8126336944192809, "grad_norm": 0.12345118820667267, "learning_rate": 0.0005, "loss": 2.1042, "step": 213500 }, { "epoch": 0.8126717568874036, "grad_norm": 0.11786182224750519, "learning_rate": 0.0005, "loss": 2.0997, "step": 213510 }, { "epoch": 0.8127098193555263, "grad_norm": 0.12873464822769165, "learning_rate": 0.0005, "loss": 2.1117, "step": 213520 }, { "epoch": 0.812747881823649, "grad_norm": 0.12281100451946259, "learning_rate": 0.0005, "loss": 2.1166, "step": 213530 }, { "epoch": 0.8127859442917716, "grad_norm": 0.12193473428487778, "learning_rate": 0.0005, "loss": 2.1178, "step": 213540 }, { "epoch": 0.8128240067598943, "grad_norm": 0.12629088759422302, "learning_rate": 0.0005, "loss": 2.087, "step": 213550 }, { "epoch": 0.8128620692280171, "grad_norm": 0.13471092283725739, "learning_rate": 0.0005, "loss": 2.0936, "step": 213560 }, { "epoch": 0.8129001316961397, "grad_norm": 0.13517582416534424, "learning_rate": 0.0005, "loss": 2.0928, "step": 213570 }, { "epoch": 0.8129381941642624, "grad_norm": 0.11960075050592422, "learning_rate": 0.0005, "loss": 2.0947, "step": 213580 }, { "epoch": 0.812976256632385, "grad_norm": 0.12184352427721024, "learning_rate": 0.0005, "loss": 2.1101, "step": 213590 }, { "epoch": 0.8130143191005077, "grad_norm": 0.12594962120056152, "learning_rate": 0.0005, "loss": 2.0956, "step": 213600 }, { "epoch": 0.8130523815686305, "grad_norm": 0.12507972121238708, "learning_rate": 0.0005, "loss": 2.1032, "step": 213610 }, { "epoch": 0.8130904440367531, "grad_norm": 0.13315139710903168, "learning_rate": 0.0005, "loss": 2.1098, "step": 213620 }, { "epoch": 0.8131285065048758, "grad_norm": 0.11814764887094498, "learning_rate": 0.0005, "loss": 2.0994, "step": 213630 }, { "epoch": 0.8131665689729984, "grad_norm": 0.12203751504421234, "learning_rate": 0.0005, "loss": 2.1032, "step": 213640 }, { "epoch": 0.8132046314411212, "grad_norm": 0.1237272098660469, "learning_rate": 0.0005, "loss": 2.1066, "step": 213650 }, { "epoch": 0.8132426939092439, "grad_norm": 0.15392014384269714, "learning_rate": 0.0005, "loss": 2.1043, "step": 213660 }, { "epoch": 0.8132807563773665, "grad_norm": 0.12798550724983215, "learning_rate": 0.0005, "loss": 2.1081, "step": 213670 }, { "epoch": 0.8133188188454892, "grad_norm": 0.12292656302452087, "learning_rate": 0.0005, "loss": 2.1061, "step": 213680 }, { "epoch": 0.813356881313612, "grad_norm": 0.14073209464550018, "learning_rate": 0.0005, "loss": 2.105, "step": 213690 }, { "epoch": 0.8133949437817346, "grad_norm": 0.1310732662677765, "learning_rate": 0.0005, "loss": 2.0897, "step": 213700 }, { "epoch": 0.8134330062498573, "grad_norm": 0.14236874878406525, "learning_rate": 0.0005, "loss": 2.1037, "step": 213710 }, { "epoch": 0.8134710687179799, "grad_norm": 0.12861856818199158, "learning_rate": 0.0005, "loss": 2.0869, "step": 213720 }, { "epoch": 0.8135091311861027, "grad_norm": 0.1245727613568306, "learning_rate": 0.0005, "loss": 2.1004, "step": 213730 }, { "epoch": 0.8135471936542253, "grad_norm": 0.11919999122619629, "learning_rate": 0.0005, "loss": 2.1011, "step": 213740 }, { "epoch": 0.813585256122348, "grad_norm": 0.11863479018211365, "learning_rate": 0.0005, "loss": 2.1015, "step": 213750 }, { "epoch": 0.8136233185904707, "grad_norm": 0.12423855811357498, "learning_rate": 0.0005, "loss": 2.0852, "step": 213760 }, { "epoch": 0.8136613810585933, "grad_norm": 0.11683562397956848, "learning_rate": 0.0005, "loss": 2.1005, "step": 213770 }, { "epoch": 0.8136994435267161, "grad_norm": 0.12702502310276031, "learning_rate": 0.0005, "loss": 2.0991, "step": 213780 }, { "epoch": 0.8137375059948387, "grad_norm": 0.12645933032035828, "learning_rate": 0.0005, "loss": 2.1169, "step": 213790 }, { "epoch": 0.8137755684629614, "grad_norm": 0.11896852403879166, "learning_rate": 0.0005, "loss": 2.1084, "step": 213800 }, { "epoch": 0.813813630931084, "grad_norm": 0.13744334876537323, "learning_rate": 0.0005, "loss": 2.1143, "step": 213810 }, { "epoch": 0.8138516933992068, "grad_norm": 0.1264820694923401, "learning_rate": 0.0005, "loss": 2.1115, "step": 213820 }, { "epoch": 0.8138897558673295, "grad_norm": 0.13732865452766418, "learning_rate": 0.0005, "loss": 2.1056, "step": 213830 }, { "epoch": 0.8139278183354521, "grad_norm": 0.12342477589845657, "learning_rate": 0.0005, "loss": 2.0947, "step": 213840 }, { "epoch": 0.8139658808035748, "grad_norm": 0.12727709114551544, "learning_rate": 0.0005, "loss": 2.0987, "step": 213850 }, { "epoch": 0.8140039432716976, "grad_norm": 0.11827078461647034, "learning_rate": 0.0005, "loss": 2.1001, "step": 213860 }, { "epoch": 0.8140420057398202, "grad_norm": 0.12728027999401093, "learning_rate": 0.0005, "loss": 2.1145, "step": 213870 }, { "epoch": 0.8140800682079429, "grad_norm": 0.13272863626480103, "learning_rate": 0.0005, "loss": 2.1036, "step": 213880 }, { "epoch": 0.8141181306760655, "grad_norm": 0.13463446497917175, "learning_rate": 0.0005, "loss": 2.1035, "step": 213890 }, { "epoch": 0.8141561931441882, "grad_norm": 0.12459887564182281, "learning_rate": 0.0005, "loss": 2.1093, "step": 213900 }, { "epoch": 0.814194255612311, "grad_norm": 0.1231890395283699, "learning_rate": 0.0005, "loss": 2.1037, "step": 213910 }, { "epoch": 0.8142323180804336, "grad_norm": 0.11568400263786316, "learning_rate": 0.0005, "loss": 2.1006, "step": 213920 }, { "epoch": 0.8142703805485563, "grad_norm": 0.11822357028722763, "learning_rate": 0.0005, "loss": 2.1018, "step": 213930 }, { "epoch": 0.8143084430166789, "grad_norm": 0.15082870423793793, "learning_rate": 0.0005, "loss": 2.096, "step": 213940 }, { "epoch": 0.8143465054848017, "grad_norm": 0.13176187872886658, "learning_rate": 0.0005, "loss": 2.1087, "step": 213950 }, { "epoch": 0.8143845679529244, "grad_norm": 0.12025332450866699, "learning_rate": 0.0005, "loss": 2.1034, "step": 213960 }, { "epoch": 0.814422630421047, "grad_norm": 0.1275869607925415, "learning_rate": 0.0005, "loss": 2.102, "step": 213970 }, { "epoch": 0.8144606928891697, "grad_norm": 0.12367784976959229, "learning_rate": 0.0005, "loss": 2.1151, "step": 213980 }, { "epoch": 0.8144987553572924, "grad_norm": 0.14042086899280548, "learning_rate": 0.0005, "loss": 2.1122, "step": 213990 }, { "epoch": 0.8145368178254151, "grad_norm": 0.13881553709506989, "learning_rate": 0.0005, "loss": 2.096, "step": 214000 }, { "epoch": 0.8145748802935378, "grad_norm": 0.1477145552635193, "learning_rate": 0.0005, "loss": 2.1126, "step": 214010 }, { "epoch": 0.8146129427616604, "grad_norm": 0.11577677726745605, "learning_rate": 0.0005, "loss": 2.0982, "step": 214020 }, { "epoch": 0.8146510052297831, "grad_norm": 0.12480149418115616, "learning_rate": 0.0005, "loss": 2.0907, "step": 214030 }, { "epoch": 0.8146890676979058, "grad_norm": 0.13816551864147186, "learning_rate": 0.0005, "loss": 2.1165, "step": 214040 }, { "epoch": 0.8147271301660285, "grad_norm": 0.1139967143535614, "learning_rate": 0.0005, "loss": 2.0967, "step": 214050 }, { "epoch": 0.8147651926341511, "grad_norm": 0.11555656045675278, "learning_rate": 0.0005, "loss": 2.0881, "step": 214060 }, { "epoch": 0.8148032551022738, "grad_norm": 0.12136835604906082, "learning_rate": 0.0005, "loss": 2.1115, "step": 214070 }, { "epoch": 0.8148413175703966, "grad_norm": 0.13405480980873108, "learning_rate": 0.0005, "loss": 2.096, "step": 214080 }, { "epoch": 0.8148793800385192, "grad_norm": 0.1206866204738617, "learning_rate": 0.0005, "loss": 2.1065, "step": 214090 }, { "epoch": 0.8149174425066419, "grad_norm": 0.13503244519233704, "learning_rate": 0.0005, "loss": 2.1095, "step": 214100 }, { "epoch": 0.8149555049747645, "grad_norm": 0.12347983568906784, "learning_rate": 0.0005, "loss": 2.1223, "step": 214110 }, { "epoch": 0.8149935674428873, "grad_norm": 0.13352620601654053, "learning_rate": 0.0005, "loss": 2.1031, "step": 214120 }, { "epoch": 0.81503162991101, "grad_norm": 0.12844537198543549, "learning_rate": 0.0005, "loss": 2.1177, "step": 214130 }, { "epoch": 0.8150696923791326, "grad_norm": 0.12517356872558594, "learning_rate": 0.0005, "loss": 2.1167, "step": 214140 }, { "epoch": 0.8151077548472553, "grad_norm": 0.12121585011482239, "learning_rate": 0.0005, "loss": 2.1015, "step": 214150 }, { "epoch": 0.815145817315378, "grad_norm": 0.12959855794906616, "learning_rate": 0.0005, "loss": 2.0969, "step": 214160 }, { "epoch": 0.8151838797835007, "grad_norm": 0.12382923066616058, "learning_rate": 0.0005, "loss": 2.0997, "step": 214170 }, { "epoch": 0.8152219422516234, "grad_norm": 0.12631931900978088, "learning_rate": 0.0005, "loss": 2.1146, "step": 214180 }, { "epoch": 0.815260004719746, "grad_norm": 0.12381136417388916, "learning_rate": 0.0005, "loss": 2.1002, "step": 214190 }, { "epoch": 0.8152980671878687, "grad_norm": 0.1158674955368042, "learning_rate": 0.0005, "loss": 2.1052, "step": 214200 }, { "epoch": 0.8153361296559914, "grad_norm": 0.1376393884420395, "learning_rate": 0.0005, "loss": 2.1041, "step": 214210 }, { "epoch": 0.8153741921241141, "grad_norm": 0.12953539192676544, "learning_rate": 0.0005, "loss": 2.0939, "step": 214220 }, { "epoch": 0.8154122545922368, "grad_norm": 0.12734073400497437, "learning_rate": 0.0005, "loss": 2.0996, "step": 214230 }, { "epoch": 0.8154503170603594, "grad_norm": 0.12529154121875763, "learning_rate": 0.0005, "loss": 2.0925, "step": 214240 }, { "epoch": 0.8154883795284822, "grad_norm": 0.12359047681093216, "learning_rate": 0.0005, "loss": 2.0995, "step": 214250 }, { "epoch": 0.8155264419966048, "grad_norm": 0.13195255398750305, "learning_rate": 0.0005, "loss": 2.1154, "step": 214260 }, { "epoch": 0.8155645044647275, "grad_norm": 0.12542951107025146, "learning_rate": 0.0005, "loss": 2.0786, "step": 214270 }, { "epoch": 0.8156025669328502, "grad_norm": 0.12773358821868896, "learning_rate": 0.0005, "loss": 2.105, "step": 214280 }, { "epoch": 0.8156406294009729, "grad_norm": 0.12585784494876862, "learning_rate": 0.0005, "loss": 2.1007, "step": 214290 }, { "epoch": 0.8156786918690956, "grad_norm": 0.11464321613311768, "learning_rate": 0.0005, "loss": 2.1157, "step": 214300 }, { "epoch": 0.8157167543372182, "grad_norm": 0.12004934996366501, "learning_rate": 0.0005, "loss": 2.0929, "step": 214310 }, { "epoch": 0.8157548168053409, "grad_norm": 0.13350030779838562, "learning_rate": 0.0005, "loss": 2.091, "step": 214320 }, { "epoch": 0.8157928792734636, "grad_norm": 0.12730374932289124, "learning_rate": 0.0005, "loss": 2.1003, "step": 214330 }, { "epoch": 0.8158309417415863, "grad_norm": 0.13901080191135406, "learning_rate": 0.0005, "loss": 2.0859, "step": 214340 }, { "epoch": 0.815869004209709, "grad_norm": 0.1676304042339325, "learning_rate": 0.0005, "loss": 2.0964, "step": 214350 }, { "epoch": 0.8159070666778316, "grad_norm": 0.13866017758846283, "learning_rate": 0.0005, "loss": 2.1053, "step": 214360 }, { "epoch": 0.8159451291459543, "grad_norm": 0.13269653916358948, "learning_rate": 0.0005, "loss": 2.1077, "step": 214370 }, { "epoch": 0.8159831916140771, "grad_norm": 0.12412741780281067, "learning_rate": 0.0005, "loss": 2.1112, "step": 214380 }, { "epoch": 0.8160212540821997, "grad_norm": 0.11861774325370789, "learning_rate": 0.0005, "loss": 2.1235, "step": 214390 }, { "epoch": 0.8160593165503224, "grad_norm": 0.12456288188695908, "learning_rate": 0.0005, "loss": 2.0968, "step": 214400 }, { "epoch": 0.816097379018445, "grad_norm": 0.1261448860168457, "learning_rate": 0.0005, "loss": 2.0985, "step": 214410 }, { "epoch": 0.8161354414865678, "grad_norm": 0.134896382689476, "learning_rate": 0.0005, "loss": 2.1154, "step": 214420 }, { "epoch": 0.8161735039546905, "grad_norm": 0.14073701202869415, "learning_rate": 0.0005, "loss": 2.1163, "step": 214430 }, { "epoch": 0.8162115664228131, "grad_norm": 0.12699000537395477, "learning_rate": 0.0005, "loss": 2.1036, "step": 214440 }, { "epoch": 0.8162496288909358, "grad_norm": 0.12732018530368805, "learning_rate": 0.0005, "loss": 2.1106, "step": 214450 }, { "epoch": 0.8162876913590584, "grad_norm": 0.1431722640991211, "learning_rate": 0.0005, "loss": 2.0975, "step": 214460 }, { "epoch": 0.8163257538271812, "grad_norm": 0.1346866488456726, "learning_rate": 0.0005, "loss": 2.1063, "step": 214470 }, { "epoch": 0.8163638162953039, "grad_norm": 0.14443494379520416, "learning_rate": 0.0005, "loss": 2.1177, "step": 214480 }, { "epoch": 0.8164018787634265, "grad_norm": 0.12590277194976807, "learning_rate": 0.0005, "loss": 2.0922, "step": 214490 }, { "epoch": 0.8164399412315492, "grad_norm": 0.11266959458589554, "learning_rate": 0.0005, "loss": 2.0889, "step": 214500 }, { "epoch": 0.8164780036996719, "grad_norm": 0.12907174229621887, "learning_rate": 0.0005, "loss": 2.1214, "step": 214510 }, { "epoch": 0.8165160661677946, "grad_norm": 0.13524018228054047, "learning_rate": 0.0005, "loss": 2.1124, "step": 214520 }, { "epoch": 0.8165541286359173, "grad_norm": 0.12719334661960602, "learning_rate": 0.0005, "loss": 2.0982, "step": 214530 }, { "epoch": 0.8165921911040399, "grad_norm": 0.12704965472221375, "learning_rate": 0.0005, "loss": 2.1032, "step": 214540 }, { "epoch": 0.8166302535721627, "grad_norm": 0.11621110886335373, "learning_rate": 0.0005, "loss": 2.1014, "step": 214550 }, { "epoch": 0.8166683160402853, "grad_norm": 0.1382187306880951, "learning_rate": 0.0005, "loss": 2.1032, "step": 214560 }, { "epoch": 0.816706378508408, "grad_norm": 0.12210649996995926, "learning_rate": 0.0005, "loss": 2.1097, "step": 214570 }, { "epoch": 0.8167444409765307, "grad_norm": 0.11293422430753708, "learning_rate": 0.0005, "loss": 2.1001, "step": 214580 }, { "epoch": 0.8167825034446534, "grad_norm": 0.12237521260976791, "learning_rate": 0.0005, "loss": 2.0907, "step": 214590 }, { "epoch": 0.8168205659127761, "grad_norm": 0.12014593183994293, "learning_rate": 0.0005, "loss": 2.1157, "step": 214600 }, { "epoch": 0.8168586283808987, "grad_norm": 0.12235447764396667, "learning_rate": 0.0005, "loss": 2.1169, "step": 214610 }, { "epoch": 0.8168966908490214, "grad_norm": 0.12861338257789612, "learning_rate": 0.0005, "loss": 2.11, "step": 214620 }, { "epoch": 0.816934753317144, "grad_norm": 0.12567169964313507, "learning_rate": 0.0005, "loss": 2.1033, "step": 214630 }, { "epoch": 0.8169728157852668, "grad_norm": 0.24400149285793304, "learning_rate": 0.0005, "loss": 2.107, "step": 214640 }, { "epoch": 0.8170108782533895, "grad_norm": 0.12880581617355347, "learning_rate": 0.0005, "loss": 2.1247, "step": 214650 }, { "epoch": 0.8170489407215121, "grad_norm": 0.11873859167098999, "learning_rate": 0.0005, "loss": 2.0995, "step": 214660 }, { "epoch": 0.8170870031896348, "grad_norm": 0.12728740274906158, "learning_rate": 0.0005, "loss": 2.1025, "step": 214670 }, { "epoch": 0.8171250656577576, "grad_norm": 0.11791323870420456, "learning_rate": 0.0005, "loss": 2.1061, "step": 214680 }, { "epoch": 0.8171631281258802, "grad_norm": 0.1313634216785431, "learning_rate": 0.0005, "loss": 2.1044, "step": 214690 }, { "epoch": 0.8172011905940029, "grad_norm": 0.13051530718803406, "learning_rate": 0.0005, "loss": 2.1243, "step": 214700 }, { "epoch": 0.8172392530621255, "grad_norm": 0.13162212073802948, "learning_rate": 0.0005, "loss": 2.0969, "step": 214710 }, { "epoch": 0.8172773155302483, "grad_norm": 0.1316559910774231, "learning_rate": 0.0005, "loss": 2.1156, "step": 214720 }, { "epoch": 0.817315377998371, "grad_norm": 0.12329455465078354, "learning_rate": 0.0005, "loss": 2.1201, "step": 214730 }, { "epoch": 0.8173534404664936, "grad_norm": 0.11354649811983109, "learning_rate": 0.0005, "loss": 2.1085, "step": 214740 }, { "epoch": 0.8173915029346163, "grad_norm": 0.12798650562763214, "learning_rate": 0.0005, "loss": 2.1064, "step": 214750 }, { "epoch": 0.8174295654027389, "grad_norm": 0.11971401423215866, "learning_rate": 0.0005, "loss": 2.1011, "step": 214760 }, { "epoch": 0.8174676278708617, "grad_norm": 0.12153248488903046, "learning_rate": 0.0005, "loss": 2.0935, "step": 214770 }, { "epoch": 0.8175056903389843, "grad_norm": 0.12790264189243317, "learning_rate": 0.0005, "loss": 2.1027, "step": 214780 }, { "epoch": 0.817543752807107, "grad_norm": 0.11219383031129837, "learning_rate": 0.0005, "loss": 2.1005, "step": 214790 }, { "epoch": 0.8175818152752297, "grad_norm": 0.12153936922550201, "learning_rate": 0.0005, "loss": 2.1045, "step": 214800 }, { "epoch": 0.8176198777433524, "grad_norm": 0.13037075102329254, "learning_rate": 0.0005, "loss": 2.1009, "step": 214810 }, { "epoch": 0.8176579402114751, "grad_norm": 0.15542103350162506, "learning_rate": 0.0005, "loss": 2.1096, "step": 214820 }, { "epoch": 0.8176960026795977, "grad_norm": 0.13262136280536652, "learning_rate": 0.0005, "loss": 2.1063, "step": 214830 }, { "epoch": 0.8177340651477204, "grad_norm": 0.1375369131565094, "learning_rate": 0.0005, "loss": 2.1122, "step": 214840 }, { "epoch": 0.8177721276158432, "grad_norm": 0.11928694695234299, "learning_rate": 0.0005, "loss": 2.1019, "step": 214850 }, { "epoch": 0.8178101900839658, "grad_norm": 0.11859259009361267, "learning_rate": 0.0005, "loss": 2.1021, "step": 214860 }, { "epoch": 0.8178482525520885, "grad_norm": 0.11539218574762344, "learning_rate": 0.0005, "loss": 2.1058, "step": 214870 }, { "epoch": 0.8178863150202111, "grad_norm": 0.11819108575582504, "learning_rate": 0.0005, "loss": 2.1071, "step": 214880 }, { "epoch": 0.8179243774883338, "grad_norm": 0.14430873095989227, "learning_rate": 0.0005, "loss": 2.0956, "step": 214890 }, { "epoch": 0.8179624399564566, "grad_norm": 0.12576286494731903, "learning_rate": 0.0005, "loss": 2.1075, "step": 214900 }, { "epoch": 0.8180005024245792, "grad_norm": 0.12197782844305038, "learning_rate": 0.0005, "loss": 2.0945, "step": 214910 }, { "epoch": 0.8180385648927019, "grad_norm": 0.12566708028316498, "learning_rate": 0.0005, "loss": 2.1121, "step": 214920 }, { "epoch": 0.8180766273608245, "grad_norm": 0.12516872584819794, "learning_rate": 0.0005, "loss": 2.1059, "step": 214930 }, { "epoch": 0.8181146898289473, "grad_norm": 0.13733117282390594, "learning_rate": 0.0005, "loss": 2.1031, "step": 214940 }, { "epoch": 0.81815275229707, "grad_norm": 0.1253124177455902, "learning_rate": 0.0005, "loss": 2.0865, "step": 214950 }, { "epoch": 0.8181908147651926, "grad_norm": 0.12638896703720093, "learning_rate": 0.0005, "loss": 2.1138, "step": 214960 }, { "epoch": 0.8182288772333153, "grad_norm": 0.13121956586837769, "learning_rate": 0.0005, "loss": 2.1224, "step": 214970 }, { "epoch": 0.818266939701438, "grad_norm": 0.1312199980020523, "learning_rate": 0.0005, "loss": 2.1087, "step": 214980 }, { "epoch": 0.8183050021695607, "grad_norm": 0.12410897016525269, "learning_rate": 0.0005, "loss": 2.1131, "step": 214990 }, { "epoch": 0.8183430646376834, "grad_norm": 0.12948821485042572, "learning_rate": 0.0005, "loss": 2.1016, "step": 215000 }, { "epoch": 0.818381127105806, "grad_norm": 0.12356840074062347, "learning_rate": 0.0005, "loss": 2.1097, "step": 215010 }, { "epoch": 0.8184191895739288, "grad_norm": 0.12674380838871002, "learning_rate": 0.0005, "loss": 2.1093, "step": 215020 }, { "epoch": 0.8184572520420514, "grad_norm": 0.13833408057689667, "learning_rate": 0.0005, "loss": 2.0929, "step": 215030 }, { "epoch": 0.8184953145101741, "grad_norm": 0.12283878773450851, "learning_rate": 0.0005, "loss": 2.1044, "step": 215040 }, { "epoch": 0.8185333769782968, "grad_norm": 0.12343966960906982, "learning_rate": 0.0005, "loss": 2.1113, "step": 215050 }, { "epoch": 0.8185714394464194, "grad_norm": 0.1294650286436081, "learning_rate": 0.0005, "loss": 2.0876, "step": 215060 }, { "epoch": 0.8186095019145422, "grad_norm": 0.12338768690824509, "learning_rate": 0.0005, "loss": 2.1094, "step": 215070 }, { "epoch": 0.8186475643826648, "grad_norm": 0.11877749860286713, "learning_rate": 0.0005, "loss": 2.1074, "step": 215080 }, { "epoch": 0.8186856268507875, "grad_norm": 0.12078936398029327, "learning_rate": 0.0005, "loss": 2.0966, "step": 215090 }, { "epoch": 0.8187236893189102, "grad_norm": 0.11437283456325531, "learning_rate": 0.0005, "loss": 2.1248, "step": 215100 }, { "epoch": 0.8187617517870329, "grad_norm": 0.11495641618967056, "learning_rate": 0.0005, "loss": 2.113, "step": 215110 }, { "epoch": 0.8187998142551556, "grad_norm": 0.5764119029045105, "learning_rate": 0.0005, "loss": 2.1051, "step": 215120 }, { "epoch": 0.8188378767232782, "grad_norm": 0.11969491839408875, "learning_rate": 0.0005, "loss": 2.0977, "step": 215130 }, { "epoch": 0.8188759391914009, "grad_norm": 0.1191493421792984, "learning_rate": 0.0005, "loss": 2.0926, "step": 215140 }, { "epoch": 0.8189140016595237, "grad_norm": 0.11786416918039322, "learning_rate": 0.0005, "loss": 2.1017, "step": 215150 }, { "epoch": 0.8189520641276463, "grad_norm": 0.12184026092290878, "learning_rate": 0.0005, "loss": 2.115, "step": 215160 }, { "epoch": 0.818990126595769, "grad_norm": 0.13123203814029694, "learning_rate": 0.0005, "loss": 2.1147, "step": 215170 }, { "epoch": 0.8190281890638916, "grad_norm": 0.12545669078826904, "learning_rate": 0.0005, "loss": 2.1132, "step": 215180 }, { "epoch": 0.8190662515320143, "grad_norm": 0.14899766445159912, "learning_rate": 0.0005, "loss": 2.1202, "step": 215190 }, { "epoch": 0.8191043140001371, "grad_norm": 0.12839815020561218, "learning_rate": 0.0005, "loss": 2.1074, "step": 215200 }, { "epoch": 0.8191423764682597, "grad_norm": 0.12477117031812668, "learning_rate": 0.0005, "loss": 2.1012, "step": 215210 }, { "epoch": 0.8191804389363824, "grad_norm": 0.1285984218120575, "learning_rate": 0.0005, "loss": 2.1046, "step": 215220 }, { "epoch": 0.819218501404505, "grad_norm": 0.1194237545132637, "learning_rate": 0.0005, "loss": 2.1078, "step": 215230 }, { "epoch": 0.8192565638726278, "grad_norm": 0.12809935212135315, "learning_rate": 0.0005, "loss": 2.1197, "step": 215240 }, { "epoch": 0.8192946263407505, "grad_norm": 0.12358009070158005, "learning_rate": 0.0005, "loss": 2.1088, "step": 215250 }, { "epoch": 0.8193326888088731, "grad_norm": 0.1358308643102646, "learning_rate": 0.0005, "loss": 2.102, "step": 215260 }, { "epoch": 0.8193707512769958, "grad_norm": 0.12565359473228455, "learning_rate": 0.0005, "loss": 2.1011, "step": 215270 }, { "epoch": 0.8194088137451185, "grad_norm": 0.13406111299991608, "learning_rate": 0.0005, "loss": 2.1004, "step": 215280 }, { "epoch": 0.8194468762132412, "grad_norm": 0.11594336479902267, "learning_rate": 0.0005, "loss": 2.0928, "step": 215290 }, { "epoch": 0.8194849386813639, "grad_norm": 0.13356558978557587, "learning_rate": 0.0005, "loss": 2.1056, "step": 215300 }, { "epoch": 0.8195230011494865, "grad_norm": 0.1209784597158432, "learning_rate": 0.0005, "loss": 2.1077, "step": 215310 }, { "epoch": 0.8195610636176093, "grad_norm": 0.12455625832080841, "learning_rate": 0.0005, "loss": 2.1072, "step": 215320 }, { "epoch": 0.8195991260857319, "grad_norm": 0.12635934352874756, "learning_rate": 0.0005, "loss": 2.096, "step": 215330 }, { "epoch": 0.8196371885538546, "grad_norm": 0.12177467346191406, "learning_rate": 0.0005, "loss": 2.087, "step": 215340 }, { "epoch": 0.8196752510219772, "grad_norm": 0.1592981070280075, "learning_rate": 0.0005, "loss": 2.0962, "step": 215350 }, { "epoch": 0.8197133134900999, "grad_norm": 0.13388434052467346, "learning_rate": 0.0005, "loss": 2.1226, "step": 215360 }, { "epoch": 0.8197513759582227, "grad_norm": 0.11722065508365631, "learning_rate": 0.0005, "loss": 2.0935, "step": 215370 }, { "epoch": 0.8197894384263453, "grad_norm": 0.1249929741024971, "learning_rate": 0.0005, "loss": 2.1122, "step": 215380 }, { "epoch": 0.819827500894468, "grad_norm": 0.12349975854158401, "learning_rate": 0.0005, "loss": 2.1117, "step": 215390 }, { "epoch": 0.8198655633625906, "grad_norm": 0.1266603320837021, "learning_rate": 0.0005, "loss": 2.1251, "step": 215400 }, { "epoch": 0.8199036258307134, "grad_norm": 0.12294841557741165, "learning_rate": 0.0005, "loss": 2.1194, "step": 215410 }, { "epoch": 0.8199416882988361, "grad_norm": 0.12204351276159286, "learning_rate": 0.0005, "loss": 2.1096, "step": 215420 }, { "epoch": 0.8199797507669587, "grad_norm": 0.12130658328533173, "learning_rate": 0.0005, "loss": 2.0968, "step": 215430 }, { "epoch": 0.8200178132350814, "grad_norm": 0.11593271046876907, "learning_rate": 0.0005, "loss": 2.1071, "step": 215440 }, { "epoch": 0.8200558757032042, "grad_norm": 0.1328611522912979, "learning_rate": 0.0005, "loss": 2.0975, "step": 215450 }, { "epoch": 0.8200939381713268, "grad_norm": 0.1163841113448143, "learning_rate": 0.0005, "loss": 2.1093, "step": 215460 }, { "epoch": 0.8201320006394495, "grad_norm": 0.1402568370103836, "learning_rate": 0.0005, "loss": 2.1133, "step": 215470 }, { "epoch": 0.8201700631075721, "grad_norm": 0.11890465766191483, "learning_rate": 0.0005, "loss": 2.1067, "step": 215480 }, { "epoch": 0.8202081255756948, "grad_norm": 0.12252867221832275, "learning_rate": 0.0005, "loss": 2.0965, "step": 215490 }, { "epoch": 0.8202461880438175, "grad_norm": 0.12809689342975616, "learning_rate": 0.0005, "loss": 2.1087, "step": 215500 }, { "epoch": 0.8202842505119402, "grad_norm": 0.12146003544330597, "learning_rate": 0.0005, "loss": 2.1123, "step": 215510 }, { "epoch": 0.8203223129800629, "grad_norm": 0.12264453619718552, "learning_rate": 0.0005, "loss": 2.1077, "step": 215520 }, { "epoch": 0.8203603754481855, "grad_norm": 0.12639731168746948, "learning_rate": 0.0005, "loss": 2.0928, "step": 215530 }, { "epoch": 0.8203984379163083, "grad_norm": 0.17479752004146576, "learning_rate": 0.0005, "loss": 2.0986, "step": 215540 }, { "epoch": 0.820436500384431, "grad_norm": 0.11922673135995865, "learning_rate": 0.0005, "loss": 2.112, "step": 215550 }, { "epoch": 0.8204745628525536, "grad_norm": 0.11989131569862366, "learning_rate": 0.0005, "loss": 2.1087, "step": 215560 }, { "epoch": 0.8205126253206763, "grad_norm": 0.10601154714822769, "learning_rate": 0.0005, "loss": 2.1105, "step": 215570 }, { "epoch": 0.820550687788799, "grad_norm": 0.1295652985572815, "learning_rate": 0.0005, "loss": 2.119, "step": 215580 }, { "epoch": 0.8205887502569217, "grad_norm": 0.13028371334075928, "learning_rate": 0.0005, "loss": 2.1078, "step": 215590 }, { "epoch": 0.8206268127250443, "grad_norm": 0.12443135678768158, "learning_rate": 0.0005, "loss": 2.1, "step": 215600 }, { "epoch": 0.820664875193167, "grad_norm": 0.13287542760372162, "learning_rate": 0.0005, "loss": 2.1018, "step": 215610 }, { "epoch": 0.8207029376612897, "grad_norm": 0.13548590242862701, "learning_rate": 0.0005, "loss": 2.1153, "step": 215620 }, { "epoch": 0.8207410001294124, "grad_norm": 0.11423881351947784, "learning_rate": 0.0005, "loss": 2.0987, "step": 215630 }, { "epoch": 0.8207790625975351, "grad_norm": 0.15103867650032043, "learning_rate": 0.0005, "loss": 2.1172, "step": 215640 }, { "epoch": 0.8208171250656577, "grad_norm": 0.1434904932975769, "learning_rate": 0.0005, "loss": 2.1058, "step": 215650 }, { "epoch": 0.8208551875337804, "grad_norm": 0.12201616168022156, "learning_rate": 0.0005, "loss": 2.1091, "step": 215660 }, { "epoch": 0.8208932500019032, "grad_norm": 0.11950390040874481, "learning_rate": 0.0005, "loss": 2.0888, "step": 215670 }, { "epoch": 0.8209313124700258, "grad_norm": 0.18592995405197144, "learning_rate": 0.0005, "loss": 2.106, "step": 215680 }, { "epoch": 0.8209693749381485, "grad_norm": 0.13558170199394226, "learning_rate": 0.0005, "loss": 2.1022, "step": 215690 }, { "epoch": 0.8210074374062711, "grad_norm": 0.12218789756298065, "learning_rate": 0.0005, "loss": 2.1013, "step": 215700 }, { "epoch": 0.8210454998743939, "grad_norm": 0.12565554678440094, "learning_rate": 0.0005, "loss": 2.1065, "step": 215710 }, { "epoch": 0.8210835623425166, "grad_norm": 0.1262756884098053, "learning_rate": 0.0005, "loss": 2.11, "step": 215720 }, { "epoch": 0.8211216248106392, "grad_norm": 0.12086983770132065, "learning_rate": 0.0005, "loss": 2.1011, "step": 215730 }, { "epoch": 0.8211596872787619, "grad_norm": 0.11935533583164215, "learning_rate": 0.0005, "loss": 2.1021, "step": 215740 }, { "epoch": 0.8211977497468846, "grad_norm": 0.12646500766277313, "learning_rate": 0.0005, "loss": 2.1128, "step": 215750 }, { "epoch": 0.8212358122150073, "grad_norm": 0.12062659114599228, "learning_rate": 0.0005, "loss": 2.0992, "step": 215760 }, { "epoch": 0.82127387468313, "grad_norm": 0.1336217075586319, "learning_rate": 0.0005, "loss": 2.1163, "step": 215770 }, { "epoch": 0.8213119371512526, "grad_norm": 0.12084248661994934, "learning_rate": 0.0005, "loss": 2.1068, "step": 215780 }, { "epoch": 0.8213499996193753, "grad_norm": 0.11078011244535446, "learning_rate": 0.0005, "loss": 2.0923, "step": 215790 }, { "epoch": 0.821388062087498, "grad_norm": 0.11964815855026245, "learning_rate": 0.0005, "loss": 2.115, "step": 215800 }, { "epoch": 0.8214261245556207, "grad_norm": 0.11715470254421234, "learning_rate": 0.0005, "loss": 2.1034, "step": 215810 }, { "epoch": 0.8214641870237434, "grad_norm": 0.133929044008255, "learning_rate": 0.0005, "loss": 2.1171, "step": 215820 }, { "epoch": 0.821502249491866, "grad_norm": 0.127930149435997, "learning_rate": 0.0005, "loss": 2.0991, "step": 215830 }, { "epoch": 0.8215403119599888, "grad_norm": 0.1305428445339203, "learning_rate": 0.0005, "loss": 2.1016, "step": 215840 }, { "epoch": 0.8215783744281114, "grad_norm": 0.12557300925254822, "learning_rate": 0.0005, "loss": 2.1005, "step": 215850 }, { "epoch": 0.8216164368962341, "grad_norm": 0.12656274437904358, "learning_rate": 0.0005, "loss": 2.11, "step": 215860 }, { "epoch": 0.8216544993643567, "grad_norm": 0.12123452126979828, "learning_rate": 0.0005, "loss": 2.1191, "step": 215870 }, { "epoch": 0.8216925618324795, "grad_norm": 0.12982730567455292, "learning_rate": 0.0005, "loss": 2.1087, "step": 215880 }, { "epoch": 0.8217306243006022, "grad_norm": 0.13296344876289368, "learning_rate": 0.0005, "loss": 2.1067, "step": 215890 }, { "epoch": 0.8217686867687248, "grad_norm": 0.1178731918334961, "learning_rate": 0.0005, "loss": 2.1009, "step": 215900 }, { "epoch": 0.8218067492368475, "grad_norm": 0.1229533925652504, "learning_rate": 0.0005, "loss": 2.1083, "step": 215910 }, { "epoch": 0.8218448117049701, "grad_norm": 0.12563879787921906, "learning_rate": 0.0005, "loss": 2.1065, "step": 215920 }, { "epoch": 0.8218828741730929, "grad_norm": 0.15010017156600952, "learning_rate": 0.0005, "loss": 2.1226, "step": 215930 }, { "epoch": 0.8219209366412156, "grad_norm": 0.11918827891349792, "learning_rate": 0.0005, "loss": 2.1029, "step": 215940 }, { "epoch": 0.8219589991093382, "grad_norm": 0.1260400414466858, "learning_rate": 0.0005, "loss": 2.1041, "step": 215950 }, { "epoch": 0.8219970615774609, "grad_norm": 0.12422250956296921, "learning_rate": 0.0005, "loss": 2.1121, "step": 215960 }, { "epoch": 0.8220351240455837, "grad_norm": 0.12583960592746735, "learning_rate": 0.0005, "loss": 2.1105, "step": 215970 }, { "epoch": 0.8220731865137063, "grad_norm": 0.12483104318380356, "learning_rate": 0.0005, "loss": 2.1128, "step": 215980 }, { "epoch": 0.822111248981829, "grad_norm": 0.11402330547571182, "learning_rate": 0.0005, "loss": 2.1082, "step": 215990 }, { "epoch": 0.8221493114499516, "grad_norm": 0.1227409839630127, "learning_rate": 0.0005, "loss": 2.1093, "step": 216000 }, { "epoch": 0.8221873739180744, "grad_norm": 0.12268001586198807, "learning_rate": 0.0005, "loss": 2.099, "step": 216010 }, { "epoch": 0.822225436386197, "grad_norm": 0.12708698213100433, "learning_rate": 0.0005, "loss": 2.0944, "step": 216020 }, { "epoch": 0.8222634988543197, "grad_norm": 0.1555493175983429, "learning_rate": 0.0005, "loss": 2.1077, "step": 216030 }, { "epoch": 0.8223015613224424, "grad_norm": 0.14136861264705658, "learning_rate": 0.0005, "loss": 2.0922, "step": 216040 }, { "epoch": 0.822339623790565, "grad_norm": 0.12100417912006378, "learning_rate": 0.0005, "loss": 2.1001, "step": 216050 }, { "epoch": 0.8223776862586878, "grad_norm": 0.11855873465538025, "learning_rate": 0.0005, "loss": 2.0977, "step": 216060 }, { "epoch": 0.8224157487268104, "grad_norm": 0.12413900345563889, "learning_rate": 0.0005, "loss": 2.1229, "step": 216070 }, { "epoch": 0.8224538111949331, "grad_norm": 0.12667207419872284, "learning_rate": 0.0005, "loss": 2.1042, "step": 216080 }, { "epoch": 0.8224918736630558, "grad_norm": 0.1186310201883316, "learning_rate": 0.0005, "loss": 2.0922, "step": 216090 }, { "epoch": 0.8225299361311785, "grad_norm": 0.11538801342248917, "learning_rate": 0.0005, "loss": 2.1128, "step": 216100 }, { "epoch": 0.8225679985993012, "grad_norm": 0.12002494186162949, "learning_rate": 0.0005, "loss": 2.1173, "step": 216110 }, { "epoch": 0.8226060610674238, "grad_norm": 0.12906166911125183, "learning_rate": 0.0005, "loss": 2.1027, "step": 216120 }, { "epoch": 0.8226441235355465, "grad_norm": 0.1414247453212738, "learning_rate": 0.0005, "loss": 2.0921, "step": 216130 }, { "epoch": 0.8226821860036693, "grad_norm": 0.1321527659893036, "learning_rate": 0.0005, "loss": 2.0977, "step": 216140 }, { "epoch": 0.8227202484717919, "grad_norm": 0.12948259711265564, "learning_rate": 0.0005, "loss": 2.1026, "step": 216150 }, { "epoch": 0.8227583109399146, "grad_norm": 0.12453103810548782, "learning_rate": 0.0005, "loss": 2.1033, "step": 216160 }, { "epoch": 0.8227963734080372, "grad_norm": 0.11883663386106491, "learning_rate": 0.0005, "loss": 2.1022, "step": 216170 }, { "epoch": 0.82283443587616, "grad_norm": 0.12888406217098236, "learning_rate": 0.0005, "loss": 2.1165, "step": 216180 }, { "epoch": 0.8228724983442827, "grad_norm": 0.12209624797105789, "learning_rate": 0.0005, "loss": 2.1075, "step": 216190 }, { "epoch": 0.8229105608124053, "grad_norm": 0.1349632740020752, "learning_rate": 0.0005, "loss": 2.0763, "step": 216200 }, { "epoch": 0.822948623280528, "grad_norm": 0.12418237328529358, "learning_rate": 0.0005, "loss": 2.1104, "step": 216210 }, { "epoch": 0.8229866857486506, "grad_norm": 0.1306927651166916, "learning_rate": 0.0005, "loss": 2.1107, "step": 216220 }, { "epoch": 0.8230247482167734, "grad_norm": 0.1376977562904358, "learning_rate": 0.0005, "loss": 2.0964, "step": 216230 }, { "epoch": 0.8230628106848961, "grad_norm": 0.11857961118221283, "learning_rate": 0.0005, "loss": 2.0969, "step": 216240 }, { "epoch": 0.8231008731530187, "grad_norm": 0.11643446236848831, "learning_rate": 0.0005, "loss": 2.1161, "step": 216250 }, { "epoch": 0.8231389356211414, "grad_norm": 0.1297149360179901, "learning_rate": 0.0005, "loss": 2.0893, "step": 216260 }, { "epoch": 0.8231769980892641, "grad_norm": 0.13119326531887054, "learning_rate": 0.0005, "loss": 2.1017, "step": 216270 }, { "epoch": 0.8232150605573868, "grad_norm": 0.11515495181083679, "learning_rate": 0.0005, "loss": 2.1023, "step": 216280 }, { "epoch": 0.8232531230255095, "grad_norm": 0.1289171725511551, "learning_rate": 0.0005, "loss": 2.0919, "step": 216290 }, { "epoch": 0.8232911854936321, "grad_norm": 0.12597043812274933, "learning_rate": 0.0005, "loss": 2.1047, "step": 216300 }, { "epoch": 0.8233292479617549, "grad_norm": 0.11690404266119003, "learning_rate": 0.0005, "loss": 2.1141, "step": 216310 }, { "epoch": 0.8233673104298775, "grad_norm": 0.12119033187627792, "learning_rate": 0.0005, "loss": 2.0954, "step": 216320 }, { "epoch": 0.8234053728980002, "grad_norm": 0.14844262599945068, "learning_rate": 0.0005, "loss": 2.1147, "step": 216330 }, { "epoch": 0.8234434353661229, "grad_norm": 0.11987555772066116, "learning_rate": 0.0005, "loss": 2.1077, "step": 216340 }, { "epoch": 0.8234814978342455, "grad_norm": 0.13595780730247498, "learning_rate": 0.0005, "loss": 2.1244, "step": 216350 }, { "epoch": 0.8235195603023683, "grad_norm": 0.11209575086832047, "learning_rate": 0.0005, "loss": 2.0902, "step": 216360 }, { "epoch": 0.8235576227704909, "grad_norm": 0.12953747808933258, "learning_rate": 0.0005, "loss": 2.1067, "step": 216370 }, { "epoch": 0.8235956852386136, "grad_norm": 0.12427164614200592, "learning_rate": 0.0005, "loss": 2.1135, "step": 216380 }, { "epoch": 0.8236337477067363, "grad_norm": 0.12212230265140533, "learning_rate": 0.0005, "loss": 2.1032, "step": 216390 }, { "epoch": 0.823671810174859, "grad_norm": 0.12323116511106491, "learning_rate": 0.0005, "loss": 2.1084, "step": 216400 }, { "epoch": 0.8237098726429817, "grad_norm": 0.12646298110485077, "learning_rate": 0.0005, "loss": 2.0995, "step": 216410 }, { "epoch": 0.8237479351111043, "grad_norm": 0.14122170209884644, "learning_rate": 0.0005, "loss": 2.0929, "step": 216420 }, { "epoch": 0.823785997579227, "grad_norm": 0.1250144988298416, "learning_rate": 0.0005, "loss": 2.1001, "step": 216430 }, { "epoch": 0.8238240600473498, "grad_norm": 0.12037564814090729, "learning_rate": 0.0005, "loss": 2.114, "step": 216440 }, { "epoch": 0.8238621225154724, "grad_norm": 0.12027294188737869, "learning_rate": 0.0005, "loss": 2.0968, "step": 216450 }, { "epoch": 0.8239001849835951, "grad_norm": 0.13762728869915009, "learning_rate": 0.0005, "loss": 2.1084, "step": 216460 }, { "epoch": 0.8239382474517177, "grad_norm": 0.12742702662944794, "learning_rate": 0.0005, "loss": 2.1111, "step": 216470 }, { "epoch": 0.8239763099198404, "grad_norm": 0.1193777471780777, "learning_rate": 0.0005, "loss": 2.1009, "step": 216480 }, { "epoch": 0.8240143723879632, "grad_norm": 0.11855217069387436, "learning_rate": 0.0005, "loss": 2.1097, "step": 216490 }, { "epoch": 0.8240524348560858, "grad_norm": 0.12601156532764435, "learning_rate": 0.0005, "loss": 2.0995, "step": 216500 }, { "epoch": 0.8240904973242085, "grad_norm": 0.13579390943050385, "learning_rate": 0.0005, "loss": 2.1066, "step": 216510 }, { "epoch": 0.8241285597923311, "grad_norm": 0.12577751278877258, "learning_rate": 0.0005, "loss": 2.1016, "step": 216520 }, { "epoch": 0.8241666222604539, "grad_norm": 0.1241578757762909, "learning_rate": 0.0005, "loss": 2.0974, "step": 216530 }, { "epoch": 0.8242046847285766, "grad_norm": 0.11784977465867996, "learning_rate": 0.0005, "loss": 2.1113, "step": 216540 }, { "epoch": 0.8242427471966992, "grad_norm": 0.12446720898151398, "learning_rate": 0.0005, "loss": 2.092, "step": 216550 }, { "epoch": 0.8242808096648219, "grad_norm": 0.10872805118560791, "learning_rate": 0.0005, "loss": 2.1139, "step": 216560 }, { "epoch": 0.8243188721329446, "grad_norm": 0.1375352293252945, "learning_rate": 0.0005, "loss": 2.1076, "step": 216570 }, { "epoch": 0.8243569346010673, "grad_norm": 0.12831570208072662, "learning_rate": 0.0005, "loss": 2.0909, "step": 216580 }, { "epoch": 0.82439499706919, "grad_norm": 0.11933586746454239, "learning_rate": 0.0005, "loss": 2.1133, "step": 216590 }, { "epoch": 0.8244330595373126, "grad_norm": 0.12159659713506699, "learning_rate": 0.0005, "loss": 2.1155, "step": 216600 }, { "epoch": 0.8244711220054354, "grad_norm": 0.1195387914776802, "learning_rate": 0.0005, "loss": 2.1075, "step": 216610 }, { "epoch": 0.824509184473558, "grad_norm": 0.11985363066196442, "learning_rate": 0.0005, "loss": 2.0944, "step": 216620 }, { "epoch": 0.8245472469416807, "grad_norm": 0.1207309141755104, "learning_rate": 0.0005, "loss": 2.0973, "step": 216630 }, { "epoch": 0.8245853094098033, "grad_norm": 0.1313052773475647, "learning_rate": 0.0005, "loss": 2.0922, "step": 216640 }, { "epoch": 0.824623371877926, "grad_norm": 0.11789082735776901, "learning_rate": 0.0005, "loss": 2.1116, "step": 216650 }, { "epoch": 0.8246614343460488, "grad_norm": 0.13953982293605804, "learning_rate": 0.0005, "loss": 2.1162, "step": 216660 }, { "epoch": 0.8246994968141714, "grad_norm": 0.12093924731016159, "learning_rate": 0.0005, "loss": 2.1066, "step": 216670 }, { "epoch": 0.8247375592822941, "grad_norm": 0.13901619613170624, "learning_rate": 0.0005, "loss": 2.0962, "step": 216680 }, { "epoch": 0.8247756217504167, "grad_norm": 0.1243644654750824, "learning_rate": 0.0005, "loss": 2.0967, "step": 216690 }, { "epoch": 0.8248136842185395, "grad_norm": 0.1316295564174652, "learning_rate": 0.0005, "loss": 2.1083, "step": 216700 }, { "epoch": 0.8248517466866622, "grad_norm": 0.1418789029121399, "learning_rate": 0.0005, "loss": 2.1037, "step": 216710 }, { "epoch": 0.8248898091547848, "grad_norm": 0.15817764401435852, "learning_rate": 0.0005, "loss": 2.0892, "step": 216720 }, { "epoch": 0.8249278716229075, "grad_norm": 0.12203866243362427, "learning_rate": 0.0005, "loss": 2.1177, "step": 216730 }, { "epoch": 0.8249659340910303, "grad_norm": 0.12293455749750137, "learning_rate": 0.0005, "loss": 2.1044, "step": 216740 }, { "epoch": 0.8250039965591529, "grad_norm": 0.12383758276700974, "learning_rate": 0.0005, "loss": 2.1078, "step": 216750 }, { "epoch": 0.8250420590272756, "grad_norm": 0.1415373533964157, "learning_rate": 0.0005, "loss": 2.1105, "step": 216760 }, { "epoch": 0.8250801214953982, "grad_norm": 0.13275422155857086, "learning_rate": 0.0005, "loss": 2.1146, "step": 216770 }, { "epoch": 0.8251181839635209, "grad_norm": 0.12756270170211792, "learning_rate": 0.0005, "loss": 2.1013, "step": 216780 }, { "epoch": 0.8251562464316436, "grad_norm": 0.12516839802265167, "learning_rate": 0.0005, "loss": 2.1246, "step": 216790 }, { "epoch": 0.8251943088997663, "grad_norm": 0.13058559596538544, "learning_rate": 0.0005, "loss": 2.1117, "step": 216800 }, { "epoch": 0.825232371367889, "grad_norm": 0.1205880343914032, "learning_rate": 0.0005, "loss": 2.1033, "step": 216810 }, { "epoch": 0.8252704338360116, "grad_norm": 0.12645703554153442, "learning_rate": 0.0005, "loss": 2.1086, "step": 216820 }, { "epoch": 0.8253084963041344, "grad_norm": 0.12271429598331451, "learning_rate": 0.0005, "loss": 2.0993, "step": 216830 }, { "epoch": 0.825346558772257, "grad_norm": 0.1316102296113968, "learning_rate": 0.0005, "loss": 2.1076, "step": 216840 }, { "epoch": 0.8253846212403797, "grad_norm": 0.13087411224842072, "learning_rate": 0.0005, "loss": 2.1139, "step": 216850 }, { "epoch": 0.8254226837085024, "grad_norm": 0.14741811156272888, "learning_rate": 0.0005, "loss": 2.1094, "step": 216860 }, { "epoch": 0.8254607461766251, "grad_norm": 0.13531188666820526, "learning_rate": 0.0005, "loss": 2.0898, "step": 216870 }, { "epoch": 0.8254988086447478, "grad_norm": 0.10500217229127884, "learning_rate": 0.0005, "loss": 2.1123, "step": 216880 }, { "epoch": 0.8255368711128704, "grad_norm": 0.13994759321212769, "learning_rate": 0.0005, "loss": 2.1135, "step": 216890 }, { "epoch": 0.8255749335809931, "grad_norm": 0.12008815258741379, "learning_rate": 0.0005, "loss": 2.1091, "step": 216900 }, { "epoch": 0.8256129960491158, "grad_norm": 0.12398341298103333, "learning_rate": 0.0005, "loss": 2.1063, "step": 216910 }, { "epoch": 0.8256510585172385, "grad_norm": 0.11703905463218689, "learning_rate": 0.0005, "loss": 2.0943, "step": 216920 }, { "epoch": 0.8256891209853612, "grad_norm": 0.12600445747375488, "learning_rate": 0.0005, "loss": 2.1101, "step": 216930 }, { "epoch": 0.8257271834534838, "grad_norm": 0.12901932001113892, "learning_rate": 0.0005, "loss": 2.1178, "step": 216940 }, { "epoch": 0.8257652459216065, "grad_norm": 0.12366965413093567, "learning_rate": 0.0005, "loss": 2.1084, "step": 216950 }, { "epoch": 0.8258033083897293, "grad_norm": 0.11896523088216782, "learning_rate": 0.0005, "loss": 2.1195, "step": 216960 }, { "epoch": 0.8258413708578519, "grad_norm": 0.1275978684425354, "learning_rate": 0.0005, "loss": 2.0942, "step": 216970 }, { "epoch": 0.8258794333259746, "grad_norm": 0.13192734122276306, "learning_rate": 0.0005, "loss": 2.0987, "step": 216980 }, { "epoch": 0.8259174957940972, "grad_norm": 0.12553296983242035, "learning_rate": 0.0005, "loss": 2.0984, "step": 216990 }, { "epoch": 0.82595555826222, "grad_norm": 0.1140354722738266, "learning_rate": 0.0005, "loss": 2.113, "step": 217000 }, { "epoch": 0.8259936207303427, "grad_norm": 0.12425722181797028, "learning_rate": 0.0005, "loss": 2.1064, "step": 217010 }, { "epoch": 0.8260316831984653, "grad_norm": 0.12796053290367126, "learning_rate": 0.0005, "loss": 2.0921, "step": 217020 }, { "epoch": 0.826069745666588, "grad_norm": 0.11599701642990112, "learning_rate": 0.0005, "loss": 2.1046, "step": 217030 }, { "epoch": 0.8261078081347107, "grad_norm": 0.12334559112787247, "learning_rate": 0.0005, "loss": 2.1049, "step": 217040 }, { "epoch": 0.8261458706028334, "grad_norm": 0.12353719025850296, "learning_rate": 0.0005, "loss": 2.0928, "step": 217050 }, { "epoch": 0.8261839330709561, "grad_norm": 0.13022439181804657, "learning_rate": 0.0005, "loss": 2.1052, "step": 217060 }, { "epoch": 0.8262219955390787, "grad_norm": 0.12294045090675354, "learning_rate": 0.0005, "loss": 2.1107, "step": 217070 }, { "epoch": 0.8262600580072014, "grad_norm": 0.1288580745458603, "learning_rate": 0.0005, "loss": 2.0962, "step": 217080 }, { "epoch": 0.8262981204753241, "grad_norm": 0.12367033958435059, "learning_rate": 0.0005, "loss": 2.1081, "step": 217090 }, { "epoch": 0.8263361829434468, "grad_norm": 0.12451356649398804, "learning_rate": 0.0005, "loss": 2.1001, "step": 217100 }, { "epoch": 0.8263742454115695, "grad_norm": 0.130989670753479, "learning_rate": 0.0005, "loss": 2.0946, "step": 217110 }, { "epoch": 0.8264123078796921, "grad_norm": 0.12526611983776093, "learning_rate": 0.0005, "loss": 2.0983, "step": 217120 }, { "epoch": 0.8264503703478149, "grad_norm": 0.12423757463693619, "learning_rate": 0.0005, "loss": 2.102, "step": 217130 }, { "epoch": 0.8264884328159375, "grad_norm": 0.12939000129699707, "learning_rate": 0.0005, "loss": 2.1074, "step": 217140 }, { "epoch": 0.8265264952840602, "grad_norm": 0.13361996412277222, "learning_rate": 0.0005, "loss": 2.1041, "step": 217150 }, { "epoch": 0.8265645577521828, "grad_norm": 0.13030046224594116, "learning_rate": 0.0005, "loss": 2.1121, "step": 217160 }, { "epoch": 0.8266026202203056, "grad_norm": 0.11973579227924347, "learning_rate": 0.0005, "loss": 2.0919, "step": 217170 }, { "epoch": 0.8266406826884283, "grad_norm": 0.13034309446811676, "learning_rate": 0.0005, "loss": 2.108, "step": 217180 }, { "epoch": 0.8266787451565509, "grad_norm": 0.12459006905555725, "learning_rate": 0.0005, "loss": 2.1065, "step": 217190 }, { "epoch": 0.8267168076246736, "grad_norm": 0.12662822008132935, "learning_rate": 0.0005, "loss": 2.0997, "step": 217200 }, { "epoch": 0.8267548700927962, "grad_norm": 0.1360860913991928, "learning_rate": 0.0005, "loss": 2.0941, "step": 217210 }, { "epoch": 0.826792932560919, "grad_norm": 0.12168723344802856, "learning_rate": 0.0005, "loss": 2.1102, "step": 217220 }, { "epoch": 0.8268309950290417, "grad_norm": 0.12574991583824158, "learning_rate": 0.0005, "loss": 2.11, "step": 217230 }, { "epoch": 0.8268690574971643, "grad_norm": 0.11954498291015625, "learning_rate": 0.0005, "loss": 2.1048, "step": 217240 }, { "epoch": 0.826907119965287, "grad_norm": 0.11757103353738785, "learning_rate": 0.0005, "loss": 2.1165, "step": 217250 }, { "epoch": 0.8269451824334098, "grad_norm": 0.11557068675756454, "learning_rate": 0.0005, "loss": 2.1001, "step": 217260 }, { "epoch": 0.8269832449015324, "grad_norm": 0.12298137694597244, "learning_rate": 0.0005, "loss": 2.0974, "step": 217270 }, { "epoch": 0.8270213073696551, "grad_norm": 0.1493585854768753, "learning_rate": 0.0005, "loss": 2.0894, "step": 217280 }, { "epoch": 0.8270593698377777, "grad_norm": 0.12169773876667023, "learning_rate": 0.0005, "loss": 2.0921, "step": 217290 }, { "epoch": 0.8270974323059005, "grad_norm": 0.11764802783727646, "learning_rate": 0.0005, "loss": 2.1047, "step": 217300 }, { "epoch": 0.8271354947740232, "grad_norm": 0.11365995556116104, "learning_rate": 0.0005, "loss": 2.115, "step": 217310 }, { "epoch": 0.8271735572421458, "grad_norm": 0.1227889209985733, "learning_rate": 0.0005, "loss": 2.1007, "step": 217320 }, { "epoch": 0.8272116197102685, "grad_norm": 0.12642797827720642, "learning_rate": 0.0005, "loss": 2.1157, "step": 217330 }, { "epoch": 0.8272496821783911, "grad_norm": 0.1310013085603714, "learning_rate": 0.0005, "loss": 2.1281, "step": 217340 }, { "epoch": 0.8272877446465139, "grad_norm": 0.12254031747579575, "learning_rate": 0.0005, "loss": 2.1057, "step": 217350 }, { "epoch": 0.8273258071146365, "grad_norm": 0.1295580267906189, "learning_rate": 0.0005, "loss": 2.1148, "step": 217360 }, { "epoch": 0.8273638695827592, "grad_norm": 0.1251552402973175, "learning_rate": 0.0005, "loss": 2.089, "step": 217370 }, { "epoch": 0.8274019320508819, "grad_norm": 0.13117572665214539, "learning_rate": 0.0005, "loss": 2.1193, "step": 217380 }, { "epoch": 0.8274399945190046, "grad_norm": 0.14766137301921844, "learning_rate": 0.0005, "loss": 2.1107, "step": 217390 }, { "epoch": 0.8274780569871273, "grad_norm": 0.1245618611574173, "learning_rate": 0.0005, "loss": 2.1042, "step": 217400 }, { "epoch": 0.8275161194552499, "grad_norm": 0.12521414458751678, "learning_rate": 0.0005, "loss": 2.1161, "step": 217410 }, { "epoch": 0.8275541819233726, "grad_norm": 0.1354859322309494, "learning_rate": 0.0005, "loss": 2.1084, "step": 217420 }, { "epoch": 0.8275922443914954, "grad_norm": 0.1282625049352646, "learning_rate": 0.0005, "loss": 2.0939, "step": 217430 }, { "epoch": 0.827630306859618, "grad_norm": 0.12341107428073883, "learning_rate": 0.0005, "loss": 2.0936, "step": 217440 }, { "epoch": 0.8276683693277407, "grad_norm": 0.12624134123325348, "learning_rate": 0.0005, "loss": 2.1109, "step": 217450 }, { "epoch": 0.8277064317958633, "grad_norm": 0.13044513761997223, "learning_rate": 0.0005, "loss": 2.0943, "step": 217460 }, { "epoch": 0.8277444942639861, "grad_norm": 0.11772848665714264, "learning_rate": 0.0005, "loss": 2.1177, "step": 217470 }, { "epoch": 0.8277825567321088, "grad_norm": 0.11918339878320694, "learning_rate": 0.0005, "loss": 2.1095, "step": 217480 }, { "epoch": 0.8278206192002314, "grad_norm": 0.11408302932977676, "learning_rate": 0.0005, "loss": 2.1149, "step": 217490 }, { "epoch": 0.8278586816683541, "grad_norm": 0.12150252610445023, "learning_rate": 0.0005, "loss": 2.0851, "step": 217500 }, { "epoch": 0.8278967441364767, "grad_norm": 0.12904538214206696, "learning_rate": 0.0005, "loss": 2.1171, "step": 217510 }, { "epoch": 0.8279348066045995, "grad_norm": 0.13957063853740692, "learning_rate": 0.0005, "loss": 2.0989, "step": 217520 }, { "epoch": 0.8279728690727222, "grad_norm": 0.1259831190109253, "learning_rate": 0.0005, "loss": 2.1069, "step": 217530 }, { "epoch": 0.8280109315408448, "grad_norm": 0.13127802312374115, "learning_rate": 0.0005, "loss": 2.13, "step": 217540 }, { "epoch": 0.8280489940089675, "grad_norm": 0.12426311522722244, "learning_rate": 0.0005, "loss": 2.1054, "step": 217550 }, { "epoch": 0.8280870564770902, "grad_norm": 0.116221584379673, "learning_rate": 0.0005, "loss": 2.0915, "step": 217560 }, { "epoch": 0.8281251189452129, "grad_norm": 0.1291026920080185, "learning_rate": 0.0005, "loss": 2.0989, "step": 217570 }, { "epoch": 0.8281631814133356, "grad_norm": 0.11487631499767303, "learning_rate": 0.0005, "loss": 2.0881, "step": 217580 }, { "epoch": 0.8282012438814582, "grad_norm": 0.11711179465055466, "learning_rate": 0.0005, "loss": 2.1096, "step": 217590 }, { "epoch": 0.828239306349581, "grad_norm": 0.132344588637352, "learning_rate": 0.0005, "loss": 2.1042, "step": 217600 }, { "epoch": 0.8282773688177036, "grad_norm": 0.13396376371383667, "learning_rate": 0.0005, "loss": 2.0892, "step": 217610 }, { "epoch": 0.8283154312858263, "grad_norm": 0.12230243533849716, "learning_rate": 0.0005, "loss": 2.0912, "step": 217620 }, { "epoch": 0.828353493753949, "grad_norm": 0.1329038143157959, "learning_rate": 0.0005, "loss": 2.0965, "step": 217630 }, { "epoch": 0.8283915562220716, "grad_norm": 0.12074906378984451, "learning_rate": 0.0005, "loss": 2.1046, "step": 217640 }, { "epoch": 0.8284296186901944, "grad_norm": 0.12414073944091797, "learning_rate": 0.0005, "loss": 2.1055, "step": 217650 }, { "epoch": 0.828467681158317, "grad_norm": 0.12632207572460175, "learning_rate": 0.0005, "loss": 2.1095, "step": 217660 }, { "epoch": 0.8285057436264397, "grad_norm": 0.12833178043365479, "learning_rate": 0.0005, "loss": 2.0941, "step": 217670 }, { "epoch": 0.8285438060945624, "grad_norm": 0.1254252791404724, "learning_rate": 0.0005, "loss": 2.108, "step": 217680 }, { "epoch": 0.8285818685626851, "grad_norm": 0.12022615224123001, "learning_rate": 0.0005, "loss": 2.0968, "step": 217690 }, { "epoch": 0.8286199310308078, "grad_norm": 0.121268130838871, "learning_rate": 0.0005, "loss": 2.1063, "step": 217700 }, { "epoch": 0.8286579934989304, "grad_norm": 0.12961791455745697, "learning_rate": 0.0005, "loss": 2.0942, "step": 217710 }, { "epoch": 0.8286960559670531, "grad_norm": 0.1238061934709549, "learning_rate": 0.0005, "loss": 2.1082, "step": 217720 }, { "epoch": 0.8287341184351759, "grad_norm": 0.12405844032764435, "learning_rate": 0.0005, "loss": 2.0892, "step": 217730 }, { "epoch": 0.8287721809032985, "grad_norm": 0.11987213045358658, "learning_rate": 0.0005, "loss": 2.1025, "step": 217740 }, { "epoch": 0.8288102433714212, "grad_norm": 0.1331670880317688, "learning_rate": 0.0005, "loss": 2.1062, "step": 217750 }, { "epoch": 0.8288483058395438, "grad_norm": 0.11618906259536743, "learning_rate": 0.0005, "loss": 2.1058, "step": 217760 }, { "epoch": 0.8288863683076665, "grad_norm": 0.12605607509613037, "learning_rate": 0.0005, "loss": 2.1053, "step": 217770 }, { "epoch": 0.8289244307757893, "grad_norm": 0.12620078027248383, "learning_rate": 0.0005, "loss": 2.1034, "step": 217780 }, { "epoch": 0.8289624932439119, "grad_norm": 0.12945833802223206, "learning_rate": 0.0005, "loss": 2.102, "step": 217790 }, { "epoch": 0.8290005557120346, "grad_norm": 0.13022899627685547, "learning_rate": 0.0005, "loss": 2.1014, "step": 217800 }, { "epoch": 0.8290386181801572, "grad_norm": 0.13662181794643402, "learning_rate": 0.0005, "loss": 2.1156, "step": 217810 }, { "epoch": 0.82907668064828, "grad_norm": 0.13192316889762878, "learning_rate": 0.0005, "loss": 2.0998, "step": 217820 }, { "epoch": 0.8291147431164027, "grad_norm": 0.13144901394844055, "learning_rate": 0.0005, "loss": 2.1106, "step": 217830 }, { "epoch": 0.8291528055845253, "grad_norm": 0.11243834346532822, "learning_rate": 0.0005, "loss": 2.0957, "step": 217840 }, { "epoch": 0.829190868052648, "grad_norm": 0.1198180615901947, "learning_rate": 0.0005, "loss": 2.0998, "step": 217850 }, { "epoch": 0.8292289305207707, "grad_norm": 0.11610034108161926, "learning_rate": 0.0005, "loss": 2.1096, "step": 217860 }, { "epoch": 0.8292669929888934, "grad_norm": 0.13124071061611176, "learning_rate": 0.0005, "loss": 2.1085, "step": 217870 }, { "epoch": 0.829305055457016, "grad_norm": 0.11900652945041656, "learning_rate": 0.0005, "loss": 2.0903, "step": 217880 }, { "epoch": 0.8293431179251387, "grad_norm": 0.13047805428504944, "learning_rate": 0.0005, "loss": 2.1041, "step": 217890 }, { "epoch": 0.8293811803932615, "grad_norm": 0.12463737279176712, "learning_rate": 0.0005, "loss": 2.0975, "step": 217900 }, { "epoch": 0.8294192428613841, "grad_norm": 0.12787851691246033, "learning_rate": 0.0005, "loss": 2.1005, "step": 217910 }, { "epoch": 0.8294573053295068, "grad_norm": 0.13656316697597504, "learning_rate": 0.0005, "loss": 2.0978, "step": 217920 }, { "epoch": 0.8294953677976294, "grad_norm": 0.1297297179698944, "learning_rate": 0.0005, "loss": 2.0995, "step": 217930 }, { "epoch": 0.8295334302657521, "grad_norm": 0.13846826553344727, "learning_rate": 0.0005, "loss": 2.0852, "step": 217940 }, { "epoch": 0.8295714927338749, "grad_norm": 0.1337054818868637, "learning_rate": 0.0005, "loss": 2.107, "step": 217950 }, { "epoch": 0.8296095552019975, "grad_norm": 0.11899015307426453, "learning_rate": 0.0005, "loss": 2.1029, "step": 217960 }, { "epoch": 0.8296476176701202, "grad_norm": 0.12474185228347778, "learning_rate": 0.0005, "loss": 2.1054, "step": 217970 }, { "epoch": 0.8296856801382428, "grad_norm": 0.12763509154319763, "learning_rate": 0.0005, "loss": 2.1057, "step": 217980 }, { "epoch": 0.8297237426063656, "grad_norm": 0.13220475614070892, "learning_rate": 0.0005, "loss": 2.103, "step": 217990 }, { "epoch": 0.8297618050744883, "grad_norm": 0.12977369129657745, "learning_rate": 0.0005, "loss": 2.1158, "step": 218000 }, { "epoch": 0.8297998675426109, "grad_norm": 0.13516859710216522, "learning_rate": 0.0005, "loss": 2.1115, "step": 218010 }, { "epoch": 0.8298379300107336, "grad_norm": 0.1203380823135376, "learning_rate": 0.0005, "loss": 2.1069, "step": 218020 }, { "epoch": 0.8298759924788564, "grad_norm": 0.11484825611114502, "learning_rate": 0.0005, "loss": 2.1073, "step": 218030 }, { "epoch": 0.829914054946979, "grad_norm": 0.13916315138339996, "learning_rate": 0.0005, "loss": 2.1181, "step": 218040 }, { "epoch": 0.8299521174151017, "grad_norm": 0.16566237807273865, "learning_rate": 0.0005, "loss": 2.1134, "step": 218050 }, { "epoch": 0.8299901798832243, "grad_norm": 0.1584080308675766, "learning_rate": 0.0005, "loss": 2.1044, "step": 218060 }, { "epoch": 0.830028242351347, "grad_norm": 0.15324938297271729, "learning_rate": 0.0005, "loss": 2.0958, "step": 218070 }, { "epoch": 0.8300663048194697, "grad_norm": 0.12328063696622849, "learning_rate": 0.0005, "loss": 2.1232, "step": 218080 }, { "epoch": 0.8301043672875924, "grad_norm": 0.1370043009519577, "learning_rate": 0.0005, "loss": 2.1065, "step": 218090 }, { "epoch": 0.8301424297557151, "grad_norm": 0.11810302734375, "learning_rate": 0.0005, "loss": 2.0956, "step": 218100 }, { "epoch": 0.8301804922238377, "grad_norm": 0.12439907342195511, "learning_rate": 0.0005, "loss": 2.0989, "step": 218110 }, { "epoch": 0.8302185546919605, "grad_norm": 0.12648242712020874, "learning_rate": 0.0005, "loss": 2.091, "step": 218120 }, { "epoch": 0.8302566171600831, "grad_norm": 0.13632889091968536, "learning_rate": 0.0005, "loss": 2.1014, "step": 218130 }, { "epoch": 0.8302946796282058, "grad_norm": 0.13036006689071655, "learning_rate": 0.0005, "loss": 2.1034, "step": 218140 }, { "epoch": 0.8303327420963285, "grad_norm": 0.12066423892974854, "learning_rate": 0.0005, "loss": 2.1149, "step": 218150 }, { "epoch": 0.8303708045644512, "grad_norm": 0.138320654630661, "learning_rate": 0.0005, "loss": 2.1074, "step": 218160 }, { "epoch": 0.8304088670325739, "grad_norm": 0.12363722175359726, "learning_rate": 0.0005, "loss": 2.0952, "step": 218170 }, { "epoch": 0.8304469295006965, "grad_norm": 0.13549621403217316, "learning_rate": 0.0005, "loss": 2.0913, "step": 218180 }, { "epoch": 0.8304849919688192, "grad_norm": 0.1275758296251297, "learning_rate": 0.0005, "loss": 2.102, "step": 218190 }, { "epoch": 0.8305230544369419, "grad_norm": 0.1225966215133667, "learning_rate": 0.0005, "loss": 2.1204, "step": 218200 }, { "epoch": 0.8305611169050646, "grad_norm": 0.11592172086238861, "learning_rate": 0.0005, "loss": 2.077, "step": 218210 }, { "epoch": 0.8305991793731873, "grad_norm": 0.12450257688760757, "learning_rate": 0.0005, "loss": 2.0964, "step": 218220 }, { "epoch": 0.8306372418413099, "grad_norm": 0.20624670386314392, "learning_rate": 0.0005, "loss": 2.0992, "step": 218230 }, { "epoch": 0.8306753043094326, "grad_norm": 0.12865829467773438, "learning_rate": 0.0005, "loss": 2.1099, "step": 218240 }, { "epoch": 0.8307133667775554, "grad_norm": 0.12566210329532623, "learning_rate": 0.0005, "loss": 2.1078, "step": 218250 }, { "epoch": 0.830751429245678, "grad_norm": 0.12799441814422607, "learning_rate": 0.0005, "loss": 2.1186, "step": 218260 }, { "epoch": 0.8307894917138007, "grad_norm": 0.12885446846485138, "learning_rate": 0.0005, "loss": 2.116, "step": 218270 }, { "epoch": 0.8308275541819233, "grad_norm": 0.13233810663223267, "learning_rate": 0.0005, "loss": 2.1006, "step": 218280 }, { "epoch": 0.8308656166500461, "grad_norm": 0.11711487174034119, "learning_rate": 0.0005, "loss": 2.1029, "step": 218290 }, { "epoch": 0.8309036791181688, "grad_norm": 0.11240831017494202, "learning_rate": 0.0005, "loss": 2.1053, "step": 218300 }, { "epoch": 0.8309417415862914, "grad_norm": 0.12619911134243011, "learning_rate": 0.0005, "loss": 2.1025, "step": 218310 }, { "epoch": 0.8309798040544141, "grad_norm": 0.12383470684289932, "learning_rate": 0.0005, "loss": 2.1005, "step": 218320 }, { "epoch": 0.8310178665225368, "grad_norm": 0.12004560232162476, "learning_rate": 0.0005, "loss": 2.1145, "step": 218330 }, { "epoch": 0.8310559289906595, "grad_norm": 0.12753629684448242, "learning_rate": 0.0005, "loss": 2.109, "step": 218340 }, { "epoch": 0.8310939914587822, "grad_norm": 0.11608237028121948, "learning_rate": 0.0005, "loss": 2.1084, "step": 218350 }, { "epoch": 0.8311320539269048, "grad_norm": 0.1277332901954651, "learning_rate": 0.0005, "loss": 2.1096, "step": 218360 }, { "epoch": 0.8311701163950275, "grad_norm": 0.14176727831363678, "learning_rate": 0.0005, "loss": 2.114, "step": 218370 }, { "epoch": 0.8312081788631502, "grad_norm": 0.12776508927345276, "learning_rate": 0.0005, "loss": 2.094, "step": 218380 }, { "epoch": 0.8312462413312729, "grad_norm": 0.12426704168319702, "learning_rate": 0.0005, "loss": 2.109, "step": 218390 }, { "epoch": 0.8312843037993956, "grad_norm": 0.12347155809402466, "learning_rate": 0.0005, "loss": 2.1119, "step": 218400 }, { "epoch": 0.8313223662675182, "grad_norm": 0.12837913632392883, "learning_rate": 0.0005, "loss": 2.1072, "step": 218410 }, { "epoch": 0.831360428735641, "grad_norm": 0.12590497732162476, "learning_rate": 0.0005, "loss": 2.1181, "step": 218420 }, { "epoch": 0.8313984912037636, "grad_norm": 0.11684665083885193, "learning_rate": 0.0005, "loss": 2.0926, "step": 218430 }, { "epoch": 0.8314365536718863, "grad_norm": 0.1202918216586113, "learning_rate": 0.0005, "loss": 2.0988, "step": 218440 }, { "epoch": 0.831474616140009, "grad_norm": 0.12684597074985504, "learning_rate": 0.0005, "loss": 2.1082, "step": 218450 }, { "epoch": 0.8315126786081317, "grad_norm": 0.11629930138587952, "learning_rate": 0.0005, "loss": 2.1078, "step": 218460 }, { "epoch": 0.8315507410762544, "grad_norm": 0.1290973573923111, "learning_rate": 0.0005, "loss": 2.1058, "step": 218470 }, { "epoch": 0.831588803544377, "grad_norm": 0.11917988955974579, "learning_rate": 0.0005, "loss": 2.0996, "step": 218480 }, { "epoch": 0.8316268660124997, "grad_norm": 0.11952853202819824, "learning_rate": 0.0005, "loss": 2.1002, "step": 218490 }, { "epoch": 0.8316649284806223, "grad_norm": 0.1278664916753769, "learning_rate": 0.0005, "loss": 2.1002, "step": 218500 }, { "epoch": 0.8317029909487451, "grad_norm": 0.12799476087093353, "learning_rate": 0.0005, "loss": 2.1113, "step": 218510 }, { "epoch": 0.8317410534168678, "grad_norm": 0.17281503975391388, "learning_rate": 0.0005, "loss": 2.1015, "step": 218520 }, { "epoch": 0.8317791158849904, "grad_norm": 0.13788138329982758, "learning_rate": 0.0005, "loss": 2.0996, "step": 218530 }, { "epoch": 0.8318171783531131, "grad_norm": 0.12696509063243866, "learning_rate": 0.0005, "loss": 2.0985, "step": 218540 }, { "epoch": 0.8318552408212359, "grad_norm": 0.12793564796447754, "learning_rate": 0.0005, "loss": 2.1045, "step": 218550 }, { "epoch": 0.8318933032893585, "grad_norm": 0.2812157869338989, "learning_rate": 0.0005, "loss": 2.1114, "step": 218560 }, { "epoch": 0.8319313657574812, "grad_norm": 0.11583702266216278, "learning_rate": 0.0005, "loss": 2.1098, "step": 218570 }, { "epoch": 0.8319694282256038, "grad_norm": 0.12513552606105804, "learning_rate": 0.0005, "loss": 2.1036, "step": 218580 }, { "epoch": 0.8320074906937266, "grad_norm": 0.12092099338769913, "learning_rate": 0.0005, "loss": 2.1183, "step": 218590 }, { "epoch": 0.8320455531618492, "grad_norm": 0.14139620959758759, "learning_rate": 0.0005, "loss": 2.105, "step": 218600 }, { "epoch": 0.8320836156299719, "grad_norm": 0.12599727511405945, "learning_rate": 0.0005, "loss": 2.1049, "step": 218610 }, { "epoch": 0.8321216780980946, "grad_norm": 0.11518718302249908, "learning_rate": 0.0005, "loss": 2.0944, "step": 218620 }, { "epoch": 0.8321597405662172, "grad_norm": 0.1322387456893921, "learning_rate": 0.0005, "loss": 2.0976, "step": 218630 }, { "epoch": 0.83219780303434, "grad_norm": 0.11819644272327423, "learning_rate": 0.0005, "loss": 2.1002, "step": 218640 }, { "epoch": 0.8322358655024626, "grad_norm": 0.13278405368328094, "learning_rate": 0.0005, "loss": 2.1039, "step": 218650 }, { "epoch": 0.8322739279705853, "grad_norm": 0.1323469579219818, "learning_rate": 0.0005, "loss": 2.1064, "step": 218660 }, { "epoch": 0.832311990438708, "grad_norm": 0.11011801660060883, "learning_rate": 0.0005, "loss": 2.1081, "step": 218670 }, { "epoch": 0.8323500529068307, "grad_norm": 0.11899255216121674, "learning_rate": 0.0005, "loss": 2.1039, "step": 218680 }, { "epoch": 0.8323881153749534, "grad_norm": 0.12844981253147125, "learning_rate": 0.0005, "loss": 2.1123, "step": 218690 }, { "epoch": 0.832426177843076, "grad_norm": 0.13125044107437134, "learning_rate": 0.0005, "loss": 2.1183, "step": 218700 }, { "epoch": 0.8324642403111987, "grad_norm": 0.13138504326343536, "learning_rate": 0.0005, "loss": 2.0873, "step": 218710 }, { "epoch": 0.8325023027793215, "grad_norm": 0.15032434463500977, "learning_rate": 0.0005, "loss": 2.102, "step": 218720 }, { "epoch": 0.8325403652474441, "grad_norm": 0.12644049525260925, "learning_rate": 0.0005, "loss": 2.1013, "step": 218730 }, { "epoch": 0.8325784277155668, "grad_norm": 0.11857806891202927, "learning_rate": 0.0005, "loss": 2.1057, "step": 218740 }, { "epoch": 0.8326164901836894, "grad_norm": 0.11589282751083374, "learning_rate": 0.0005, "loss": 2.1131, "step": 218750 }, { "epoch": 0.8326545526518122, "grad_norm": 0.1254875361919403, "learning_rate": 0.0005, "loss": 2.1153, "step": 218760 }, { "epoch": 0.8326926151199349, "grad_norm": 0.15716472268104553, "learning_rate": 0.0005, "loss": 2.1039, "step": 218770 }, { "epoch": 0.8327306775880575, "grad_norm": 0.14979439973831177, "learning_rate": 0.0005, "loss": 2.1116, "step": 218780 }, { "epoch": 0.8327687400561802, "grad_norm": 0.12744982540607452, "learning_rate": 0.0005, "loss": 2.104, "step": 218790 }, { "epoch": 0.8328068025243028, "grad_norm": 0.1272817850112915, "learning_rate": 0.0005, "loss": 2.0867, "step": 218800 }, { "epoch": 0.8328448649924256, "grad_norm": 0.12373799085617065, "learning_rate": 0.0005, "loss": 2.0968, "step": 218810 }, { "epoch": 0.8328829274605483, "grad_norm": 0.12223444879055023, "learning_rate": 0.0005, "loss": 2.1175, "step": 218820 }, { "epoch": 0.8329209899286709, "grad_norm": 0.12802813947200775, "learning_rate": 0.0005, "loss": 2.0967, "step": 218830 }, { "epoch": 0.8329590523967936, "grad_norm": 0.12781710922718048, "learning_rate": 0.0005, "loss": 2.1119, "step": 218840 }, { "epoch": 0.8329971148649163, "grad_norm": 0.12687340378761292, "learning_rate": 0.0005, "loss": 2.0979, "step": 218850 }, { "epoch": 0.833035177333039, "grad_norm": 0.12185090035200119, "learning_rate": 0.0005, "loss": 2.1146, "step": 218860 }, { "epoch": 0.8330732398011617, "grad_norm": 0.1304478794336319, "learning_rate": 0.0005, "loss": 2.1107, "step": 218870 }, { "epoch": 0.8331113022692843, "grad_norm": 0.13264945149421692, "learning_rate": 0.0005, "loss": 2.1089, "step": 218880 }, { "epoch": 0.8331493647374071, "grad_norm": 0.1238386332988739, "learning_rate": 0.0005, "loss": 2.1114, "step": 218890 }, { "epoch": 0.8331874272055297, "grad_norm": 0.12317591905593872, "learning_rate": 0.0005, "loss": 2.0945, "step": 218900 }, { "epoch": 0.8332254896736524, "grad_norm": 0.12547940015792847, "learning_rate": 0.0005, "loss": 2.0991, "step": 218910 }, { "epoch": 0.833263552141775, "grad_norm": 0.14949534833431244, "learning_rate": 0.0005, "loss": 2.1043, "step": 218920 }, { "epoch": 0.8333016146098977, "grad_norm": 0.13050684332847595, "learning_rate": 0.0005, "loss": 2.1088, "step": 218930 }, { "epoch": 0.8333396770780205, "grad_norm": 0.12255212664604187, "learning_rate": 0.0005, "loss": 2.1293, "step": 218940 }, { "epoch": 0.8333777395461431, "grad_norm": 0.1258205771446228, "learning_rate": 0.0005, "loss": 2.1096, "step": 218950 }, { "epoch": 0.8334158020142658, "grad_norm": 0.11713549494743347, "learning_rate": 0.0005, "loss": 2.1059, "step": 218960 }, { "epoch": 0.8334538644823885, "grad_norm": 0.12944845855236053, "learning_rate": 0.0005, "loss": 2.121, "step": 218970 }, { "epoch": 0.8334919269505112, "grad_norm": 0.13849878311157227, "learning_rate": 0.0005, "loss": 2.1109, "step": 218980 }, { "epoch": 0.8335299894186339, "grad_norm": 0.13154521584510803, "learning_rate": 0.0005, "loss": 2.1168, "step": 218990 }, { "epoch": 0.8335680518867565, "grad_norm": 0.12196949869394302, "learning_rate": 0.0005, "loss": 2.0972, "step": 219000 }, { "epoch": 0.8336061143548792, "grad_norm": 0.12521928548812866, "learning_rate": 0.0005, "loss": 2.1051, "step": 219010 }, { "epoch": 0.833644176823002, "grad_norm": 0.12222328037023544, "learning_rate": 0.0005, "loss": 2.0975, "step": 219020 }, { "epoch": 0.8336822392911246, "grad_norm": 0.12767820060253143, "learning_rate": 0.0005, "loss": 2.0984, "step": 219030 }, { "epoch": 0.8337203017592473, "grad_norm": 0.15104414522647858, "learning_rate": 0.0005, "loss": 2.1056, "step": 219040 }, { "epoch": 0.8337583642273699, "grad_norm": 0.14058572053909302, "learning_rate": 0.0005, "loss": 2.0939, "step": 219050 }, { "epoch": 0.8337964266954926, "grad_norm": 0.18238197267055511, "learning_rate": 0.0005, "loss": 2.1265, "step": 219060 }, { "epoch": 0.8338344891636154, "grad_norm": 0.12206801027059555, "learning_rate": 0.0005, "loss": 2.1118, "step": 219070 }, { "epoch": 0.833872551631738, "grad_norm": 0.14061588048934937, "learning_rate": 0.0005, "loss": 2.1037, "step": 219080 }, { "epoch": 0.8339106140998607, "grad_norm": 0.13219532370567322, "learning_rate": 0.0005, "loss": 2.1039, "step": 219090 }, { "epoch": 0.8339486765679833, "grad_norm": 0.13353204727172852, "learning_rate": 0.0005, "loss": 2.0939, "step": 219100 }, { "epoch": 0.8339867390361061, "grad_norm": 0.12455740571022034, "learning_rate": 0.0005, "loss": 2.1311, "step": 219110 }, { "epoch": 0.8340248015042288, "grad_norm": 0.12448125332593918, "learning_rate": 0.0005, "loss": 2.1156, "step": 219120 }, { "epoch": 0.8340628639723514, "grad_norm": 0.12801167368888855, "learning_rate": 0.0005, "loss": 2.1109, "step": 219130 }, { "epoch": 0.8341009264404741, "grad_norm": 0.12430154532194138, "learning_rate": 0.0005, "loss": 2.1097, "step": 219140 }, { "epoch": 0.8341389889085968, "grad_norm": 0.13335710763931274, "learning_rate": 0.0005, "loss": 2.0941, "step": 219150 }, { "epoch": 0.8341770513767195, "grad_norm": 0.12447947263717651, "learning_rate": 0.0005, "loss": 2.0965, "step": 219160 }, { "epoch": 0.8342151138448421, "grad_norm": 0.1202947348356247, "learning_rate": 0.0005, "loss": 2.0845, "step": 219170 }, { "epoch": 0.8342531763129648, "grad_norm": 0.12497258931398392, "learning_rate": 0.0005, "loss": 2.1016, "step": 219180 }, { "epoch": 0.8342912387810876, "grad_norm": 0.12731964886188507, "learning_rate": 0.0005, "loss": 2.1064, "step": 219190 }, { "epoch": 0.8343293012492102, "grad_norm": 0.11766664683818817, "learning_rate": 0.0005, "loss": 2.0959, "step": 219200 }, { "epoch": 0.8343673637173329, "grad_norm": 0.11760242283344269, "learning_rate": 0.0005, "loss": 2.1047, "step": 219210 }, { "epoch": 0.8344054261854555, "grad_norm": 0.12806595861911774, "learning_rate": 0.0005, "loss": 2.1006, "step": 219220 }, { "epoch": 0.8344434886535782, "grad_norm": 0.1269293874502182, "learning_rate": 0.0005, "loss": 2.1118, "step": 219230 }, { "epoch": 0.834481551121701, "grad_norm": 0.11925157159566879, "learning_rate": 0.0005, "loss": 2.0955, "step": 219240 }, { "epoch": 0.8345196135898236, "grad_norm": 0.14136989414691925, "learning_rate": 0.0005, "loss": 2.1119, "step": 219250 }, { "epoch": 0.8345576760579463, "grad_norm": 0.12632125616073608, "learning_rate": 0.0005, "loss": 2.1065, "step": 219260 }, { "epoch": 0.8345957385260689, "grad_norm": 0.12249311804771423, "learning_rate": 0.0005, "loss": 2.1042, "step": 219270 }, { "epoch": 0.8346338009941917, "grad_norm": 0.13576222956180573, "learning_rate": 0.0005, "loss": 2.1111, "step": 219280 }, { "epoch": 0.8346718634623144, "grad_norm": 0.35710614919662476, "learning_rate": 0.0005, "loss": 2.1012, "step": 219290 }, { "epoch": 0.834709925930437, "grad_norm": 0.11858486384153366, "learning_rate": 0.0005, "loss": 2.105, "step": 219300 }, { "epoch": 0.8347479883985597, "grad_norm": 0.11976990848779678, "learning_rate": 0.0005, "loss": 2.0948, "step": 219310 }, { "epoch": 0.8347860508666824, "grad_norm": 0.12474948912858963, "learning_rate": 0.0005, "loss": 2.0997, "step": 219320 }, { "epoch": 0.8348241133348051, "grad_norm": 0.1316811442375183, "learning_rate": 0.0005, "loss": 2.1088, "step": 219330 }, { "epoch": 0.8348621758029278, "grad_norm": 0.11638589948415756, "learning_rate": 0.0005, "loss": 2.104, "step": 219340 }, { "epoch": 0.8349002382710504, "grad_norm": 0.12653575837612152, "learning_rate": 0.0005, "loss": 2.1065, "step": 219350 }, { "epoch": 0.8349383007391731, "grad_norm": 0.1143994927406311, "learning_rate": 0.0005, "loss": 2.124, "step": 219360 }, { "epoch": 0.8349763632072958, "grad_norm": 0.1304251253604889, "learning_rate": 0.0005, "loss": 2.1199, "step": 219370 }, { "epoch": 0.8350144256754185, "grad_norm": 0.14210690557956696, "learning_rate": 0.0005, "loss": 2.1025, "step": 219380 }, { "epoch": 0.8350524881435412, "grad_norm": 0.1346653550863266, "learning_rate": 0.0005, "loss": 2.1026, "step": 219390 }, { "epoch": 0.8350905506116638, "grad_norm": 0.12486615777015686, "learning_rate": 0.0005, "loss": 2.1192, "step": 219400 }, { "epoch": 0.8351286130797866, "grad_norm": 0.1316107213497162, "learning_rate": 0.0005, "loss": 2.0899, "step": 219410 }, { "epoch": 0.8351666755479092, "grad_norm": 0.12303043156862259, "learning_rate": 0.0005, "loss": 2.1059, "step": 219420 }, { "epoch": 0.8352047380160319, "grad_norm": 0.17756234109401703, "learning_rate": 0.0005, "loss": 2.0772, "step": 219430 }, { "epoch": 0.8352428004841546, "grad_norm": 0.11724088340997696, "learning_rate": 0.0005, "loss": 2.1004, "step": 219440 }, { "epoch": 0.8352808629522773, "grad_norm": 0.1304939091205597, "learning_rate": 0.0005, "loss": 2.1108, "step": 219450 }, { "epoch": 0.8353189254204, "grad_norm": 0.13139232993125916, "learning_rate": 0.0005, "loss": 2.1032, "step": 219460 }, { "epoch": 0.8353569878885226, "grad_norm": 0.14084558188915253, "learning_rate": 0.0005, "loss": 2.124, "step": 219470 }, { "epoch": 0.8353950503566453, "grad_norm": 0.12776871025562286, "learning_rate": 0.0005, "loss": 2.1001, "step": 219480 }, { "epoch": 0.835433112824768, "grad_norm": 0.12956500053405762, "learning_rate": 0.0005, "loss": 2.1209, "step": 219490 }, { "epoch": 0.8354711752928907, "grad_norm": 0.12888874113559723, "learning_rate": 0.0005, "loss": 2.1038, "step": 219500 }, { "epoch": 0.8355092377610134, "grad_norm": 0.1196303591132164, "learning_rate": 0.0005, "loss": 2.097, "step": 219510 }, { "epoch": 0.835547300229136, "grad_norm": 0.1274517923593521, "learning_rate": 0.0005, "loss": 2.1177, "step": 219520 }, { "epoch": 0.8355853626972587, "grad_norm": 0.12661273777484894, "learning_rate": 0.0005, "loss": 2.1104, "step": 219530 }, { "epoch": 0.8356234251653815, "grad_norm": 0.11761227250099182, "learning_rate": 0.0005, "loss": 2.1167, "step": 219540 }, { "epoch": 0.8356614876335041, "grad_norm": 0.1256171017885208, "learning_rate": 0.0005, "loss": 2.1125, "step": 219550 }, { "epoch": 0.8356995501016268, "grad_norm": 0.11300463229417801, "learning_rate": 0.0005, "loss": 2.0933, "step": 219560 }, { "epoch": 0.8357376125697494, "grad_norm": 0.11957667022943497, "learning_rate": 0.0005, "loss": 2.1069, "step": 219570 }, { "epoch": 0.8357756750378722, "grad_norm": 0.11883150041103363, "learning_rate": 0.0005, "loss": 2.1143, "step": 219580 }, { "epoch": 0.8358137375059949, "grad_norm": 0.13056206703186035, "learning_rate": 0.0005, "loss": 2.1063, "step": 219590 }, { "epoch": 0.8358517999741175, "grad_norm": 0.13161806762218475, "learning_rate": 0.0005, "loss": 2.0964, "step": 219600 }, { "epoch": 0.8358898624422402, "grad_norm": 0.4031476378440857, "learning_rate": 0.0005, "loss": 2.1017, "step": 219610 }, { "epoch": 0.8359279249103629, "grad_norm": 0.14822180569171906, "learning_rate": 0.0005, "loss": 2.0922, "step": 219620 }, { "epoch": 0.8359659873784856, "grad_norm": 0.12903235852718353, "learning_rate": 0.0005, "loss": 2.1052, "step": 219630 }, { "epoch": 0.8360040498466083, "grad_norm": 0.12346906960010529, "learning_rate": 0.0005, "loss": 2.1067, "step": 219640 }, { "epoch": 0.8360421123147309, "grad_norm": 0.127950057387352, "learning_rate": 0.0005, "loss": 2.1226, "step": 219650 }, { "epoch": 0.8360801747828536, "grad_norm": 0.12633047997951508, "learning_rate": 0.0005, "loss": 2.1032, "step": 219660 }, { "epoch": 0.8361182372509763, "grad_norm": 0.1212027296423912, "learning_rate": 0.0005, "loss": 2.1264, "step": 219670 }, { "epoch": 0.836156299719099, "grad_norm": 0.11412563174962997, "learning_rate": 0.0005, "loss": 2.1064, "step": 219680 }, { "epoch": 0.8361943621872217, "grad_norm": 0.13156822323799133, "learning_rate": 0.0005, "loss": 2.1107, "step": 219690 }, { "epoch": 0.8362324246553443, "grad_norm": 0.11729413270950317, "learning_rate": 0.0005, "loss": 2.102, "step": 219700 }, { "epoch": 0.8362704871234671, "grad_norm": 0.12015419453382492, "learning_rate": 0.0005, "loss": 2.1026, "step": 219710 }, { "epoch": 0.8363085495915897, "grad_norm": 0.11273893713951111, "learning_rate": 0.0005, "loss": 2.1125, "step": 219720 }, { "epoch": 0.8363466120597124, "grad_norm": 0.1223645731806755, "learning_rate": 0.0005, "loss": 2.1129, "step": 219730 }, { "epoch": 0.836384674527835, "grad_norm": 0.15054258704185486, "learning_rate": 0.0005, "loss": 2.1041, "step": 219740 }, { "epoch": 0.8364227369959578, "grad_norm": 0.11842544376850128, "learning_rate": 0.0005, "loss": 2.0929, "step": 219750 }, { "epoch": 0.8364607994640805, "grad_norm": 0.12672503292560577, "learning_rate": 0.0005, "loss": 2.1007, "step": 219760 }, { "epoch": 0.8364988619322031, "grad_norm": 0.1286192536354065, "learning_rate": 0.0005, "loss": 2.1, "step": 219770 }, { "epoch": 0.8365369244003258, "grad_norm": 0.12729696929454803, "learning_rate": 0.0005, "loss": 2.1043, "step": 219780 }, { "epoch": 0.8365749868684484, "grad_norm": 0.13377317786216736, "learning_rate": 0.0005, "loss": 2.0995, "step": 219790 }, { "epoch": 0.8366130493365712, "grad_norm": 0.12418297678232193, "learning_rate": 0.0005, "loss": 2.1228, "step": 219800 }, { "epoch": 0.8366511118046939, "grad_norm": 0.12871158123016357, "learning_rate": 0.0005, "loss": 2.112, "step": 219810 }, { "epoch": 0.8366891742728165, "grad_norm": 0.12255019694566727, "learning_rate": 0.0005, "loss": 2.0834, "step": 219820 }, { "epoch": 0.8367272367409392, "grad_norm": 0.12640362977981567, "learning_rate": 0.0005, "loss": 2.0994, "step": 219830 }, { "epoch": 0.836765299209062, "grad_norm": 0.13552077114582062, "learning_rate": 0.0005, "loss": 2.1107, "step": 219840 }, { "epoch": 0.8368033616771846, "grad_norm": 0.11782406270503998, "learning_rate": 0.0005, "loss": 2.1075, "step": 219850 }, { "epoch": 0.8368414241453073, "grad_norm": 0.13368447124958038, "learning_rate": 0.0005, "loss": 2.1129, "step": 219860 }, { "epoch": 0.8368794866134299, "grad_norm": 0.13141457736492157, "learning_rate": 0.0005, "loss": 2.0907, "step": 219870 }, { "epoch": 0.8369175490815527, "grad_norm": 0.12528492510318756, "learning_rate": 0.0005, "loss": 2.1182, "step": 219880 }, { "epoch": 0.8369556115496753, "grad_norm": 0.1313919723033905, "learning_rate": 0.0005, "loss": 2.0997, "step": 219890 }, { "epoch": 0.836993674017798, "grad_norm": 0.1197948232293129, "learning_rate": 0.0005, "loss": 2.0952, "step": 219900 }, { "epoch": 0.8370317364859207, "grad_norm": 0.12959925830364227, "learning_rate": 0.0005, "loss": 2.1029, "step": 219910 }, { "epoch": 0.8370697989540434, "grad_norm": 0.12254883348941803, "learning_rate": 0.0005, "loss": 2.1005, "step": 219920 }, { "epoch": 0.8371078614221661, "grad_norm": 0.12791576981544495, "learning_rate": 0.0005, "loss": 2.1126, "step": 219930 }, { "epoch": 0.8371459238902887, "grad_norm": 0.12873512506484985, "learning_rate": 0.0005, "loss": 2.0948, "step": 219940 }, { "epoch": 0.8371839863584114, "grad_norm": 0.12622743844985962, "learning_rate": 0.0005, "loss": 2.0954, "step": 219950 }, { "epoch": 0.8372220488265341, "grad_norm": 0.1307528167963028, "learning_rate": 0.0005, "loss": 2.108, "step": 219960 }, { "epoch": 0.8372601112946568, "grad_norm": 0.11617814004421234, "learning_rate": 0.0005, "loss": 2.0998, "step": 219970 }, { "epoch": 0.8372981737627795, "grad_norm": 0.12317516654729843, "learning_rate": 0.0005, "loss": 2.1019, "step": 219980 }, { "epoch": 0.8373362362309021, "grad_norm": 0.11890271306037903, "learning_rate": 0.0005, "loss": 2.0985, "step": 219990 }, { "epoch": 0.8373742986990248, "grad_norm": 0.1206265315413475, "learning_rate": 0.0005, "loss": 2.0789, "step": 220000 }, { "epoch": 0.8374123611671476, "grad_norm": 0.12651634216308594, "learning_rate": 0.0005, "loss": 2.0913, "step": 220010 }, { "epoch": 0.8374504236352702, "grad_norm": 0.11712469160556793, "learning_rate": 0.0005, "loss": 2.1054, "step": 220020 }, { "epoch": 0.8374884861033929, "grad_norm": 0.1293676495552063, "learning_rate": 0.0005, "loss": 2.0991, "step": 220030 }, { "epoch": 0.8375265485715155, "grad_norm": 0.13680680096149445, "learning_rate": 0.0005, "loss": 2.0967, "step": 220040 }, { "epoch": 0.8375646110396383, "grad_norm": 0.11812064796686172, "learning_rate": 0.0005, "loss": 2.0852, "step": 220050 }, { "epoch": 0.837602673507761, "grad_norm": 0.12090223282575607, "learning_rate": 0.0005, "loss": 2.1004, "step": 220060 }, { "epoch": 0.8376407359758836, "grad_norm": 0.13474848866462708, "learning_rate": 0.0005, "loss": 2.0967, "step": 220070 }, { "epoch": 0.8376787984440063, "grad_norm": 0.13077902793884277, "learning_rate": 0.0005, "loss": 2.107, "step": 220080 }, { "epoch": 0.8377168609121289, "grad_norm": 0.13111700117588043, "learning_rate": 0.0005, "loss": 2.1019, "step": 220090 }, { "epoch": 0.8377549233802517, "grad_norm": 0.13522939383983612, "learning_rate": 0.0005, "loss": 2.0968, "step": 220100 }, { "epoch": 0.8377929858483744, "grad_norm": 0.11386032402515411, "learning_rate": 0.0005, "loss": 2.1175, "step": 220110 }, { "epoch": 0.837831048316497, "grad_norm": 0.12607534229755402, "learning_rate": 0.0005, "loss": 2.1006, "step": 220120 }, { "epoch": 0.8378691107846197, "grad_norm": 0.12744760513305664, "learning_rate": 0.0005, "loss": 2.1114, "step": 220130 }, { "epoch": 0.8379071732527424, "grad_norm": 0.12559397518634796, "learning_rate": 0.0005, "loss": 2.1006, "step": 220140 }, { "epoch": 0.8379452357208651, "grad_norm": 0.1274642050266266, "learning_rate": 0.0005, "loss": 2.1024, "step": 220150 }, { "epoch": 0.8379832981889878, "grad_norm": 0.12040489912033081, "learning_rate": 0.0005, "loss": 2.1054, "step": 220160 }, { "epoch": 0.8380213606571104, "grad_norm": 0.1237734705209732, "learning_rate": 0.0005, "loss": 2.1082, "step": 220170 }, { "epoch": 0.8380594231252332, "grad_norm": 0.13057562708854675, "learning_rate": 0.0005, "loss": 2.103, "step": 220180 }, { "epoch": 0.8380974855933558, "grad_norm": 0.1266573965549469, "learning_rate": 0.0005, "loss": 2.0954, "step": 220190 }, { "epoch": 0.8381355480614785, "grad_norm": 0.13707292079925537, "learning_rate": 0.0005, "loss": 2.1011, "step": 220200 }, { "epoch": 0.8381736105296012, "grad_norm": 0.12204498797655106, "learning_rate": 0.0005, "loss": 2.1099, "step": 220210 }, { "epoch": 0.8382116729977238, "grad_norm": 0.1297737956047058, "learning_rate": 0.0005, "loss": 2.102, "step": 220220 }, { "epoch": 0.8382497354658466, "grad_norm": 0.11896966397762299, "learning_rate": 0.0005, "loss": 2.097, "step": 220230 }, { "epoch": 0.8382877979339692, "grad_norm": 0.12746648490428925, "learning_rate": 0.0005, "loss": 2.1213, "step": 220240 }, { "epoch": 0.8383258604020919, "grad_norm": 0.13134010136127472, "learning_rate": 0.0005, "loss": 2.1139, "step": 220250 }, { "epoch": 0.8383639228702146, "grad_norm": 0.1297721415758133, "learning_rate": 0.0005, "loss": 2.1142, "step": 220260 }, { "epoch": 0.8384019853383373, "grad_norm": 0.11566124856472015, "learning_rate": 0.0005, "loss": 2.0967, "step": 220270 }, { "epoch": 0.83844004780646, "grad_norm": 0.1283719837665558, "learning_rate": 0.0005, "loss": 2.1089, "step": 220280 }, { "epoch": 0.8384781102745826, "grad_norm": 0.13768593966960907, "learning_rate": 0.0005, "loss": 2.1122, "step": 220290 }, { "epoch": 0.8385161727427053, "grad_norm": 0.13208796083927155, "learning_rate": 0.0005, "loss": 2.0954, "step": 220300 }, { "epoch": 0.8385542352108281, "grad_norm": 0.15101394057273865, "learning_rate": 0.0005, "loss": 2.1114, "step": 220310 }, { "epoch": 0.8385922976789507, "grad_norm": 0.12248805165290833, "learning_rate": 0.0005, "loss": 2.093, "step": 220320 }, { "epoch": 0.8386303601470734, "grad_norm": 0.1368759274482727, "learning_rate": 0.0005, "loss": 2.1011, "step": 220330 }, { "epoch": 0.838668422615196, "grad_norm": 0.12396056950092316, "learning_rate": 0.0005, "loss": 2.0939, "step": 220340 }, { "epoch": 0.8387064850833188, "grad_norm": 0.12895584106445312, "learning_rate": 0.0005, "loss": 2.1083, "step": 220350 }, { "epoch": 0.8387445475514415, "grad_norm": 0.140159010887146, "learning_rate": 0.0005, "loss": 2.1141, "step": 220360 }, { "epoch": 0.8387826100195641, "grad_norm": 0.12067504972219467, "learning_rate": 0.0005, "loss": 2.1044, "step": 220370 }, { "epoch": 0.8388206724876868, "grad_norm": 0.11043280363082886, "learning_rate": 0.0005, "loss": 2.1002, "step": 220380 }, { "epoch": 0.8388587349558094, "grad_norm": 0.12122703343629837, "learning_rate": 0.0005, "loss": 2.1047, "step": 220390 }, { "epoch": 0.8388967974239322, "grad_norm": 0.11654380708932877, "learning_rate": 0.0005, "loss": 2.1096, "step": 220400 }, { "epoch": 0.8389348598920549, "grad_norm": 0.12953002750873566, "learning_rate": 0.0005, "loss": 2.1145, "step": 220410 }, { "epoch": 0.8389729223601775, "grad_norm": 0.12626194953918457, "learning_rate": 0.0005, "loss": 2.1048, "step": 220420 }, { "epoch": 0.8390109848283002, "grad_norm": 0.1339138299226761, "learning_rate": 0.0005, "loss": 2.0937, "step": 220430 }, { "epoch": 0.8390490472964229, "grad_norm": 0.12890183925628662, "learning_rate": 0.0005, "loss": 2.1013, "step": 220440 }, { "epoch": 0.8390871097645456, "grad_norm": 0.12708349525928497, "learning_rate": 0.0005, "loss": 2.1121, "step": 220450 }, { "epoch": 0.8391251722326682, "grad_norm": 0.12480761110782623, "learning_rate": 0.0005, "loss": 2.1096, "step": 220460 }, { "epoch": 0.8391632347007909, "grad_norm": 0.11401735246181488, "learning_rate": 0.0005, "loss": 2.1013, "step": 220470 }, { "epoch": 0.8392012971689137, "grad_norm": 0.12634459137916565, "learning_rate": 0.0005, "loss": 2.1174, "step": 220480 }, { "epoch": 0.8392393596370363, "grad_norm": 0.11847994476556778, "learning_rate": 0.0005, "loss": 2.1102, "step": 220490 }, { "epoch": 0.839277422105159, "grad_norm": 0.1256241649389267, "learning_rate": 0.0005, "loss": 2.0971, "step": 220500 }, { "epoch": 0.8393154845732816, "grad_norm": 0.12276550382375717, "learning_rate": 0.0005, "loss": 2.0925, "step": 220510 }, { "epoch": 0.8393535470414043, "grad_norm": 0.13058394193649292, "learning_rate": 0.0005, "loss": 2.1073, "step": 220520 }, { "epoch": 0.8393916095095271, "grad_norm": 0.13051599264144897, "learning_rate": 0.0005, "loss": 2.1081, "step": 220530 }, { "epoch": 0.8394296719776497, "grad_norm": 0.12998336553573608, "learning_rate": 0.0005, "loss": 2.1115, "step": 220540 }, { "epoch": 0.8394677344457724, "grad_norm": 0.12816651165485382, "learning_rate": 0.0005, "loss": 2.1001, "step": 220550 }, { "epoch": 0.839505796913895, "grad_norm": 0.12044844776391983, "learning_rate": 0.0005, "loss": 2.098, "step": 220560 }, { "epoch": 0.8395438593820178, "grad_norm": 0.13804836571216583, "learning_rate": 0.0005, "loss": 2.1201, "step": 220570 }, { "epoch": 0.8395819218501405, "grad_norm": 0.12196008116006851, "learning_rate": 0.0005, "loss": 2.1123, "step": 220580 }, { "epoch": 0.8396199843182631, "grad_norm": 0.127668097615242, "learning_rate": 0.0005, "loss": 2.1144, "step": 220590 }, { "epoch": 0.8396580467863858, "grad_norm": 0.13133740425109863, "learning_rate": 0.0005, "loss": 2.0973, "step": 220600 }, { "epoch": 0.8396961092545085, "grad_norm": 0.13879632949829102, "learning_rate": 0.0005, "loss": 2.1013, "step": 220610 }, { "epoch": 0.8397341717226312, "grad_norm": 0.12597687542438507, "learning_rate": 0.0005, "loss": 2.1069, "step": 220620 }, { "epoch": 0.8397722341907539, "grad_norm": 0.1211690902709961, "learning_rate": 0.0005, "loss": 2.0911, "step": 220630 }, { "epoch": 0.8398102966588765, "grad_norm": 0.13215793669223785, "learning_rate": 0.0005, "loss": 2.11, "step": 220640 }, { "epoch": 0.8398483591269992, "grad_norm": 0.1430741250514984, "learning_rate": 0.0005, "loss": 2.1029, "step": 220650 }, { "epoch": 0.839886421595122, "grad_norm": 0.1376802921295166, "learning_rate": 0.0005, "loss": 2.1052, "step": 220660 }, { "epoch": 0.8399244840632446, "grad_norm": 0.12379004806280136, "learning_rate": 0.0005, "loss": 2.1162, "step": 220670 }, { "epoch": 0.8399625465313673, "grad_norm": 0.12066813558340073, "learning_rate": 0.0005, "loss": 2.0853, "step": 220680 }, { "epoch": 0.8400006089994899, "grad_norm": 0.1271403282880783, "learning_rate": 0.0005, "loss": 2.101, "step": 220690 }, { "epoch": 0.8400386714676127, "grad_norm": 0.12651439011096954, "learning_rate": 0.0005, "loss": 2.1119, "step": 220700 }, { "epoch": 0.8400767339357353, "grad_norm": 0.1287226527929306, "learning_rate": 0.0005, "loss": 2.0833, "step": 220710 }, { "epoch": 0.840114796403858, "grad_norm": 0.13046154379844666, "learning_rate": 0.0005, "loss": 2.1046, "step": 220720 }, { "epoch": 0.8401528588719807, "grad_norm": 0.11079791933298111, "learning_rate": 0.0005, "loss": 2.1049, "step": 220730 }, { "epoch": 0.8401909213401034, "grad_norm": 0.1274874359369278, "learning_rate": 0.0005, "loss": 2.0981, "step": 220740 }, { "epoch": 0.8402289838082261, "grad_norm": 0.1260407567024231, "learning_rate": 0.0005, "loss": 2.1139, "step": 220750 }, { "epoch": 0.8402670462763487, "grad_norm": 0.11859792470932007, "learning_rate": 0.0005, "loss": 2.1049, "step": 220760 }, { "epoch": 0.8403051087444714, "grad_norm": 0.12220750004053116, "learning_rate": 0.0005, "loss": 2.1039, "step": 220770 }, { "epoch": 0.8403431712125942, "grad_norm": 0.13098108768463135, "learning_rate": 0.0005, "loss": 2.0985, "step": 220780 }, { "epoch": 0.8403812336807168, "grad_norm": 0.13276565074920654, "learning_rate": 0.0005, "loss": 2.1038, "step": 220790 }, { "epoch": 0.8404192961488395, "grad_norm": 0.12173032015562057, "learning_rate": 0.0005, "loss": 2.1022, "step": 220800 }, { "epoch": 0.8404573586169621, "grad_norm": 0.12042824178934097, "learning_rate": 0.0005, "loss": 2.1122, "step": 220810 }, { "epoch": 0.8404954210850848, "grad_norm": 0.12178431451320648, "learning_rate": 0.0005, "loss": 2.093, "step": 220820 }, { "epoch": 0.8405334835532076, "grad_norm": 0.11848913133144379, "learning_rate": 0.0005, "loss": 2.1112, "step": 220830 }, { "epoch": 0.8405715460213302, "grad_norm": 0.11565049737691879, "learning_rate": 0.0005, "loss": 2.1015, "step": 220840 }, { "epoch": 0.8406096084894529, "grad_norm": 0.12566454708576202, "learning_rate": 0.0005, "loss": 2.126, "step": 220850 }, { "epoch": 0.8406476709575755, "grad_norm": 0.12305108457803726, "learning_rate": 0.0005, "loss": 2.1056, "step": 220860 }, { "epoch": 0.8406857334256983, "grad_norm": 0.12301186472177505, "learning_rate": 0.0005, "loss": 2.1109, "step": 220870 }, { "epoch": 0.840723795893821, "grad_norm": 0.1459309160709381, "learning_rate": 0.0005, "loss": 2.1128, "step": 220880 }, { "epoch": 0.8407618583619436, "grad_norm": 0.12649591267108917, "learning_rate": 0.0005, "loss": 2.0977, "step": 220890 }, { "epoch": 0.8407999208300663, "grad_norm": 0.12986882030963898, "learning_rate": 0.0005, "loss": 2.1014, "step": 220900 }, { "epoch": 0.840837983298189, "grad_norm": 0.11345621198415756, "learning_rate": 0.0005, "loss": 2.1084, "step": 220910 }, { "epoch": 0.8408760457663117, "grad_norm": 0.12991900742053986, "learning_rate": 0.0005, "loss": 2.1062, "step": 220920 }, { "epoch": 0.8409141082344344, "grad_norm": 0.11783597618341446, "learning_rate": 0.0005, "loss": 2.0976, "step": 220930 }, { "epoch": 0.840952170702557, "grad_norm": 0.13129445910453796, "learning_rate": 0.0005, "loss": 2.1036, "step": 220940 }, { "epoch": 0.8409902331706797, "grad_norm": 0.15973244607448578, "learning_rate": 0.0005, "loss": 2.1032, "step": 220950 }, { "epoch": 0.8410282956388024, "grad_norm": 0.11920005828142166, "learning_rate": 0.0005, "loss": 2.1114, "step": 220960 }, { "epoch": 0.8410663581069251, "grad_norm": 0.12620969116687775, "learning_rate": 0.0005, "loss": 2.0979, "step": 220970 }, { "epoch": 0.8411044205750478, "grad_norm": 0.12203420698642731, "learning_rate": 0.0005, "loss": 2.1002, "step": 220980 }, { "epoch": 0.8411424830431704, "grad_norm": 0.13088800013065338, "learning_rate": 0.0005, "loss": 2.1001, "step": 220990 }, { "epoch": 0.8411805455112932, "grad_norm": 0.12926673889160156, "learning_rate": 0.0005, "loss": 2.1056, "step": 221000 }, { "epoch": 0.8412186079794158, "grad_norm": 0.12309526652097702, "learning_rate": 0.0005, "loss": 2.0876, "step": 221010 }, { "epoch": 0.8412566704475385, "grad_norm": 0.1410890370607376, "learning_rate": 0.0005, "loss": 2.102, "step": 221020 }, { "epoch": 0.8412947329156611, "grad_norm": 0.14877110719680786, "learning_rate": 0.0005, "loss": 2.1089, "step": 221030 }, { "epoch": 0.8413327953837839, "grad_norm": 0.1271168440580368, "learning_rate": 0.0005, "loss": 2.1081, "step": 221040 }, { "epoch": 0.8413708578519066, "grad_norm": 0.12515568733215332, "learning_rate": 0.0005, "loss": 2.1027, "step": 221050 }, { "epoch": 0.8414089203200292, "grad_norm": 0.1173098087310791, "learning_rate": 0.0005, "loss": 2.1089, "step": 221060 }, { "epoch": 0.8414469827881519, "grad_norm": 0.12076503783464432, "learning_rate": 0.0005, "loss": 2.1096, "step": 221070 }, { "epoch": 0.8414850452562745, "grad_norm": 0.12368505448102951, "learning_rate": 0.0005, "loss": 2.0945, "step": 221080 }, { "epoch": 0.8415231077243973, "grad_norm": 0.1254063993692398, "learning_rate": 0.0005, "loss": 2.105, "step": 221090 }, { "epoch": 0.84156117019252, "grad_norm": 0.1290888488292694, "learning_rate": 0.0005, "loss": 2.1132, "step": 221100 }, { "epoch": 0.8415992326606426, "grad_norm": 0.12299709022045135, "learning_rate": 0.0005, "loss": 2.1126, "step": 221110 }, { "epoch": 0.8416372951287653, "grad_norm": 0.14081443846225739, "learning_rate": 0.0005, "loss": 2.0776, "step": 221120 }, { "epoch": 0.841675357596888, "grad_norm": 0.13779985904693604, "learning_rate": 0.0005, "loss": 2.1049, "step": 221130 }, { "epoch": 0.8417134200650107, "grad_norm": 0.1326400339603424, "learning_rate": 0.0005, "loss": 2.1099, "step": 221140 }, { "epoch": 0.8417514825331334, "grad_norm": 0.13280388712882996, "learning_rate": 0.0005, "loss": 2.0914, "step": 221150 }, { "epoch": 0.841789545001256, "grad_norm": 0.12059993296861649, "learning_rate": 0.0005, "loss": 2.0931, "step": 221160 }, { "epoch": 0.8418276074693788, "grad_norm": 0.12586145102977753, "learning_rate": 0.0005, "loss": 2.0977, "step": 221170 }, { "epoch": 0.8418656699375014, "grad_norm": 0.12924747169017792, "learning_rate": 0.0005, "loss": 2.1073, "step": 221180 }, { "epoch": 0.8419037324056241, "grad_norm": 0.11559860408306122, "learning_rate": 0.0005, "loss": 2.1084, "step": 221190 }, { "epoch": 0.8419417948737468, "grad_norm": 0.13393127918243408, "learning_rate": 0.0005, "loss": 2.1114, "step": 221200 }, { "epoch": 0.8419798573418695, "grad_norm": 0.11904259026050568, "learning_rate": 0.0005, "loss": 2.1112, "step": 221210 }, { "epoch": 0.8420179198099922, "grad_norm": 0.11419398337602615, "learning_rate": 0.0005, "loss": 2.1134, "step": 221220 }, { "epoch": 0.8420559822781148, "grad_norm": 0.12206226587295532, "learning_rate": 0.0005, "loss": 2.1043, "step": 221230 }, { "epoch": 0.8420940447462375, "grad_norm": 0.11776451766490936, "learning_rate": 0.0005, "loss": 2.1045, "step": 221240 }, { "epoch": 0.8421321072143602, "grad_norm": 0.12946538627147675, "learning_rate": 0.0005, "loss": 2.1105, "step": 221250 }, { "epoch": 0.8421701696824829, "grad_norm": 0.12498574703931808, "learning_rate": 0.0005, "loss": 2.1119, "step": 221260 }, { "epoch": 0.8422082321506056, "grad_norm": 0.13250340521335602, "learning_rate": 0.0005, "loss": 2.1076, "step": 221270 }, { "epoch": 0.8422462946187282, "grad_norm": 0.11953128129243851, "learning_rate": 0.0005, "loss": 2.1083, "step": 221280 }, { "epoch": 0.8422843570868509, "grad_norm": 0.12299077957868576, "learning_rate": 0.0005, "loss": 2.0955, "step": 221290 }, { "epoch": 0.8423224195549737, "grad_norm": 0.13264764845371246, "learning_rate": 0.0005, "loss": 2.1023, "step": 221300 }, { "epoch": 0.8423604820230963, "grad_norm": 0.12111787497997284, "learning_rate": 0.0005, "loss": 2.109, "step": 221310 }, { "epoch": 0.842398544491219, "grad_norm": 0.13892684876918793, "learning_rate": 0.0005, "loss": 2.0948, "step": 221320 }, { "epoch": 0.8424366069593416, "grad_norm": 0.12118415534496307, "learning_rate": 0.0005, "loss": 2.1107, "step": 221330 }, { "epoch": 0.8424746694274644, "grad_norm": 0.135015606880188, "learning_rate": 0.0005, "loss": 2.1154, "step": 221340 }, { "epoch": 0.8425127318955871, "grad_norm": 0.13240301609039307, "learning_rate": 0.0005, "loss": 2.1023, "step": 221350 }, { "epoch": 0.8425507943637097, "grad_norm": 0.12435257434844971, "learning_rate": 0.0005, "loss": 2.0947, "step": 221360 }, { "epoch": 0.8425888568318324, "grad_norm": 0.11481736600399017, "learning_rate": 0.0005, "loss": 2.0968, "step": 221370 }, { "epoch": 0.842626919299955, "grad_norm": 0.12455075234174728, "learning_rate": 0.0005, "loss": 2.1158, "step": 221380 }, { "epoch": 0.8426649817680778, "grad_norm": 0.1482924073934555, "learning_rate": 0.0005, "loss": 2.1114, "step": 221390 }, { "epoch": 0.8427030442362005, "grad_norm": 0.1364888846874237, "learning_rate": 0.0005, "loss": 2.1151, "step": 221400 }, { "epoch": 0.8427411067043231, "grad_norm": 0.13500209152698517, "learning_rate": 0.0005, "loss": 2.1059, "step": 221410 }, { "epoch": 0.8427791691724458, "grad_norm": 0.11597223579883575, "learning_rate": 0.0005, "loss": 2.1034, "step": 221420 }, { "epoch": 0.8428172316405685, "grad_norm": 0.12641501426696777, "learning_rate": 0.0005, "loss": 2.1085, "step": 221430 }, { "epoch": 0.8428552941086912, "grad_norm": 0.11433349549770355, "learning_rate": 0.0005, "loss": 2.0905, "step": 221440 }, { "epoch": 0.8428933565768139, "grad_norm": 0.1248387023806572, "learning_rate": 0.0005, "loss": 2.1016, "step": 221450 }, { "epoch": 0.8429314190449365, "grad_norm": 0.12403486669063568, "learning_rate": 0.0005, "loss": 2.1052, "step": 221460 }, { "epoch": 0.8429694815130593, "grad_norm": 0.12142638862133026, "learning_rate": 0.0005, "loss": 2.1001, "step": 221470 }, { "epoch": 0.8430075439811819, "grad_norm": 0.12724417448043823, "learning_rate": 0.0005, "loss": 2.1188, "step": 221480 }, { "epoch": 0.8430456064493046, "grad_norm": 0.1116470918059349, "learning_rate": 0.0005, "loss": 2.1021, "step": 221490 }, { "epoch": 0.8430836689174273, "grad_norm": 0.12297196686267853, "learning_rate": 0.0005, "loss": 2.112, "step": 221500 }, { "epoch": 0.8431217313855499, "grad_norm": 0.11790607869625092, "learning_rate": 0.0005, "loss": 2.1086, "step": 221510 }, { "epoch": 0.8431597938536727, "grad_norm": 0.14102840423583984, "learning_rate": 0.0005, "loss": 2.1123, "step": 221520 }, { "epoch": 0.8431978563217953, "grad_norm": 0.12849067151546478, "learning_rate": 0.0005, "loss": 2.0959, "step": 221530 }, { "epoch": 0.843235918789918, "grad_norm": 0.12495825439691544, "learning_rate": 0.0005, "loss": 2.0989, "step": 221540 }, { "epoch": 0.8432739812580407, "grad_norm": 0.11713171750307083, "learning_rate": 0.0005, "loss": 2.1004, "step": 221550 }, { "epoch": 0.8433120437261634, "grad_norm": 0.1117461696267128, "learning_rate": 0.0005, "loss": 2.1016, "step": 221560 }, { "epoch": 0.8433501061942861, "grad_norm": 0.13182447850704193, "learning_rate": 0.0005, "loss": 2.0947, "step": 221570 }, { "epoch": 0.8433881686624087, "grad_norm": 0.11467888951301575, "learning_rate": 0.0005, "loss": 2.1068, "step": 221580 }, { "epoch": 0.8434262311305314, "grad_norm": 0.12174401432275772, "learning_rate": 0.0005, "loss": 2.1199, "step": 221590 }, { "epoch": 0.8434642935986542, "grad_norm": 0.12947309017181396, "learning_rate": 0.0005, "loss": 2.111, "step": 221600 }, { "epoch": 0.8435023560667768, "grad_norm": 0.1451372653245926, "learning_rate": 0.0005, "loss": 2.0992, "step": 221610 }, { "epoch": 0.8435404185348995, "grad_norm": 0.12875647842884064, "learning_rate": 0.0005, "loss": 2.112, "step": 221620 }, { "epoch": 0.8435784810030221, "grad_norm": 0.11792705953121185, "learning_rate": 0.0005, "loss": 2.1015, "step": 221630 }, { "epoch": 0.8436165434711449, "grad_norm": 0.11868022382259369, "learning_rate": 0.0005, "loss": 2.1134, "step": 221640 }, { "epoch": 0.8436546059392676, "grad_norm": 0.12856294214725494, "learning_rate": 0.0005, "loss": 2.1004, "step": 221650 }, { "epoch": 0.8436926684073902, "grad_norm": 0.12594842910766602, "learning_rate": 0.0005, "loss": 2.081, "step": 221660 }, { "epoch": 0.8437307308755129, "grad_norm": 0.11838579177856445, "learning_rate": 0.0005, "loss": 2.1213, "step": 221670 }, { "epoch": 0.8437687933436355, "grad_norm": 0.125643789768219, "learning_rate": 0.0005, "loss": 2.0925, "step": 221680 }, { "epoch": 0.8438068558117583, "grad_norm": 0.12387394905090332, "learning_rate": 0.0005, "loss": 2.1071, "step": 221690 }, { "epoch": 0.843844918279881, "grad_norm": 0.11876220256090164, "learning_rate": 0.0005, "loss": 2.1098, "step": 221700 }, { "epoch": 0.8438829807480036, "grad_norm": 0.11892592161893845, "learning_rate": 0.0005, "loss": 2.0999, "step": 221710 }, { "epoch": 0.8439210432161263, "grad_norm": 0.14295817911624908, "learning_rate": 0.0005, "loss": 2.1032, "step": 221720 }, { "epoch": 0.843959105684249, "grad_norm": 0.12424919009208679, "learning_rate": 0.0005, "loss": 2.0996, "step": 221730 }, { "epoch": 0.8439971681523717, "grad_norm": 0.13023801147937775, "learning_rate": 0.0005, "loss": 2.0955, "step": 221740 }, { "epoch": 0.8440352306204943, "grad_norm": 0.137488454580307, "learning_rate": 0.0005, "loss": 2.1024, "step": 221750 }, { "epoch": 0.844073293088617, "grad_norm": 0.12765270471572876, "learning_rate": 0.0005, "loss": 2.1151, "step": 221760 }, { "epoch": 0.8441113555567398, "grad_norm": 0.11833908408880234, "learning_rate": 0.0005, "loss": 2.1098, "step": 221770 }, { "epoch": 0.8441494180248624, "grad_norm": 0.12693412601947784, "learning_rate": 0.0005, "loss": 2.1017, "step": 221780 }, { "epoch": 0.8441874804929851, "grad_norm": 0.13138915598392487, "learning_rate": 0.0005, "loss": 2.1062, "step": 221790 }, { "epoch": 0.8442255429611077, "grad_norm": 0.11366184055805206, "learning_rate": 0.0005, "loss": 2.0966, "step": 221800 }, { "epoch": 0.8442636054292304, "grad_norm": 0.14213193953037262, "learning_rate": 0.0005, "loss": 2.1054, "step": 221810 }, { "epoch": 0.8443016678973532, "grad_norm": 0.13295476138591766, "learning_rate": 0.0005, "loss": 2.1108, "step": 221820 }, { "epoch": 0.8443397303654758, "grad_norm": 0.15998639166355133, "learning_rate": 0.0005, "loss": 2.0947, "step": 221830 }, { "epoch": 0.8443777928335985, "grad_norm": 0.13382495939731598, "learning_rate": 0.0005, "loss": 2.1013, "step": 221840 }, { "epoch": 0.8444158553017211, "grad_norm": 0.14077311754226685, "learning_rate": 0.0005, "loss": 2.1156, "step": 221850 }, { "epoch": 0.8444539177698439, "grad_norm": 0.13966163992881775, "learning_rate": 0.0005, "loss": 2.0827, "step": 221860 }, { "epoch": 0.8444919802379666, "grad_norm": 0.13039542734622955, "learning_rate": 0.0005, "loss": 2.1161, "step": 221870 }, { "epoch": 0.8445300427060892, "grad_norm": 0.12404860556125641, "learning_rate": 0.0005, "loss": 2.1057, "step": 221880 }, { "epoch": 0.8445681051742119, "grad_norm": 0.14446274936199188, "learning_rate": 0.0005, "loss": 2.1089, "step": 221890 }, { "epoch": 0.8446061676423346, "grad_norm": 0.12789031863212585, "learning_rate": 0.0005, "loss": 2.1026, "step": 221900 }, { "epoch": 0.8446442301104573, "grad_norm": 0.1414797306060791, "learning_rate": 0.0005, "loss": 2.1072, "step": 221910 }, { "epoch": 0.84468229257858, "grad_norm": 0.12627270817756653, "learning_rate": 0.0005, "loss": 2.0969, "step": 221920 }, { "epoch": 0.8447203550467026, "grad_norm": 0.13195939362049103, "learning_rate": 0.0005, "loss": 2.104, "step": 221930 }, { "epoch": 0.8447584175148253, "grad_norm": 0.13768230378627777, "learning_rate": 0.0005, "loss": 2.0994, "step": 221940 }, { "epoch": 0.844796479982948, "grad_norm": 0.12444497644901276, "learning_rate": 0.0005, "loss": 2.103, "step": 221950 }, { "epoch": 0.8448345424510707, "grad_norm": 0.130240336060524, "learning_rate": 0.0005, "loss": 2.0894, "step": 221960 }, { "epoch": 0.8448726049191934, "grad_norm": 0.1341758668422699, "learning_rate": 0.0005, "loss": 2.1119, "step": 221970 }, { "epoch": 0.844910667387316, "grad_norm": 0.1302516609430313, "learning_rate": 0.0005, "loss": 2.1232, "step": 221980 }, { "epoch": 0.8449487298554388, "grad_norm": 0.1489470899105072, "learning_rate": 0.0005, "loss": 2.1042, "step": 221990 }, { "epoch": 0.8449867923235614, "grad_norm": 0.11727765202522278, "learning_rate": 0.0005, "loss": 2.0888, "step": 222000 }, { "epoch": 0.8450248547916841, "grad_norm": 0.12005919963121414, "learning_rate": 0.0005, "loss": 2.1073, "step": 222010 }, { "epoch": 0.8450629172598068, "grad_norm": 0.12615996599197388, "learning_rate": 0.0005, "loss": 2.1137, "step": 222020 }, { "epoch": 0.8451009797279295, "grad_norm": 0.12618914246559143, "learning_rate": 0.0005, "loss": 2.1077, "step": 222030 }, { "epoch": 0.8451390421960522, "grad_norm": 0.12011348456144333, "learning_rate": 0.0005, "loss": 2.0983, "step": 222040 }, { "epoch": 0.8451771046641748, "grad_norm": 0.12232651561498642, "learning_rate": 0.0005, "loss": 2.1055, "step": 222050 }, { "epoch": 0.8452151671322975, "grad_norm": 0.1303800344467163, "learning_rate": 0.0005, "loss": 2.1122, "step": 222060 }, { "epoch": 0.8452532296004203, "grad_norm": 0.13714206218719482, "learning_rate": 0.0005, "loss": 2.1088, "step": 222070 }, { "epoch": 0.8452912920685429, "grad_norm": 0.1355716437101364, "learning_rate": 0.0005, "loss": 2.0955, "step": 222080 }, { "epoch": 0.8453293545366656, "grad_norm": 0.11672431975603104, "learning_rate": 0.0005, "loss": 2.1021, "step": 222090 }, { "epoch": 0.8453674170047882, "grad_norm": 0.12329725921154022, "learning_rate": 0.0005, "loss": 2.1047, "step": 222100 }, { "epoch": 0.8454054794729109, "grad_norm": 0.12570767104625702, "learning_rate": 0.0005, "loss": 2.1283, "step": 222110 }, { "epoch": 0.8454435419410337, "grad_norm": 0.13327881693840027, "learning_rate": 0.0005, "loss": 2.0952, "step": 222120 }, { "epoch": 0.8454816044091563, "grad_norm": 0.12163237482309341, "learning_rate": 0.0005, "loss": 2.1132, "step": 222130 }, { "epoch": 0.845519666877279, "grad_norm": 0.14293643832206726, "learning_rate": 0.0005, "loss": 2.0911, "step": 222140 }, { "epoch": 0.8455577293454016, "grad_norm": 0.12491180747747421, "learning_rate": 0.0005, "loss": 2.1185, "step": 222150 }, { "epoch": 0.8455957918135244, "grad_norm": 0.14374269545078278, "learning_rate": 0.0005, "loss": 2.0967, "step": 222160 }, { "epoch": 0.8456338542816471, "grad_norm": 0.11893656104803085, "learning_rate": 0.0005, "loss": 2.1158, "step": 222170 }, { "epoch": 0.8456719167497697, "grad_norm": 0.1267782598733902, "learning_rate": 0.0005, "loss": 2.1052, "step": 222180 }, { "epoch": 0.8457099792178924, "grad_norm": 0.13819265365600586, "learning_rate": 0.0005, "loss": 2.0971, "step": 222190 }, { "epoch": 0.8457480416860151, "grad_norm": 0.11883528530597687, "learning_rate": 0.0005, "loss": 2.1089, "step": 222200 }, { "epoch": 0.8457861041541378, "grad_norm": 0.11555849015712738, "learning_rate": 0.0005, "loss": 2.0961, "step": 222210 }, { "epoch": 0.8458241666222605, "grad_norm": 0.13113310933113098, "learning_rate": 0.0005, "loss": 2.0987, "step": 222220 }, { "epoch": 0.8458622290903831, "grad_norm": 0.12390421330928802, "learning_rate": 0.0005, "loss": 2.0994, "step": 222230 }, { "epoch": 0.8459002915585058, "grad_norm": 0.13246551156044006, "learning_rate": 0.0005, "loss": 2.1068, "step": 222240 }, { "epoch": 0.8459383540266285, "grad_norm": 0.12514223158359528, "learning_rate": 0.0005, "loss": 2.1039, "step": 222250 }, { "epoch": 0.8459764164947512, "grad_norm": 0.12240414321422577, "learning_rate": 0.0005, "loss": 2.0958, "step": 222260 }, { "epoch": 0.8460144789628739, "grad_norm": 0.1251474767923355, "learning_rate": 0.0005, "loss": 2.0967, "step": 222270 }, { "epoch": 0.8460525414309965, "grad_norm": 0.13345548510551453, "learning_rate": 0.0005, "loss": 2.1035, "step": 222280 }, { "epoch": 0.8460906038991193, "grad_norm": 0.13378407061100006, "learning_rate": 0.0005, "loss": 2.1136, "step": 222290 }, { "epoch": 0.8461286663672419, "grad_norm": 0.1198481023311615, "learning_rate": 0.0005, "loss": 2.0962, "step": 222300 }, { "epoch": 0.8461667288353646, "grad_norm": 0.12984788417816162, "learning_rate": 0.0005, "loss": 2.0917, "step": 222310 }, { "epoch": 0.8462047913034872, "grad_norm": 0.125489741563797, "learning_rate": 0.0005, "loss": 2.0982, "step": 222320 }, { "epoch": 0.84624285377161, "grad_norm": 0.1202753558754921, "learning_rate": 0.0005, "loss": 2.112, "step": 222330 }, { "epoch": 0.8462809162397327, "grad_norm": 0.12433353066444397, "learning_rate": 0.0005, "loss": 2.102, "step": 222340 }, { "epoch": 0.8463189787078553, "grad_norm": 0.11911118030548096, "learning_rate": 0.0005, "loss": 2.1088, "step": 222350 }, { "epoch": 0.846357041175978, "grad_norm": 0.11664999276399612, "learning_rate": 0.0005, "loss": 2.098, "step": 222360 }, { "epoch": 0.8463951036441006, "grad_norm": 0.13223478198051453, "learning_rate": 0.0005, "loss": 2.0976, "step": 222370 }, { "epoch": 0.8464331661122234, "grad_norm": 0.14218950271606445, "learning_rate": 0.0005, "loss": 2.1054, "step": 222380 }, { "epoch": 0.8464712285803461, "grad_norm": 0.11777843534946442, "learning_rate": 0.0005, "loss": 2.1028, "step": 222390 }, { "epoch": 0.8465092910484687, "grad_norm": 0.12073778361082077, "learning_rate": 0.0005, "loss": 2.0993, "step": 222400 }, { "epoch": 0.8465473535165914, "grad_norm": 0.11218991130590439, "learning_rate": 0.0005, "loss": 2.1099, "step": 222410 }, { "epoch": 0.8465854159847142, "grad_norm": 0.12169850617647171, "learning_rate": 0.0005, "loss": 2.0992, "step": 222420 }, { "epoch": 0.8466234784528368, "grad_norm": 0.11669646948575974, "learning_rate": 0.0005, "loss": 2.0886, "step": 222430 }, { "epoch": 0.8466615409209595, "grad_norm": 0.12841486930847168, "learning_rate": 0.0005, "loss": 2.1017, "step": 222440 }, { "epoch": 0.8466996033890821, "grad_norm": 0.11732825636863708, "learning_rate": 0.0005, "loss": 2.1212, "step": 222450 }, { "epoch": 0.8467376658572049, "grad_norm": 0.12267529964447021, "learning_rate": 0.0005, "loss": 2.1048, "step": 222460 }, { "epoch": 0.8467757283253275, "grad_norm": 0.13632412254810333, "learning_rate": 0.0005, "loss": 2.1003, "step": 222470 }, { "epoch": 0.8468137907934502, "grad_norm": 0.12303720414638519, "learning_rate": 0.0005, "loss": 2.1102, "step": 222480 }, { "epoch": 0.8468518532615729, "grad_norm": 0.1213759183883667, "learning_rate": 0.0005, "loss": 2.0978, "step": 222490 }, { "epoch": 0.8468899157296956, "grad_norm": 0.12312841415405273, "learning_rate": 0.0005, "loss": 2.1071, "step": 222500 }, { "epoch": 0.8469279781978183, "grad_norm": 0.12350822985172272, "learning_rate": 0.0005, "loss": 2.0946, "step": 222510 }, { "epoch": 0.846966040665941, "grad_norm": 0.14025971293449402, "learning_rate": 0.0005, "loss": 2.0968, "step": 222520 }, { "epoch": 0.8470041031340636, "grad_norm": 0.14085686206817627, "learning_rate": 0.0005, "loss": 2.1226, "step": 222530 }, { "epoch": 0.8470421656021863, "grad_norm": 0.12586809694766998, "learning_rate": 0.0005, "loss": 2.0876, "step": 222540 }, { "epoch": 0.847080228070309, "grad_norm": 0.14620359241962433, "learning_rate": 0.0005, "loss": 2.0921, "step": 222550 }, { "epoch": 0.8471182905384317, "grad_norm": 0.12692441046237946, "learning_rate": 0.0005, "loss": 2.0956, "step": 222560 }, { "epoch": 0.8471563530065543, "grad_norm": 0.13988254964351654, "learning_rate": 0.0005, "loss": 2.0914, "step": 222570 }, { "epoch": 0.847194415474677, "grad_norm": 0.13647693395614624, "learning_rate": 0.0005, "loss": 2.0958, "step": 222580 }, { "epoch": 0.8472324779427998, "grad_norm": 0.12744392454624176, "learning_rate": 0.0005, "loss": 2.0933, "step": 222590 }, { "epoch": 0.8472705404109224, "grad_norm": 0.1267772763967514, "learning_rate": 0.0005, "loss": 2.1032, "step": 222600 }, { "epoch": 0.8473086028790451, "grad_norm": 0.12130838632583618, "learning_rate": 0.0005, "loss": 2.1054, "step": 222610 }, { "epoch": 0.8473466653471677, "grad_norm": 0.143631711602211, "learning_rate": 0.0005, "loss": 2.1133, "step": 222620 }, { "epoch": 0.8473847278152905, "grad_norm": 0.12004122883081436, "learning_rate": 0.0005, "loss": 2.1126, "step": 222630 }, { "epoch": 0.8474227902834132, "grad_norm": 0.11501511931419373, "learning_rate": 0.0005, "loss": 2.103, "step": 222640 }, { "epoch": 0.8474608527515358, "grad_norm": 0.1254333108663559, "learning_rate": 0.0005, "loss": 2.1107, "step": 222650 }, { "epoch": 0.8474989152196585, "grad_norm": 0.12307880818843842, "learning_rate": 0.0005, "loss": 2.0908, "step": 222660 }, { "epoch": 0.8475369776877811, "grad_norm": 0.12675033509731293, "learning_rate": 0.0005, "loss": 2.0992, "step": 222670 }, { "epoch": 0.8475750401559039, "grad_norm": 0.1205359548330307, "learning_rate": 0.0005, "loss": 2.1036, "step": 222680 }, { "epoch": 0.8476131026240266, "grad_norm": 0.13939648866653442, "learning_rate": 0.0005, "loss": 2.0967, "step": 222690 }, { "epoch": 0.8476511650921492, "grad_norm": 0.12317508459091187, "learning_rate": 0.0005, "loss": 2.1005, "step": 222700 }, { "epoch": 0.8476892275602719, "grad_norm": 0.1242111399769783, "learning_rate": 0.0005, "loss": 2.1028, "step": 222710 }, { "epoch": 0.8477272900283946, "grad_norm": 0.13081376254558563, "learning_rate": 0.0005, "loss": 2.1102, "step": 222720 }, { "epoch": 0.8477653524965173, "grad_norm": 0.13119825720787048, "learning_rate": 0.0005, "loss": 2.1005, "step": 222730 }, { "epoch": 0.84780341496464, "grad_norm": 0.16249608993530273, "learning_rate": 0.0005, "loss": 2.1062, "step": 222740 }, { "epoch": 0.8478414774327626, "grad_norm": 0.12679052352905273, "learning_rate": 0.0005, "loss": 2.0937, "step": 222750 }, { "epoch": 0.8478795399008854, "grad_norm": 0.15585845708847046, "learning_rate": 0.0005, "loss": 2.1039, "step": 222760 }, { "epoch": 0.847917602369008, "grad_norm": 0.12806294858455658, "learning_rate": 0.0005, "loss": 2.1157, "step": 222770 }, { "epoch": 0.8479556648371307, "grad_norm": 0.11923015862703323, "learning_rate": 0.0005, "loss": 2.1168, "step": 222780 }, { "epoch": 0.8479937273052534, "grad_norm": 0.12365025281906128, "learning_rate": 0.0005, "loss": 2.1077, "step": 222790 }, { "epoch": 0.848031789773376, "grad_norm": 0.15433207154273987, "learning_rate": 0.0005, "loss": 2.0972, "step": 222800 }, { "epoch": 0.8480698522414988, "grad_norm": 0.1409216821193695, "learning_rate": 0.0005, "loss": 2.1204, "step": 222810 }, { "epoch": 0.8481079147096214, "grad_norm": 0.12249742448329926, "learning_rate": 0.0005, "loss": 2.112, "step": 222820 }, { "epoch": 0.8481459771777441, "grad_norm": 0.1369142085313797, "learning_rate": 0.0005, "loss": 2.1049, "step": 222830 }, { "epoch": 0.8481840396458667, "grad_norm": 0.1325635313987732, "learning_rate": 0.0005, "loss": 2.1266, "step": 222840 }, { "epoch": 0.8482221021139895, "grad_norm": 0.11897878348827362, "learning_rate": 0.0005, "loss": 2.0946, "step": 222850 }, { "epoch": 0.8482601645821122, "grad_norm": 0.1231696829199791, "learning_rate": 0.0005, "loss": 2.0909, "step": 222860 }, { "epoch": 0.8482982270502348, "grad_norm": 0.1305019110441208, "learning_rate": 0.0005, "loss": 2.1176, "step": 222870 }, { "epoch": 0.8483362895183575, "grad_norm": 0.11948280781507492, "learning_rate": 0.0005, "loss": 2.1175, "step": 222880 }, { "epoch": 0.8483743519864803, "grad_norm": 0.12036039680242538, "learning_rate": 0.0005, "loss": 2.0905, "step": 222890 }, { "epoch": 0.8484124144546029, "grad_norm": 0.11098724603652954, "learning_rate": 0.0005, "loss": 2.094, "step": 222900 }, { "epoch": 0.8484504769227256, "grad_norm": 0.1361108273267746, "learning_rate": 0.0005, "loss": 2.1001, "step": 222910 }, { "epoch": 0.8484885393908482, "grad_norm": 0.13085906207561493, "learning_rate": 0.0005, "loss": 2.109, "step": 222920 }, { "epoch": 0.848526601858971, "grad_norm": 0.1153787150979042, "learning_rate": 0.0005, "loss": 2.111, "step": 222930 }, { "epoch": 0.8485646643270937, "grad_norm": 0.13161927461624146, "learning_rate": 0.0005, "loss": 2.0967, "step": 222940 }, { "epoch": 0.8486027267952163, "grad_norm": 0.11290472000837326, "learning_rate": 0.0005, "loss": 2.0999, "step": 222950 }, { "epoch": 0.848640789263339, "grad_norm": 0.12351827323436737, "learning_rate": 0.0005, "loss": 2.0939, "step": 222960 }, { "epoch": 0.8486788517314616, "grad_norm": 0.11744271963834763, "learning_rate": 0.0005, "loss": 2.1173, "step": 222970 }, { "epoch": 0.8487169141995844, "grad_norm": 0.11657220125198364, "learning_rate": 0.0005, "loss": 2.0954, "step": 222980 }, { "epoch": 0.848754976667707, "grad_norm": 0.11970972269773483, "learning_rate": 0.0005, "loss": 2.1179, "step": 222990 }, { "epoch": 0.8487930391358297, "grad_norm": 0.14427636563777924, "learning_rate": 0.0005, "loss": 2.0958, "step": 223000 }, { "epoch": 0.8488311016039524, "grad_norm": 0.12226763367652893, "learning_rate": 0.0005, "loss": 2.1138, "step": 223010 }, { "epoch": 0.8488691640720751, "grad_norm": 0.12056204676628113, "learning_rate": 0.0005, "loss": 2.1043, "step": 223020 }, { "epoch": 0.8489072265401978, "grad_norm": 0.11601797491312027, "learning_rate": 0.0005, "loss": 2.1062, "step": 223030 }, { "epoch": 0.8489452890083204, "grad_norm": 0.12741492688655853, "learning_rate": 0.0005, "loss": 2.1078, "step": 223040 }, { "epoch": 0.8489833514764431, "grad_norm": 0.13284242153167725, "learning_rate": 0.0005, "loss": 2.0943, "step": 223050 }, { "epoch": 0.8490214139445659, "grad_norm": 0.120316281914711, "learning_rate": 0.0005, "loss": 2.1037, "step": 223060 }, { "epoch": 0.8490594764126885, "grad_norm": 0.12143097072839737, "learning_rate": 0.0005, "loss": 2.1076, "step": 223070 }, { "epoch": 0.8490975388808112, "grad_norm": 0.1196446344256401, "learning_rate": 0.0005, "loss": 2.0992, "step": 223080 }, { "epoch": 0.8491356013489338, "grad_norm": 0.12994390726089478, "learning_rate": 0.0005, "loss": 2.0839, "step": 223090 }, { "epoch": 0.8491736638170565, "grad_norm": 0.12859565019607544, "learning_rate": 0.0005, "loss": 2.0943, "step": 223100 }, { "epoch": 0.8492117262851793, "grad_norm": 0.12666714191436768, "learning_rate": 0.0005, "loss": 2.1141, "step": 223110 }, { "epoch": 0.8492497887533019, "grad_norm": 0.13240092992782593, "learning_rate": 0.0005, "loss": 2.1044, "step": 223120 }, { "epoch": 0.8492878512214246, "grad_norm": 0.13775987923145294, "learning_rate": 0.0005, "loss": 2.107, "step": 223130 }, { "epoch": 0.8493259136895472, "grad_norm": 0.13414819538593292, "learning_rate": 0.0005, "loss": 2.1062, "step": 223140 }, { "epoch": 0.84936397615767, "grad_norm": 0.12259962409734726, "learning_rate": 0.0005, "loss": 2.0953, "step": 223150 }, { "epoch": 0.8494020386257927, "grad_norm": 0.11610279977321625, "learning_rate": 0.0005, "loss": 2.1046, "step": 223160 }, { "epoch": 0.8494401010939153, "grad_norm": 0.13720989227294922, "learning_rate": 0.0005, "loss": 2.1005, "step": 223170 }, { "epoch": 0.849478163562038, "grad_norm": 0.13144254684448242, "learning_rate": 0.0005, "loss": 2.1002, "step": 223180 }, { "epoch": 0.8495162260301607, "grad_norm": 0.13327936828136444, "learning_rate": 0.0005, "loss": 2.113, "step": 223190 }, { "epoch": 0.8495542884982834, "grad_norm": 0.13548313081264496, "learning_rate": 0.0005, "loss": 2.1051, "step": 223200 }, { "epoch": 0.8495923509664061, "grad_norm": 0.12639567255973816, "learning_rate": 0.0005, "loss": 2.1161, "step": 223210 }, { "epoch": 0.8496304134345287, "grad_norm": 0.12638792395591736, "learning_rate": 0.0005, "loss": 2.1052, "step": 223220 }, { "epoch": 0.8496684759026514, "grad_norm": 0.1326935738325119, "learning_rate": 0.0005, "loss": 2.0998, "step": 223230 }, { "epoch": 0.8497065383707741, "grad_norm": 0.12060036510229111, "learning_rate": 0.0005, "loss": 2.1118, "step": 223240 }, { "epoch": 0.8497446008388968, "grad_norm": 0.13392925262451172, "learning_rate": 0.0005, "loss": 2.1098, "step": 223250 }, { "epoch": 0.8497826633070195, "grad_norm": 0.12088333815336227, "learning_rate": 0.0005, "loss": 2.1262, "step": 223260 }, { "epoch": 0.8498207257751421, "grad_norm": 0.12968561053276062, "learning_rate": 0.0005, "loss": 2.1037, "step": 223270 }, { "epoch": 0.8498587882432649, "grad_norm": 0.1378614455461502, "learning_rate": 0.0005, "loss": 2.1135, "step": 223280 }, { "epoch": 0.8498968507113875, "grad_norm": 0.11719853430986404, "learning_rate": 0.0005, "loss": 2.0941, "step": 223290 }, { "epoch": 0.8499349131795102, "grad_norm": 0.1415800154209137, "learning_rate": 0.0005, "loss": 2.1079, "step": 223300 }, { "epoch": 0.8499729756476329, "grad_norm": 0.13601985573768616, "learning_rate": 0.0005, "loss": 2.1095, "step": 223310 }, { "epoch": 0.8500110381157556, "grad_norm": 0.12387509644031525, "learning_rate": 0.0005, "loss": 2.1013, "step": 223320 }, { "epoch": 0.8500491005838783, "grad_norm": 0.1318059265613556, "learning_rate": 0.0005, "loss": 2.0944, "step": 223330 }, { "epoch": 0.8500871630520009, "grad_norm": 0.14278966188430786, "learning_rate": 0.0005, "loss": 2.1087, "step": 223340 }, { "epoch": 0.8501252255201236, "grad_norm": 0.12081281840801239, "learning_rate": 0.0005, "loss": 2.1024, "step": 223350 }, { "epoch": 0.8501632879882464, "grad_norm": 0.12138575315475464, "learning_rate": 0.0005, "loss": 2.1009, "step": 223360 }, { "epoch": 0.850201350456369, "grad_norm": 0.12061097472906113, "learning_rate": 0.0005, "loss": 2.104, "step": 223370 }, { "epoch": 0.8502394129244917, "grad_norm": 0.12924207746982574, "learning_rate": 0.0005, "loss": 2.1094, "step": 223380 }, { "epoch": 0.8502774753926143, "grad_norm": 0.1417977660894394, "learning_rate": 0.0005, "loss": 2.1041, "step": 223390 }, { "epoch": 0.850315537860737, "grad_norm": 0.13214237987995148, "learning_rate": 0.0005, "loss": 2.1154, "step": 223400 }, { "epoch": 0.8503536003288598, "grad_norm": 0.1482153981924057, "learning_rate": 0.0005, "loss": 2.1215, "step": 223410 }, { "epoch": 0.8503916627969824, "grad_norm": 0.12344057857990265, "learning_rate": 0.0005, "loss": 2.0972, "step": 223420 }, { "epoch": 0.8504297252651051, "grad_norm": 0.1381489783525467, "learning_rate": 0.0005, "loss": 2.1, "step": 223430 }, { "epoch": 0.8504677877332277, "grad_norm": 0.13054247200489044, "learning_rate": 0.0005, "loss": 2.1036, "step": 223440 }, { "epoch": 0.8505058502013505, "grad_norm": 0.12159658968448639, "learning_rate": 0.0005, "loss": 2.1074, "step": 223450 }, { "epoch": 0.8505439126694732, "grad_norm": 0.11975965648889542, "learning_rate": 0.0005, "loss": 2.1139, "step": 223460 }, { "epoch": 0.8505819751375958, "grad_norm": 0.12309782952070236, "learning_rate": 0.0005, "loss": 2.1098, "step": 223470 }, { "epoch": 0.8506200376057185, "grad_norm": 0.12559787929058075, "learning_rate": 0.0005, "loss": 2.1089, "step": 223480 }, { "epoch": 0.8506581000738412, "grad_norm": 0.1299746036529541, "learning_rate": 0.0005, "loss": 2.0944, "step": 223490 }, { "epoch": 0.8506961625419639, "grad_norm": 0.12091419100761414, "learning_rate": 0.0005, "loss": 2.113, "step": 223500 }, { "epoch": 0.8507342250100866, "grad_norm": 0.12265986949205399, "learning_rate": 0.0005, "loss": 2.1147, "step": 223510 }, { "epoch": 0.8507722874782092, "grad_norm": 0.12044291198253632, "learning_rate": 0.0005, "loss": 2.1121, "step": 223520 }, { "epoch": 0.8508103499463319, "grad_norm": 0.1332319676876068, "learning_rate": 0.0005, "loss": 2.0979, "step": 223530 }, { "epoch": 0.8508484124144546, "grad_norm": 0.1274791657924652, "learning_rate": 0.0005, "loss": 2.1016, "step": 223540 }, { "epoch": 0.8508864748825773, "grad_norm": 0.1327950358390808, "learning_rate": 0.0005, "loss": 2.0953, "step": 223550 }, { "epoch": 0.8509245373507, "grad_norm": 0.12357247620820999, "learning_rate": 0.0005, "loss": 2.1175, "step": 223560 }, { "epoch": 0.8509625998188226, "grad_norm": 0.11313042044639587, "learning_rate": 0.0005, "loss": 2.1076, "step": 223570 }, { "epoch": 0.8510006622869454, "grad_norm": 0.12048235535621643, "learning_rate": 0.0005, "loss": 2.1064, "step": 223580 }, { "epoch": 0.851038724755068, "grad_norm": 0.1278276890516281, "learning_rate": 0.0005, "loss": 2.1242, "step": 223590 }, { "epoch": 0.8510767872231907, "grad_norm": 0.13198168575763702, "learning_rate": 0.0005, "loss": 2.1067, "step": 223600 }, { "epoch": 0.8511148496913133, "grad_norm": 0.13076524436473846, "learning_rate": 0.0005, "loss": 2.0989, "step": 223610 }, { "epoch": 0.8511529121594361, "grad_norm": 0.14714770019054413, "learning_rate": 0.0005, "loss": 2.1264, "step": 223620 }, { "epoch": 0.8511909746275588, "grad_norm": 0.1160232275724411, "learning_rate": 0.0005, "loss": 2.1058, "step": 223630 }, { "epoch": 0.8512290370956814, "grad_norm": 0.13508348166942596, "learning_rate": 0.0005, "loss": 2.1022, "step": 223640 }, { "epoch": 0.8512670995638041, "grad_norm": 0.12602970004081726, "learning_rate": 0.0005, "loss": 2.0971, "step": 223650 }, { "epoch": 0.8513051620319267, "grad_norm": 0.12282170355319977, "learning_rate": 0.0005, "loss": 2.0992, "step": 223660 }, { "epoch": 0.8513432245000495, "grad_norm": 0.12498307228088379, "learning_rate": 0.0005, "loss": 2.1191, "step": 223670 }, { "epoch": 0.8513812869681722, "grad_norm": 0.12019147723913193, "learning_rate": 0.0005, "loss": 2.1122, "step": 223680 }, { "epoch": 0.8514193494362948, "grad_norm": 0.1340957134962082, "learning_rate": 0.0005, "loss": 2.0962, "step": 223690 }, { "epoch": 0.8514574119044175, "grad_norm": 0.118826724588871, "learning_rate": 0.0005, "loss": 2.1048, "step": 223700 }, { "epoch": 0.8514954743725403, "grad_norm": 0.12180516868829727, "learning_rate": 0.0005, "loss": 2.1086, "step": 223710 }, { "epoch": 0.8515335368406629, "grad_norm": 0.1166292056441307, "learning_rate": 0.0005, "loss": 2.1061, "step": 223720 }, { "epoch": 0.8515715993087856, "grad_norm": 0.1198718249797821, "learning_rate": 0.0005, "loss": 2.1014, "step": 223730 }, { "epoch": 0.8516096617769082, "grad_norm": 0.14058130979537964, "learning_rate": 0.0005, "loss": 2.0963, "step": 223740 }, { "epoch": 0.851647724245031, "grad_norm": 0.13598237931728363, "learning_rate": 0.0005, "loss": 2.0961, "step": 223750 }, { "epoch": 0.8516857867131536, "grad_norm": 0.14740775525569916, "learning_rate": 0.0005, "loss": 2.1035, "step": 223760 }, { "epoch": 0.8517238491812763, "grad_norm": 0.12089115381240845, "learning_rate": 0.0005, "loss": 2.1311, "step": 223770 }, { "epoch": 0.851761911649399, "grad_norm": 0.1189613789319992, "learning_rate": 0.0005, "loss": 2.1087, "step": 223780 }, { "epoch": 0.8517999741175217, "grad_norm": 0.12509900331497192, "learning_rate": 0.0005, "loss": 2.0995, "step": 223790 }, { "epoch": 0.8518380365856444, "grad_norm": 0.12147897481918335, "learning_rate": 0.0005, "loss": 2.1064, "step": 223800 }, { "epoch": 0.851876099053767, "grad_norm": 0.13214780390262604, "learning_rate": 0.0005, "loss": 2.1082, "step": 223810 }, { "epoch": 0.8519141615218897, "grad_norm": 0.13353584706783295, "learning_rate": 0.0005, "loss": 2.1099, "step": 223820 }, { "epoch": 0.8519522239900124, "grad_norm": 0.14544062316417694, "learning_rate": 0.0005, "loss": 2.0955, "step": 223830 }, { "epoch": 0.8519902864581351, "grad_norm": 0.13279809057712555, "learning_rate": 0.0005, "loss": 2.0964, "step": 223840 }, { "epoch": 0.8520283489262578, "grad_norm": 0.13682380318641663, "learning_rate": 0.0005, "loss": 2.1154, "step": 223850 }, { "epoch": 0.8520664113943804, "grad_norm": 0.1213570311665535, "learning_rate": 0.0005, "loss": 2.1024, "step": 223860 }, { "epoch": 0.8521044738625031, "grad_norm": 0.116135373711586, "learning_rate": 0.0005, "loss": 2.1037, "step": 223870 }, { "epoch": 0.8521425363306259, "grad_norm": 0.11622577905654907, "learning_rate": 0.0005, "loss": 2.1002, "step": 223880 }, { "epoch": 0.8521805987987485, "grad_norm": 0.14203287661075592, "learning_rate": 0.0005, "loss": 2.1037, "step": 223890 }, { "epoch": 0.8522186612668712, "grad_norm": 0.14461076259613037, "learning_rate": 0.0005, "loss": 2.1155, "step": 223900 }, { "epoch": 0.8522567237349938, "grad_norm": 0.12584730982780457, "learning_rate": 0.0005, "loss": 2.1012, "step": 223910 }, { "epoch": 0.8522947862031166, "grad_norm": 0.11870056390762329, "learning_rate": 0.0005, "loss": 2.1135, "step": 223920 }, { "epoch": 0.8523328486712393, "grad_norm": 0.1315356343984604, "learning_rate": 0.0005, "loss": 2.0937, "step": 223930 }, { "epoch": 0.8523709111393619, "grad_norm": 0.12040794640779495, "learning_rate": 0.0005, "loss": 2.0966, "step": 223940 }, { "epoch": 0.8524089736074846, "grad_norm": 0.1251789778470993, "learning_rate": 0.0005, "loss": 2.1028, "step": 223950 }, { "epoch": 0.8524470360756072, "grad_norm": 0.13328661024570465, "learning_rate": 0.0005, "loss": 2.1086, "step": 223960 }, { "epoch": 0.85248509854373, "grad_norm": 0.12740342319011688, "learning_rate": 0.0005, "loss": 2.1061, "step": 223970 }, { "epoch": 0.8525231610118527, "grad_norm": 0.12411228567361832, "learning_rate": 0.0005, "loss": 2.0802, "step": 223980 }, { "epoch": 0.8525612234799753, "grad_norm": 0.13667641580104828, "learning_rate": 0.0005, "loss": 2.0946, "step": 223990 }, { "epoch": 0.852599285948098, "grad_norm": 0.12220586091279984, "learning_rate": 0.0005, "loss": 2.1099, "step": 224000 }, { "epoch": 0.8526373484162207, "grad_norm": 0.1269436925649643, "learning_rate": 0.0005, "loss": 2.096, "step": 224010 }, { "epoch": 0.8526754108843434, "grad_norm": 0.12647700309753418, "learning_rate": 0.0005, "loss": 2.1067, "step": 224020 }, { "epoch": 0.8527134733524661, "grad_norm": 0.11566011607646942, "learning_rate": 0.0005, "loss": 2.0988, "step": 224030 }, { "epoch": 0.8527515358205887, "grad_norm": 0.12147703021764755, "learning_rate": 0.0005, "loss": 2.0977, "step": 224040 }, { "epoch": 0.8527895982887115, "grad_norm": 0.11546335369348526, "learning_rate": 0.0005, "loss": 2.0968, "step": 224050 }, { "epoch": 0.8528276607568341, "grad_norm": 0.1299871951341629, "learning_rate": 0.0005, "loss": 2.1028, "step": 224060 }, { "epoch": 0.8528657232249568, "grad_norm": 0.12042056024074554, "learning_rate": 0.0005, "loss": 2.0932, "step": 224070 }, { "epoch": 0.8529037856930795, "grad_norm": 0.11888284981250763, "learning_rate": 0.0005, "loss": 2.0946, "step": 224080 }, { "epoch": 0.8529418481612021, "grad_norm": 0.13539917767047882, "learning_rate": 0.0005, "loss": 2.0836, "step": 224090 }, { "epoch": 0.8529799106293249, "grad_norm": 0.13094571232795715, "learning_rate": 0.0005, "loss": 2.1028, "step": 224100 }, { "epoch": 0.8530179730974475, "grad_norm": 0.11932310461997986, "learning_rate": 0.0005, "loss": 2.1099, "step": 224110 }, { "epoch": 0.8530560355655702, "grad_norm": 0.1398283690214157, "learning_rate": 0.0005, "loss": 2.1066, "step": 224120 }, { "epoch": 0.8530940980336928, "grad_norm": 0.12287000566720963, "learning_rate": 0.0005, "loss": 2.0949, "step": 224130 }, { "epoch": 0.8531321605018156, "grad_norm": 0.12652814388275146, "learning_rate": 0.0005, "loss": 2.1044, "step": 224140 }, { "epoch": 0.8531702229699383, "grad_norm": 0.1283009648323059, "learning_rate": 0.0005, "loss": 2.0946, "step": 224150 }, { "epoch": 0.8532082854380609, "grad_norm": 0.1335098296403885, "learning_rate": 0.0005, "loss": 2.1032, "step": 224160 }, { "epoch": 0.8532463479061836, "grad_norm": 0.12675894796848297, "learning_rate": 0.0005, "loss": 2.097, "step": 224170 }, { "epoch": 0.8532844103743064, "grad_norm": 0.12911538779735565, "learning_rate": 0.0005, "loss": 2.0915, "step": 224180 }, { "epoch": 0.853322472842429, "grad_norm": 0.12739954888820648, "learning_rate": 0.0005, "loss": 2.1115, "step": 224190 }, { "epoch": 0.8533605353105517, "grad_norm": 0.17836807668209076, "learning_rate": 0.0005, "loss": 2.0915, "step": 224200 }, { "epoch": 0.8533985977786743, "grad_norm": 0.12320113927125931, "learning_rate": 0.0005, "loss": 2.1106, "step": 224210 }, { "epoch": 0.8534366602467971, "grad_norm": 0.12615108489990234, "learning_rate": 0.0005, "loss": 2.106, "step": 224220 }, { "epoch": 0.8534747227149198, "grad_norm": 0.11938022822141647, "learning_rate": 0.0005, "loss": 2.0919, "step": 224230 }, { "epoch": 0.8535127851830424, "grad_norm": 0.1230238601565361, "learning_rate": 0.0005, "loss": 2.1133, "step": 224240 }, { "epoch": 0.8535508476511651, "grad_norm": 0.12678667902946472, "learning_rate": 0.0005, "loss": 2.1123, "step": 224250 }, { "epoch": 0.8535889101192877, "grad_norm": 0.1329822987318039, "learning_rate": 0.0005, "loss": 2.1032, "step": 224260 }, { "epoch": 0.8536269725874105, "grad_norm": 0.12265556305646896, "learning_rate": 0.0005, "loss": 2.1152, "step": 224270 }, { "epoch": 0.8536650350555332, "grad_norm": 0.12903083860874176, "learning_rate": 0.0005, "loss": 2.115, "step": 224280 }, { "epoch": 0.8537030975236558, "grad_norm": 0.13243375718593597, "learning_rate": 0.0005, "loss": 2.0904, "step": 224290 }, { "epoch": 0.8537411599917785, "grad_norm": 0.12826158106327057, "learning_rate": 0.0005, "loss": 2.1168, "step": 224300 }, { "epoch": 0.8537792224599012, "grad_norm": 0.12951309978961945, "learning_rate": 0.0005, "loss": 2.1118, "step": 224310 }, { "epoch": 0.8538172849280239, "grad_norm": 0.12756584584712982, "learning_rate": 0.0005, "loss": 2.0995, "step": 224320 }, { "epoch": 0.8538553473961465, "grad_norm": 0.12267711013555527, "learning_rate": 0.0005, "loss": 2.1014, "step": 224330 }, { "epoch": 0.8538934098642692, "grad_norm": 0.14301878213882446, "learning_rate": 0.0005, "loss": 2.1101, "step": 224340 }, { "epoch": 0.853931472332392, "grad_norm": 0.14637650549411774, "learning_rate": 0.0005, "loss": 2.1029, "step": 224350 }, { "epoch": 0.8539695348005146, "grad_norm": 0.11642546951770782, "learning_rate": 0.0005, "loss": 2.0914, "step": 224360 }, { "epoch": 0.8540075972686373, "grad_norm": 0.11581864207983017, "learning_rate": 0.0005, "loss": 2.0933, "step": 224370 }, { "epoch": 0.8540456597367599, "grad_norm": 0.1454319804906845, "learning_rate": 0.0005, "loss": 2.0971, "step": 224380 }, { "epoch": 0.8540837222048826, "grad_norm": 0.12507648766040802, "learning_rate": 0.0005, "loss": 2.1123, "step": 224390 }, { "epoch": 0.8541217846730054, "grad_norm": 0.11913920938968658, "learning_rate": 0.0005, "loss": 2.1034, "step": 224400 }, { "epoch": 0.854159847141128, "grad_norm": 0.1339544951915741, "learning_rate": 0.0005, "loss": 2.1042, "step": 224410 }, { "epoch": 0.8541979096092507, "grad_norm": 0.14385294914245605, "learning_rate": 0.0005, "loss": 2.1089, "step": 224420 }, { "epoch": 0.8542359720773733, "grad_norm": 0.14541102945804596, "learning_rate": 0.0005, "loss": 2.1077, "step": 224430 }, { "epoch": 0.8542740345454961, "grad_norm": 0.12655025720596313, "learning_rate": 0.0005, "loss": 2.1106, "step": 224440 }, { "epoch": 0.8543120970136188, "grad_norm": 0.1332712322473526, "learning_rate": 0.0005, "loss": 2.0887, "step": 224450 }, { "epoch": 0.8543501594817414, "grad_norm": 0.11915437132120132, "learning_rate": 0.0005, "loss": 2.1075, "step": 224460 }, { "epoch": 0.8543882219498641, "grad_norm": 0.13157260417938232, "learning_rate": 0.0005, "loss": 2.1093, "step": 224470 }, { "epoch": 0.8544262844179868, "grad_norm": 0.12617330253124237, "learning_rate": 0.0005, "loss": 2.1035, "step": 224480 }, { "epoch": 0.8544643468861095, "grad_norm": 0.116673544049263, "learning_rate": 0.0005, "loss": 2.0889, "step": 224490 }, { "epoch": 0.8545024093542322, "grad_norm": 0.1166527196764946, "learning_rate": 0.0005, "loss": 2.0988, "step": 224500 }, { "epoch": 0.8545404718223548, "grad_norm": 0.12571091949939728, "learning_rate": 0.0005, "loss": 2.0896, "step": 224510 }, { "epoch": 0.8545785342904776, "grad_norm": 0.1225324347615242, "learning_rate": 0.0005, "loss": 2.0977, "step": 224520 }, { "epoch": 0.8546165967586002, "grad_norm": 0.13173021376132965, "learning_rate": 0.0005, "loss": 2.1036, "step": 224530 }, { "epoch": 0.8546546592267229, "grad_norm": 0.11553698778152466, "learning_rate": 0.0005, "loss": 2.1122, "step": 224540 }, { "epoch": 0.8546927216948456, "grad_norm": 0.12536373734474182, "learning_rate": 0.0005, "loss": 2.1131, "step": 224550 }, { "epoch": 0.8547307841629682, "grad_norm": 0.13124406337738037, "learning_rate": 0.0005, "loss": 2.1158, "step": 224560 }, { "epoch": 0.854768846631091, "grad_norm": 0.1201077252626419, "learning_rate": 0.0005, "loss": 2.0968, "step": 224570 }, { "epoch": 0.8548069090992136, "grad_norm": 0.13268786668777466, "learning_rate": 0.0005, "loss": 2.1068, "step": 224580 }, { "epoch": 0.8548449715673363, "grad_norm": 0.1516837626695633, "learning_rate": 0.0005, "loss": 2.0951, "step": 224590 }, { "epoch": 0.854883034035459, "grad_norm": 0.11803478002548218, "learning_rate": 0.0005, "loss": 2.1009, "step": 224600 }, { "epoch": 0.8549210965035817, "grad_norm": 0.11986857652664185, "learning_rate": 0.0005, "loss": 2.1046, "step": 224610 }, { "epoch": 0.8549591589717044, "grad_norm": 0.11951293796300888, "learning_rate": 0.0005, "loss": 2.1023, "step": 224620 }, { "epoch": 0.854997221439827, "grad_norm": 0.1232936680316925, "learning_rate": 0.0005, "loss": 2.0982, "step": 224630 }, { "epoch": 0.8550352839079497, "grad_norm": 0.12167305499315262, "learning_rate": 0.0005, "loss": 2.1082, "step": 224640 }, { "epoch": 0.8550733463760725, "grad_norm": 0.12915948033332825, "learning_rate": 0.0005, "loss": 2.1019, "step": 224650 }, { "epoch": 0.8551114088441951, "grad_norm": 0.12672092020511627, "learning_rate": 0.0005, "loss": 2.112, "step": 224660 }, { "epoch": 0.8551494713123178, "grad_norm": 0.1322372853755951, "learning_rate": 0.0005, "loss": 2.0963, "step": 224670 }, { "epoch": 0.8551875337804404, "grad_norm": 0.13333991169929504, "learning_rate": 0.0005, "loss": 2.1005, "step": 224680 }, { "epoch": 0.8552255962485631, "grad_norm": 0.11972727626562119, "learning_rate": 0.0005, "loss": 2.1025, "step": 224690 }, { "epoch": 0.8552636587166859, "grad_norm": 0.12012451142072678, "learning_rate": 0.0005, "loss": 2.1106, "step": 224700 }, { "epoch": 0.8553017211848085, "grad_norm": 0.1278320550918579, "learning_rate": 0.0005, "loss": 2.0922, "step": 224710 }, { "epoch": 0.8553397836529312, "grad_norm": 0.12670546770095825, "learning_rate": 0.0005, "loss": 2.1165, "step": 224720 }, { "epoch": 0.8553778461210538, "grad_norm": 0.12710654735565186, "learning_rate": 0.0005, "loss": 2.0964, "step": 224730 }, { "epoch": 0.8554159085891766, "grad_norm": 0.12144336849451065, "learning_rate": 0.0005, "loss": 2.0966, "step": 224740 }, { "epoch": 0.8554539710572993, "grad_norm": 0.136688232421875, "learning_rate": 0.0005, "loss": 2.1071, "step": 224750 }, { "epoch": 0.8554920335254219, "grad_norm": 0.12287929654121399, "learning_rate": 0.0005, "loss": 2.0962, "step": 224760 }, { "epoch": 0.8555300959935446, "grad_norm": 0.13087445497512817, "learning_rate": 0.0005, "loss": 2.1043, "step": 224770 }, { "epoch": 0.8555681584616673, "grad_norm": 0.1203342080116272, "learning_rate": 0.0005, "loss": 2.0981, "step": 224780 }, { "epoch": 0.85560622092979, "grad_norm": 0.1329360008239746, "learning_rate": 0.0005, "loss": 2.1069, "step": 224790 }, { "epoch": 0.8556442833979127, "grad_norm": 0.13662736117839813, "learning_rate": 0.0005, "loss": 2.1053, "step": 224800 }, { "epoch": 0.8556823458660353, "grad_norm": 0.12587331235408783, "learning_rate": 0.0005, "loss": 2.1075, "step": 224810 }, { "epoch": 0.855720408334158, "grad_norm": 0.1271894872188568, "learning_rate": 0.0005, "loss": 2.1074, "step": 224820 }, { "epoch": 0.8557584708022807, "grad_norm": 0.13241060078144073, "learning_rate": 0.0005, "loss": 2.098, "step": 224830 }, { "epoch": 0.8557965332704034, "grad_norm": 0.14475074410438538, "learning_rate": 0.0005, "loss": 2.1018, "step": 224840 }, { "epoch": 0.855834595738526, "grad_norm": 0.12807601690292358, "learning_rate": 0.0005, "loss": 2.1067, "step": 224850 }, { "epoch": 0.8558726582066487, "grad_norm": 0.13161490857601166, "learning_rate": 0.0005, "loss": 2.1114, "step": 224860 }, { "epoch": 0.8559107206747715, "grad_norm": 0.12225492298603058, "learning_rate": 0.0005, "loss": 2.088, "step": 224870 }, { "epoch": 0.8559487831428941, "grad_norm": 0.11510666459798813, "learning_rate": 0.0005, "loss": 2.0963, "step": 224880 }, { "epoch": 0.8559868456110168, "grad_norm": 0.11517883092164993, "learning_rate": 0.0005, "loss": 2.1087, "step": 224890 }, { "epoch": 0.8560249080791394, "grad_norm": 0.1427791714668274, "learning_rate": 0.0005, "loss": 2.1174, "step": 224900 }, { "epoch": 0.8560629705472622, "grad_norm": 0.14692747592926025, "learning_rate": 0.0005, "loss": 2.1148, "step": 224910 }, { "epoch": 0.8561010330153849, "grad_norm": 0.13827954232692719, "learning_rate": 0.0005, "loss": 2.1172, "step": 224920 }, { "epoch": 0.8561390954835075, "grad_norm": 0.11719125509262085, "learning_rate": 0.0005, "loss": 2.1109, "step": 224930 }, { "epoch": 0.8561771579516302, "grad_norm": 0.12412826716899872, "learning_rate": 0.0005, "loss": 2.0967, "step": 224940 }, { "epoch": 0.856215220419753, "grad_norm": 0.12181001156568527, "learning_rate": 0.0005, "loss": 2.1036, "step": 224950 }, { "epoch": 0.8562532828878756, "grad_norm": 0.1239355206489563, "learning_rate": 0.0005, "loss": 2.1072, "step": 224960 }, { "epoch": 0.8562913453559983, "grad_norm": 0.12484218925237656, "learning_rate": 0.0005, "loss": 2.1094, "step": 224970 }, { "epoch": 0.8563294078241209, "grad_norm": 0.12160801142454147, "learning_rate": 0.0005, "loss": 2.0942, "step": 224980 }, { "epoch": 0.8563674702922436, "grad_norm": 0.12800124287605286, "learning_rate": 0.0005, "loss": 2.1096, "step": 224990 }, { "epoch": 0.8564055327603664, "grad_norm": 0.12416477501392365, "learning_rate": 0.0005, "loss": 2.097, "step": 225000 }, { "epoch": 0.856443595228489, "grad_norm": 0.14295855164527893, "learning_rate": 0.0005, "loss": 2.1082, "step": 225010 }, { "epoch": 0.8564816576966117, "grad_norm": 0.21226909756660461, "learning_rate": 0.0005, "loss": 2.1154, "step": 225020 }, { "epoch": 0.8565197201647343, "grad_norm": 0.11625513434410095, "learning_rate": 0.0005, "loss": 2.0995, "step": 225030 }, { "epoch": 0.8565577826328571, "grad_norm": 0.1246718093752861, "learning_rate": 0.0005, "loss": 2.1142, "step": 225040 }, { "epoch": 0.8565958451009797, "grad_norm": 0.13294856250286102, "learning_rate": 0.0005, "loss": 2.122, "step": 225050 }, { "epoch": 0.8566339075691024, "grad_norm": 0.13694648444652557, "learning_rate": 0.0005, "loss": 2.1076, "step": 225060 }, { "epoch": 0.8566719700372251, "grad_norm": 0.11973528563976288, "learning_rate": 0.0005, "loss": 2.101, "step": 225070 }, { "epoch": 0.8567100325053478, "grad_norm": 0.13602188229560852, "learning_rate": 0.0005, "loss": 2.0973, "step": 225080 }, { "epoch": 0.8567480949734705, "grad_norm": 0.13067738711833954, "learning_rate": 0.0005, "loss": 2.0949, "step": 225090 }, { "epoch": 0.8567861574415931, "grad_norm": 0.13641096651554108, "learning_rate": 0.0005, "loss": 2.0835, "step": 225100 }, { "epoch": 0.8568242199097158, "grad_norm": 0.12239253520965576, "learning_rate": 0.0005, "loss": 2.1059, "step": 225110 }, { "epoch": 0.8568622823778385, "grad_norm": 0.127982959151268, "learning_rate": 0.0005, "loss": 2.1123, "step": 225120 }, { "epoch": 0.8569003448459612, "grad_norm": 0.11979227513074875, "learning_rate": 0.0005, "loss": 2.1108, "step": 225130 }, { "epoch": 0.8569384073140839, "grad_norm": 0.14429311454296112, "learning_rate": 0.0005, "loss": 2.1208, "step": 225140 }, { "epoch": 0.8569764697822065, "grad_norm": 0.12793608009815216, "learning_rate": 0.0005, "loss": 2.1161, "step": 225150 }, { "epoch": 0.8570145322503292, "grad_norm": 0.12261246889829636, "learning_rate": 0.0005, "loss": 2.0988, "step": 225160 }, { "epoch": 0.857052594718452, "grad_norm": 0.12844735383987427, "learning_rate": 0.0005, "loss": 2.1152, "step": 225170 }, { "epoch": 0.8570906571865746, "grad_norm": 0.12870533764362335, "learning_rate": 0.0005, "loss": 2.0932, "step": 225180 }, { "epoch": 0.8571287196546973, "grad_norm": 0.1424136906862259, "learning_rate": 0.0005, "loss": 2.1058, "step": 225190 }, { "epoch": 0.8571667821228199, "grad_norm": 0.12998011708259583, "learning_rate": 0.0005, "loss": 2.1073, "step": 225200 }, { "epoch": 0.8572048445909427, "grad_norm": 0.1247173473238945, "learning_rate": 0.0005, "loss": 2.0944, "step": 225210 }, { "epoch": 0.8572429070590654, "grad_norm": 0.12054622918367386, "learning_rate": 0.0005, "loss": 2.0907, "step": 225220 }, { "epoch": 0.857280969527188, "grad_norm": 0.1245812177658081, "learning_rate": 0.0005, "loss": 2.1055, "step": 225230 }, { "epoch": 0.8573190319953107, "grad_norm": 0.13140606880187988, "learning_rate": 0.0005, "loss": 2.1019, "step": 225240 }, { "epoch": 0.8573570944634333, "grad_norm": 0.13697367906570435, "learning_rate": 0.0005, "loss": 2.1006, "step": 225250 }, { "epoch": 0.8573951569315561, "grad_norm": 0.13572590053081512, "learning_rate": 0.0005, "loss": 2.1079, "step": 225260 }, { "epoch": 0.8574332193996788, "grad_norm": 0.1267860233783722, "learning_rate": 0.0005, "loss": 2.1042, "step": 225270 }, { "epoch": 0.8574712818678014, "grad_norm": 0.1345335990190506, "learning_rate": 0.0005, "loss": 2.1091, "step": 225280 }, { "epoch": 0.8575093443359241, "grad_norm": 0.12333787232637405, "learning_rate": 0.0005, "loss": 2.0963, "step": 225290 }, { "epoch": 0.8575474068040468, "grad_norm": 0.12491423636674881, "learning_rate": 0.0005, "loss": 2.0999, "step": 225300 }, { "epoch": 0.8575854692721695, "grad_norm": 0.12076954543590546, "learning_rate": 0.0005, "loss": 2.0976, "step": 225310 }, { "epoch": 0.8576235317402922, "grad_norm": 0.127850741147995, "learning_rate": 0.0005, "loss": 2.1108, "step": 225320 }, { "epoch": 0.8576615942084148, "grad_norm": 0.12069816142320633, "learning_rate": 0.0005, "loss": 2.1022, "step": 225330 }, { "epoch": 0.8576996566765376, "grad_norm": 0.13940003514289856, "learning_rate": 0.0005, "loss": 2.0992, "step": 225340 }, { "epoch": 0.8577377191446602, "grad_norm": 0.13708564639091492, "learning_rate": 0.0005, "loss": 2.1033, "step": 225350 }, { "epoch": 0.8577757816127829, "grad_norm": 0.12923499941825867, "learning_rate": 0.0005, "loss": 2.107, "step": 225360 }, { "epoch": 0.8578138440809056, "grad_norm": 0.12485264986753464, "learning_rate": 0.0005, "loss": 2.0914, "step": 225370 }, { "epoch": 0.8578519065490283, "grad_norm": 0.131544828414917, "learning_rate": 0.0005, "loss": 2.1044, "step": 225380 }, { "epoch": 0.857889969017151, "grad_norm": 0.13050268590450287, "learning_rate": 0.0005, "loss": 2.1037, "step": 225390 }, { "epoch": 0.8579280314852736, "grad_norm": 0.1251644492149353, "learning_rate": 0.0005, "loss": 2.0943, "step": 225400 }, { "epoch": 0.8579660939533963, "grad_norm": 0.13173510134220123, "learning_rate": 0.0005, "loss": 2.0995, "step": 225410 }, { "epoch": 0.858004156421519, "grad_norm": 0.12391963601112366, "learning_rate": 0.0005, "loss": 2.0996, "step": 225420 }, { "epoch": 0.8580422188896417, "grad_norm": 0.1279720813035965, "learning_rate": 0.0005, "loss": 2.0961, "step": 225430 }, { "epoch": 0.8580802813577644, "grad_norm": 0.11828334629535675, "learning_rate": 0.0005, "loss": 2.1063, "step": 225440 }, { "epoch": 0.858118343825887, "grad_norm": 0.12441591918468475, "learning_rate": 0.0005, "loss": 2.1229, "step": 225450 }, { "epoch": 0.8581564062940097, "grad_norm": 0.12340865284204483, "learning_rate": 0.0005, "loss": 2.1037, "step": 225460 }, { "epoch": 0.8581944687621325, "grad_norm": 0.11606843024492264, "learning_rate": 0.0005, "loss": 2.1109, "step": 225470 }, { "epoch": 0.8582325312302551, "grad_norm": 0.12435857206583023, "learning_rate": 0.0005, "loss": 2.1138, "step": 225480 }, { "epoch": 0.8582705936983778, "grad_norm": 0.12200822681188583, "learning_rate": 0.0005, "loss": 2.124, "step": 225490 }, { "epoch": 0.8583086561665004, "grad_norm": 0.1329309344291687, "learning_rate": 0.0005, "loss": 2.1019, "step": 225500 }, { "epoch": 0.8583467186346232, "grad_norm": 0.12208396941423416, "learning_rate": 0.0005, "loss": 2.1145, "step": 225510 }, { "epoch": 0.8583847811027459, "grad_norm": 0.12494415789842606, "learning_rate": 0.0005, "loss": 2.0953, "step": 225520 }, { "epoch": 0.8584228435708685, "grad_norm": 0.13383197784423828, "learning_rate": 0.0005, "loss": 2.1067, "step": 225530 }, { "epoch": 0.8584609060389912, "grad_norm": 0.13817362487316132, "learning_rate": 0.0005, "loss": 2.0963, "step": 225540 }, { "epoch": 0.8584989685071138, "grad_norm": 0.12982138991355896, "learning_rate": 0.0005, "loss": 2.097, "step": 225550 }, { "epoch": 0.8585370309752366, "grad_norm": 0.12778323888778687, "learning_rate": 0.0005, "loss": 2.1068, "step": 225560 }, { "epoch": 0.8585750934433592, "grad_norm": 0.13495448231697083, "learning_rate": 0.0005, "loss": 2.1054, "step": 225570 }, { "epoch": 0.8586131559114819, "grad_norm": 0.12340793013572693, "learning_rate": 0.0005, "loss": 2.1018, "step": 225580 }, { "epoch": 0.8586512183796046, "grad_norm": 0.12468979507684708, "learning_rate": 0.0005, "loss": 2.1028, "step": 225590 }, { "epoch": 0.8586892808477273, "grad_norm": 0.12642750144004822, "learning_rate": 0.0005, "loss": 2.1203, "step": 225600 }, { "epoch": 0.85872734331585, "grad_norm": 0.1208629161119461, "learning_rate": 0.0005, "loss": 2.1088, "step": 225610 }, { "epoch": 0.8587654057839726, "grad_norm": 0.11862723529338837, "learning_rate": 0.0005, "loss": 2.1116, "step": 225620 }, { "epoch": 0.8588034682520953, "grad_norm": 0.1195588931441307, "learning_rate": 0.0005, "loss": 2.1027, "step": 225630 }, { "epoch": 0.8588415307202181, "grad_norm": 0.12250778824090958, "learning_rate": 0.0005, "loss": 2.112, "step": 225640 }, { "epoch": 0.8588795931883407, "grad_norm": 0.13086935877799988, "learning_rate": 0.0005, "loss": 2.108, "step": 225650 }, { "epoch": 0.8589176556564634, "grad_norm": 0.11836916953325272, "learning_rate": 0.0005, "loss": 2.1019, "step": 225660 }, { "epoch": 0.858955718124586, "grad_norm": 0.12411430478096008, "learning_rate": 0.0005, "loss": 2.1163, "step": 225670 }, { "epoch": 0.8589937805927087, "grad_norm": 0.12509943544864655, "learning_rate": 0.0005, "loss": 2.0843, "step": 225680 }, { "epoch": 0.8590318430608315, "grad_norm": 0.14191097021102905, "learning_rate": 0.0005, "loss": 2.1058, "step": 225690 }, { "epoch": 0.8590699055289541, "grad_norm": 0.13417589664459229, "learning_rate": 0.0005, "loss": 2.0949, "step": 225700 }, { "epoch": 0.8591079679970768, "grad_norm": 0.1313231736421585, "learning_rate": 0.0005, "loss": 2.09, "step": 225710 }, { "epoch": 0.8591460304651994, "grad_norm": 0.12625999748706818, "learning_rate": 0.0005, "loss": 2.0932, "step": 225720 }, { "epoch": 0.8591840929333222, "grad_norm": 0.13181033730506897, "learning_rate": 0.0005, "loss": 2.1052, "step": 225730 }, { "epoch": 0.8592221554014449, "grad_norm": 0.13501916825771332, "learning_rate": 0.0005, "loss": 2.0981, "step": 225740 }, { "epoch": 0.8592602178695675, "grad_norm": 0.12991739809513092, "learning_rate": 0.0005, "loss": 2.097, "step": 225750 }, { "epoch": 0.8592982803376902, "grad_norm": 0.13025683164596558, "learning_rate": 0.0005, "loss": 2.1052, "step": 225760 }, { "epoch": 0.859336342805813, "grad_norm": 0.12073546648025513, "learning_rate": 0.0005, "loss": 2.0893, "step": 225770 }, { "epoch": 0.8593744052739356, "grad_norm": 0.12659108638763428, "learning_rate": 0.0005, "loss": 2.1127, "step": 225780 }, { "epoch": 0.8594124677420583, "grad_norm": 0.12313074618577957, "learning_rate": 0.0005, "loss": 2.0964, "step": 225790 }, { "epoch": 0.8594505302101809, "grad_norm": 0.12898115813732147, "learning_rate": 0.0005, "loss": 2.1037, "step": 225800 }, { "epoch": 0.8594885926783037, "grad_norm": 0.1190689355134964, "learning_rate": 0.0005, "loss": 2.118, "step": 225810 }, { "epoch": 0.8595266551464263, "grad_norm": 0.13219016790390015, "learning_rate": 0.0005, "loss": 2.089, "step": 225820 }, { "epoch": 0.859564717614549, "grad_norm": 0.14062274992465973, "learning_rate": 0.0005, "loss": 2.1105, "step": 225830 }, { "epoch": 0.8596027800826717, "grad_norm": 0.11882352828979492, "learning_rate": 0.0005, "loss": 2.092, "step": 225840 }, { "epoch": 0.8596408425507943, "grad_norm": 0.13080590963363647, "learning_rate": 0.0005, "loss": 2.1033, "step": 225850 }, { "epoch": 0.8596789050189171, "grad_norm": 0.1176263764500618, "learning_rate": 0.0005, "loss": 2.1022, "step": 225860 }, { "epoch": 0.8597169674870397, "grad_norm": 0.12545640766620636, "learning_rate": 0.0005, "loss": 2.0974, "step": 225870 }, { "epoch": 0.8597550299551624, "grad_norm": 0.13276277482509613, "learning_rate": 0.0005, "loss": 2.1071, "step": 225880 }, { "epoch": 0.859793092423285, "grad_norm": 0.13441850244998932, "learning_rate": 0.0005, "loss": 2.1151, "step": 225890 }, { "epoch": 0.8598311548914078, "grad_norm": 0.12007158249616623, "learning_rate": 0.0005, "loss": 2.1005, "step": 225900 }, { "epoch": 0.8598692173595305, "grad_norm": 0.13201579451560974, "learning_rate": 0.0005, "loss": 2.098, "step": 225910 }, { "epoch": 0.8599072798276531, "grad_norm": 0.12150247395038605, "learning_rate": 0.0005, "loss": 2.1122, "step": 225920 }, { "epoch": 0.8599453422957758, "grad_norm": 0.12309324741363525, "learning_rate": 0.0005, "loss": 2.1095, "step": 225930 }, { "epoch": 0.8599834047638986, "grad_norm": 0.11839766800403595, "learning_rate": 0.0005, "loss": 2.0867, "step": 225940 }, { "epoch": 0.8600214672320212, "grad_norm": 0.13090623915195465, "learning_rate": 0.0005, "loss": 2.101, "step": 225950 }, { "epoch": 0.8600595297001439, "grad_norm": 0.13575056195259094, "learning_rate": 0.0005, "loss": 2.1121, "step": 225960 }, { "epoch": 0.8600975921682665, "grad_norm": 0.127581387758255, "learning_rate": 0.0005, "loss": 2.1069, "step": 225970 }, { "epoch": 0.8601356546363892, "grad_norm": 0.12778790295124054, "learning_rate": 0.0005, "loss": 2.1045, "step": 225980 }, { "epoch": 0.860173717104512, "grad_norm": 0.12477439641952515, "learning_rate": 0.0005, "loss": 2.1024, "step": 225990 }, { "epoch": 0.8602117795726346, "grad_norm": 0.11519020795822144, "learning_rate": 0.0005, "loss": 2.1047, "step": 226000 }, { "epoch": 0.8602498420407573, "grad_norm": 0.12137072533369064, "learning_rate": 0.0005, "loss": 2.1144, "step": 226010 }, { "epoch": 0.8602879045088799, "grad_norm": 0.12396124005317688, "learning_rate": 0.0005, "loss": 2.1046, "step": 226020 }, { "epoch": 0.8603259669770027, "grad_norm": 0.13137753307819366, "learning_rate": 0.0005, "loss": 2.0965, "step": 226030 }, { "epoch": 0.8603640294451254, "grad_norm": 0.1262117326259613, "learning_rate": 0.0005, "loss": 2.1239, "step": 226040 }, { "epoch": 0.860402091913248, "grad_norm": 0.12531767785549164, "learning_rate": 0.0005, "loss": 2.0982, "step": 226050 }, { "epoch": 0.8604401543813707, "grad_norm": 0.12092337757349014, "learning_rate": 0.0005, "loss": 2.0964, "step": 226060 }, { "epoch": 0.8604782168494934, "grad_norm": 0.13076327741146088, "learning_rate": 0.0005, "loss": 2.1165, "step": 226070 }, { "epoch": 0.8605162793176161, "grad_norm": 0.12410133332014084, "learning_rate": 0.0005, "loss": 2.1056, "step": 226080 }, { "epoch": 0.8605543417857388, "grad_norm": 0.1234215795993805, "learning_rate": 0.0005, "loss": 2.1101, "step": 226090 }, { "epoch": 0.8605924042538614, "grad_norm": 0.13111284375190735, "learning_rate": 0.0005, "loss": 2.1007, "step": 226100 }, { "epoch": 0.8606304667219841, "grad_norm": 0.11520785838365555, "learning_rate": 0.0005, "loss": 2.0976, "step": 226110 }, { "epoch": 0.8606685291901068, "grad_norm": 0.12031671404838562, "learning_rate": 0.0005, "loss": 2.1043, "step": 226120 }, { "epoch": 0.8607065916582295, "grad_norm": 0.12421654164791107, "learning_rate": 0.0005, "loss": 2.097, "step": 226130 }, { "epoch": 0.8607446541263521, "grad_norm": 0.12553544342517853, "learning_rate": 0.0005, "loss": 2.105, "step": 226140 }, { "epoch": 0.8607827165944748, "grad_norm": 0.12797226011753082, "learning_rate": 0.0005, "loss": 2.0918, "step": 226150 }, { "epoch": 0.8608207790625976, "grad_norm": 0.13866060972213745, "learning_rate": 0.0005, "loss": 2.0974, "step": 226160 }, { "epoch": 0.8608588415307202, "grad_norm": 0.1263991892337799, "learning_rate": 0.0005, "loss": 2.0962, "step": 226170 }, { "epoch": 0.8608969039988429, "grad_norm": 0.1374591439962387, "learning_rate": 0.0005, "loss": 2.098, "step": 226180 }, { "epoch": 0.8609349664669655, "grad_norm": 0.12693995237350464, "learning_rate": 0.0005, "loss": 2.1163, "step": 226190 }, { "epoch": 0.8609730289350883, "grad_norm": 0.1270032674074173, "learning_rate": 0.0005, "loss": 2.1065, "step": 226200 }, { "epoch": 0.861011091403211, "grad_norm": 0.13378532230854034, "learning_rate": 0.0005, "loss": 2.0968, "step": 226210 }, { "epoch": 0.8610491538713336, "grad_norm": 0.1346651166677475, "learning_rate": 0.0005, "loss": 2.1093, "step": 226220 }, { "epoch": 0.8610872163394563, "grad_norm": 0.12393094599246979, "learning_rate": 0.0005, "loss": 2.1129, "step": 226230 }, { "epoch": 0.861125278807579, "grad_norm": 0.15087266266345978, "learning_rate": 0.0005, "loss": 2.0978, "step": 226240 }, { "epoch": 0.8611633412757017, "grad_norm": 0.11483550816774368, "learning_rate": 0.0005, "loss": 2.1081, "step": 226250 }, { "epoch": 0.8612014037438244, "grad_norm": 0.1254885494709015, "learning_rate": 0.0005, "loss": 2.0951, "step": 226260 }, { "epoch": 0.861239466211947, "grad_norm": 0.12374576181173325, "learning_rate": 0.0005, "loss": 2.1055, "step": 226270 }, { "epoch": 0.8612775286800697, "grad_norm": 0.12660790979862213, "learning_rate": 0.0005, "loss": 2.1078, "step": 226280 }, { "epoch": 0.8613155911481924, "grad_norm": 0.12744198739528656, "learning_rate": 0.0005, "loss": 2.1077, "step": 226290 }, { "epoch": 0.8613536536163151, "grad_norm": 0.12421263754367828, "learning_rate": 0.0005, "loss": 2.0946, "step": 226300 }, { "epoch": 0.8613917160844378, "grad_norm": 0.11595192551612854, "learning_rate": 0.0005, "loss": 2.0966, "step": 226310 }, { "epoch": 0.8614297785525604, "grad_norm": 0.12452074885368347, "learning_rate": 0.0005, "loss": 2.0938, "step": 226320 }, { "epoch": 0.8614678410206832, "grad_norm": 0.12246125936508179, "learning_rate": 0.0005, "loss": 2.1145, "step": 226330 }, { "epoch": 0.8615059034888058, "grad_norm": 0.13508029282093048, "learning_rate": 0.0005, "loss": 2.1007, "step": 226340 }, { "epoch": 0.8615439659569285, "grad_norm": 0.11969486624002457, "learning_rate": 0.0005, "loss": 2.0936, "step": 226350 }, { "epoch": 0.8615820284250512, "grad_norm": 0.1402081400156021, "learning_rate": 0.0005, "loss": 2.1014, "step": 226360 }, { "epoch": 0.8616200908931739, "grad_norm": 0.1315373182296753, "learning_rate": 0.0005, "loss": 2.1153, "step": 226370 }, { "epoch": 0.8616581533612966, "grad_norm": 0.13985498249530792, "learning_rate": 0.0005, "loss": 2.0986, "step": 226380 }, { "epoch": 0.8616962158294192, "grad_norm": 0.12545058131217957, "learning_rate": 0.0005, "loss": 2.0904, "step": 226390 }, { "epoch": 0.8617342782975419, "grad_norm": 0.17630834877490997, "learning_rate": 0.0005, "loss": 2.097, "step": 226400 }, { "epoch": 0.8617723407656646, "grad_norm": 0.13177427649497986, "learning_rate": 0.0005, "loss": 2.0995, "step": 226410 }, { "epoch": 0.8618104032337873, "grad_norm": 0.12645024061203003, "learning_rate": 0.0005, "loss": 2.0984, "step": 226420 }, { "epoch": 0.86184846570191, "grad_norm": 0.13735495507717133, "learning_rate": 0.0005, "loss": 2.0954, "step": 226430 }, { "epoch": 0.8618865281700326, "grad_norm": 0.13273243606090546, "learning_rate": 0.0005, "loss": 2.1046, "step": 226440 }, { "epoch": 0.8619245906381553, "grad_norm": 0.13654807209968567, "learning_rate": 0.0005, "loss": 2.101, "step": 226450 }, { "epoch": 0.8619626531062781, "grad_norm": 0.11886227875947952, "learning_rate": 0.0005, "loss": 2.1124, "step": 226460 }, { "epoch": 0.8620007155744007, "grad_norm": 0.1377406120300293, "learning_rate": 0.0005, "loss": 2.1019, "step": 226470 }, { "epoch": 0.8620387780425234, "grad_norm": 0.13204039633274078, "learning_rate": 0.0005, "loss": 2.1068, "step": 226480 }, { "epoch": 0.862076840510646, "grad_norm": 0.13246789574623108, "learning_rate": 0.0005, "loss": 2.1171, "step": 226490 }, { "epoch": 0.8621149029787688, "grad_norm": 0.14469172060489655, "learning_rate": 0.0005, "loss": 2.1078, "step": 226500 }, { "epoch": 0.8621529654468915, "grad_norm": 0.13322791457176208, "learning_rate": 0.0005, "loss": 2.0996, "step": 226510 }, { "epoch": 0.8621910279150141, "grad_norm": 0.13450798392295837, "learning_rate": 0.0005, "loss": 2.0884, "step": 226520 }, { "epoch": 0.8622290903831368, "grad_norm": 0.12661725282669067, "learning_rate": 0.0005, "loss": 2.0988, "step": 226530 }, { "epoch": 0.8622671528512594, "grad_norm": 0.12003296613693237, "learning_rate": 0.0005, "loss": 2.0919, "step": 226540 }, { "epoch": 0.8623052153193822, "grad_norm": 0.16400742530822754, "learning_rate": 0.0005, "loss": 2.1124, "step": 226550 }, { "epoch": 0.8623432777875049, "grad_norm": 0.12328396737575531, "learning_rate": 0.0005, "loss": 2.1073, "step": 226560 }, { "epoch": 0.8623813402556275, "grad_norm": 0.11483404785394669, "learning_rate": 0.0005, "loss": 2.0864, "step": 226570 }, { "epoch": 0.8624194027237502, "grad_norm": 0.12388511002063751, "learning_rate": 0.0005, "loss": 2.1091, "step": 226580 }, { "epoch": 0.8624574651918729, "grad_norm": 0.11999114602804184, "learning_rate": 0.0005, "loss": 2.1087, "step": 226590 }, { "epoch": 0.8624955276599956, "grad_norm": 0.12741059064865112, "learning_rate": 0.0005, "loss": 2.1085, "step": 226600 }, { "epoch": 0.8625335901281183, "grad_norm": 0.12094668298959732, "learning_rate": 0.0005, "loss": 2.1063, "step": 226610 }, { "epoch": 0.8625716525962409, "grad_norm": 0.11427821218967438, "learning_rate": 0.0005, "loss": 2.0967, "step": 226620 }, { "epoch": 0.8626097150643637, "grad_norm": 0.12909498810768127, "learning_rate": 0.0005, "loss": 2.1021, "step": 226630 }, { "epoch": 0.8626477775324863, "grad_norm": 0.29847195744514465, "learning_rate": 0.0005, "loss": 2.1104, "step": 226640 }, { "epoch": 0.862685840000609, "grad_norm": 0.1321536898612976, "learning_rate": 0.0005, "loss": 2.0969, "step": 226650 }, { "epoch": 0.8627239024687317, "grad_norm": 0.12902212142944336, "learning_rate": 0.0005, "loss": 2.1088, "step": 226660 }, { "epoch": 0.8627619649368544, "grad_norm": 0.13099472224712372, "learning_rate": 0.0005, "loss": 2.0955, "step": 226670 }, { "epoch": 0.8628000274049771, "grad_norm": 0.12781858444213867, "learning_rate": 0.0005, "loss": 2.0906, "step": 226680 }, { "epoch": 0.8628380898730997, "grad_norm": 0.12633411586284637, "learning_rate": 0.0005, "loss": 2.0903, "step": 226690 }, { "epoch": 0.8628761523412224, "grad_norm": 0.13626986742019653, "learning_rate": 0.0005, "loss": 2.1017, "step": 226700 }, { "epoch": 0.862914214809345, "grad_norm": 0.13220657408237457, "learning_rate": 0.0005, "loss": 2.1131, "step": 226710 }, { "epoch": 0.8629522772774678, "grad_norm": 0.11806018650531769, "learning_rate": 0.0005, "loss": 2.1141, "step": 226720 }, { "epoch": 0.8629903397455905, "grad_norm": 0.12303390353918076, "learning_rate": 0.0005, "loss": 2.1124, "step": 226730 }, { "epoch": 0.8630284022137131, "grad_norm": 0.12535367906093597, "learning_rate": 0.0005, "loss": 2.1101, "step": 226740 }, { "epoch": 0.8630664646818358, "grad_norm": 0.12739968299865723, "learning_rate": 0.0005, "loss": 2.0964, "step": 226750 }, { "epoch": 0.8631045271499586, "grad_norm": 0.12450426071882248, "learning_rate": 0.0005, "loss": 2.0947, "step": 226760 }, { "epoch": 0.8631425896180812, "grad_norm": 0.12579986453056335, "learning_rate": 0.0005, "loss": 2.1173, "step": 226770 }, { "epoch": 0.8631806520862039, "grad_norm": 0.1489512324333191, "learning_rate": 0.0005, "loss": 2.0969, "step": 226780 }, { "epoch": 0.8632187145543265, "grad_norm": 0.12502668797969818, "learning_rate": 0.0005, "loss": 2.0738, "step": 226790 }, { "epoch": 0.8632567770224493, "grad_norm": 0.13363705575466156, "learning_rate": 0.0005, "loss": 2.0976, "step": 226800 }, { "epoch": 0.863294839490572, "grad_norm": 0.12163389474153519, "learning_rate": 0.0005, "loss": 2.1007, "step": 226810 }, { "epoch": 0.8633329019586946, "grad_norm": 0.13291381299495697, "learning_rate": 0.0005, "loss": 2.0966, "step": 226820 }, { "epoch": 0.8633709644268173, "grad_norm": 0.12920823693275452, "learning_rate": 0.0005, "loss": 2.0972, "step": 226830 }, { "epoch": 0.8634090268949399, "grad_norm": 0.1237969845533371, "learning_rate": 0.0005, "loss": 2.1108, "step": 226840 }, { "epoch": 0.8634470893630627, "grad_norm": 0.1371234804391861, "learning_rate": 0.0005, "loss": 2.103, "step": 226850 }, { "epoch": 0.8634851518311853, "grad_norm": 0.12614992260932922, "learning_rate": 0.0005, "loss": 2.1, "step": 226860 }, { "epoch": 0.863523214299308, "grad_norm": 0.13154858350753784, "learning_rate": 0.0005, "loss": 2.1038, "step": 226870 }, { "epoch": 0.8635612767674307, "grad_norm": 0.13891302049160004, "learning_rate": 0.0005, "loss": 2.1038, "step": 226880 }, { "epoch": 0.8635993392355534, "grad_norm": 0.147722527384758, "learning_rate": 0.0005, "loss": 2.1106, "step": 226890 }, { "epoch": 0.8636374017036761, "grad_norm": 0.1390915811061859, "learning_rate": 0.0005, "loss": 2.1014, "step": 226900 }, { "epoch": 0.8636754641717987, "grad_norm": 0.11712667346000671, "learning_rate": 0.0005, "loss": 2.0922, "step": 226910 }, { "epoch": 0.8637135266399214, "grad_norm": 0.12969042360782623, "learning_rate": 0.0005, "loss": 2.0975, "step": 226920 }, { "epoch": 0.8637515891080442, "grad_norm": 0.12357570976018906, "learning_rate": 0.0005, "loss": 2.093, "step": 226930 }, { "epoch": 0.8637896515761668, "grad_norm": 0.12982304394245148, "learning_rate": 0.0005, "loss": 2.1053, "step": 226940 }, { "epoch": 0.8638277140442895, "grad_norm": 0.11862597614526749, "learning_rate": 0.0005, "loss": 2.1022, "step": 226950 }, { "epoch": 0.8638657765124121, "grad_norm": 0.11904960125684738, "learning_rate": 0.0005, "loss": 2.1075, "step": 226960 }, { "epoch": 0.8639038389805348, "grad_norm": 0.11986031383275986, "learning_rate": 0.0005, "loss": 2.1115, "step": 226970 }, { "epoch": 0.8639419014486576, "grad_norm": 0.13269126415252686, "learning_rate": 0.0005, "loss": 2.1018, "step": 226980 }, { "epoch": 0.8639799639167802, "grad_norm": 0.16061635315418243, "learning_rate": 0.0005, "loss": 2.1008, "step": 226990 }, { "epoch": 0.8640180263849029, "grad_norm": 0.12891733646392822, "learning_rate": 0.0005, "loss": 2.0913, "step": 227000 }, { "epoch": 0.8640560888530255, "grad_norm": 0.12706734240055084, "learning_rate": 0.0005, "loss": 2.1059, "step": 227010 }, { "epoch": 0.8640941513211483, "grad_norm": 0.12456909567117691, "learning_rate": 0.0005, "loss": 2.1148, "step": 227020 }, { "epoch": 0.864132213789271, "grad_norm": 0.13296130299568176, "learning_rate": 0.0005, "loss": 2.1021, "step": 227030 }, { "epoch": 0.8641702762573936, "grad_norm": 0.1313837468624115, "learning_rate": 0.0005, "loss": 2.0953, "step": 227040 }, { "epoch": 0.8642083387255163, "grad_norm": 0.12122230976819992, "learning_rate": 0.0005, "loss": 2.1011, "step": 227050 }, { "epoch": 0.864246401193639, "grad_norm": 0.12515757977962494, "learning_rate": 0.0005, "loss": 2.094, "step": 227060 }, { "epoch": 0.8642844636617617, "grad_norm": 0.11461256444454193, "learning_rate": 0.0005, "loss": 2.1045, "step": 227070 }, { "epoch": 0.8643225261298844, "grad_norm": 0.12336868047714233, "learning_rate": 0.0005, "loss": 2.1041, "step": 227080 }, { "epoch": 0.864360588598007, "grad_norm": 0.12718035280704498, "learning_rate": 0.0005, "loss": 2.1042, "step": 227090 }, { "epoch": 0.8643986510661298, "grad_norm": 0.13720561563968658, "learning_rate": 0.0005, "loss": 2.1087, "step": 227100 }, { "epoch": 0.8644367135342524, "grad_norm": 0.11648658663034439, "learning_rate": 0.0005, "loss": 2.086, "step": 227110 }, { "epoch": 0.8644747760023751, "grad_norm": 0.12198827415704727, "learning_rate": 0.0005, "loss": 2.116, "step": 227120 }, { "epoch": 0.8645128384704978, "grad_norm": 0.12043623626232147, "learning_rate": 0.0005, "loss": 2.1036, "step": 227130 }, { "epoch": 0.8645509009386204, "grad_norm": 0.11719216406345367, "learning_rate": 0.0005, "loss": 2.1048, "step": 227140 }, { "epoch": 0.8645889634067432, "grad_norm": 0.1277819722890854, "learning_rate": 0.0005, "loss": 2.106, "step": 227150 }, { "epoch": 0.8646270258748658, "grad_norm": 0.1243322491645813, "learning_rate": 0.0005, "loss": 2.1115, "step": 227160 }, { "epoch": 0.8646650883429885, "grad_norm": 0.12716972827911377, "learning_rate": 0.0005, "loss": 2.1, "step": 227170 }, { "epoch": 0.8647031508111112, "grad_norm": 0.14013415575027466, "learning_rate": 0.0005, "loss": 2.1044, "step": 227180 }, { "epoch": 0.8647412132792339, "grad_norm": 0.12532857060432434, "learning_rate": 0.0005, "loss": 2.0909, "step": 227190 }, { "epoch": 0.8647792757473566, "grad_norm": 0.12132761627435684, "learning_rate": 0.0005, "loss": 2.1062, "step": 227200 }, { "epoch": 0.8648173382154792, "grad_norm": 0.1268184930086136, "learning_rate": 0.0005, "loss": 2.105, "step": 227210 }, { "epoch": 0.8648554006836019, "grad_norm": 0.12742270529270172, "learning_rate": 0.0005, "loss": 2.114, "step": 227220 }, { "epoch": 0.8648934631517247, "grad_norm": 0.12107336521148682, "learning_rate": 0.0005, "loss": 2.1014, "step": 227230 }, { "epoch": 0.8649315256198473, "grad_norm": 0.13903018832206726, "learning_rate": 0.0005, "loss": 2.1066, "step": 227240 }, { "epoch": 0.86496958808797, "grad_norm": 0.1315573751926422, "learning_rate": 0.0005, "loss": 2.1082, "step": 227250 }, { "epoch": 0.8650076505560926, "grad_norm": 0.1224905326962471, "learning_rate": 0.0005, "loss": 2.1, "step": 227260 }, { "epoch": 0.8650457130242153, "grad_norm": 0.122121661901474, "learning_rate": 0.0005, "loss": 2.1069, "step": 227270 }, { "epoch": 0.8650837754923381, "grad_norm": 0.12318921089172363, "learning_rate": 0.0005, "loss": 2.094, "step": 227280 }, { "epoch": 0.8651218379604607, "grad_norm": 0.12507276237010956, "learning_rate": 0.0005, "loss": 2.0907, "step": 227290 }, { "epoch": 0.8651599004285834, "grad_norm": 0.12425078451633453, "learning_rate": 0.0005, "loss": 2.104, "step": 227300 }, { "epoch": 0.865197962896706, "grad_norm": 0.1246219128370285, "learning_rate": 0.0005, "loss": 2.0936, "step": 227310 }, { "epoch": 0.8652360253648288, "grad_norm": 0.12110217660665512, "learning_rate": 0.0005, "loss": 2.1081, "step": 227320 }, { "epoch": 0.8652740878329515, "grad_norm": 0.11924567073583603, "learning_rate": 0.0005, "loss": 2.0879, "step": 227330 }, { "epoch": 0.8653121503010741, "grad_norm": 0.11475517600774765, "learning_rate": 0.0005, "loss": 2.0928, "step": 227340 }, { "epoch": 0.8653502127691968, "grad_norm": 0.12574663758277893, "learning_rate": 0.0005, "loss": 2.1013, "step": 227350 }, { "epoch": 0.8653882752373195, "grad_norm": 0.12827351689338684, "learning_rate": 0.0005, "loss": 2.0993, "step": 227360 }, { "epoch": 0.8654263377054422, "grad_norm": 0.11690565198659897, "learning_rate": 0.0005, "loss": 2.1018, "step": 227370 }, { "epoch": 0.8654644001735649, "grad_norm": 0.137576162815094, "learning_rate": 0.0005, "loss": 2.0958, "step": 227380 }, { "epoch": 0.8655024626416875, "grad_norm": 0.11919543892145157, "learning_rate": 0.0005, "loss": 2.0955, "step": 227390 }, { "epoch": 0.8655405251098102, "grad_norm": 0.1196046695113182, "learning_rate": 0.0005, "loss": 2.1166, "step": 227400 }, { "epoch": 0.8655785875779329, "grad_norm": 0.11969230324029922, "learning_rate": 0.0005, "loss": 2.1128, "step": 227410 }, { "epoch": 0.8656166500460556, "grad_norm": 0.13001693785190582, "learning_rate": 0.0005, "loss": 2.0981, "step": 227420 }, { "epoch": 0.8656547125141782, "grad_norm": 0.1436636596918106, "learning_rate": 0.0005, "loss": 2.0966, "step": 227430 }, { "epoch": 0.8656927749823009, "grad_norm": 0.12186801433563232, "learning_rate": 0.0005, "loss": 2.1062, "step": 227440 }, { "epoch": 0.8657308374504237, "grad_norm": 0.11917836219072342, "learning_rate": 0.0005, "loss": 2.1015, "step": 227450 }, { "epoch": 0.8657688999185463, "grad_norm": 0.16602906584739685, "learning_rate": 0.0005, "loss": 2.1134, "step": 227460 }, { "epoch": 0.865806962386669, "grad_norm": 0.12305092066526413, "learning_rate": 0.0005, "loss": 2.1072, "step": 227470 }, { "epoch": 0.8658450248547916, "grad_norm": 0.12628142535686493, "learning_rate": 0.0005, "loss": 2.1086, "step": 227480 }, { "epoch": 0.8658830873229144, "grad_norm": 0.12511824071407318, "learning_rate": 0.0005, "loss": 2.0988, "step": 227490 }, { "epoch": 0.8659211497910371, "grad_norm": 0.12373516708612442, "learning_rate": 0.0005, "loss": 2.0956, "step": 227500 }, { "epoch": 0.8659592122591597, "grad_norm": 0.12012782692909241, "learning_rate": 0.0005, "loss": 2.0917, "step": 227510 }, { "epoch": 0.8659972747272824, "grad_norm": 0.1300332099199295, "learning_rate": 0.0005, "loss": 2.1027, "step": 227520 }, { "epoch": 0.8660353371954052, "grad_norm": 0.12339738011360168, "learning_rate": 0.0005, "loss": 2.1047, "step": 227530 }, { "epoch": 0.8660733996635278, "grad_norm": 0.13832895457744598, "learning_rate": 0.0005, "loss": 2.0933, "step": 227540 }, { "epoch": 0.8661114621316505, "grad_norm": 0.12086351215839386, "learning_rate": 0.0005, "loss": 2.1131, "step": 227550 }, { "epoch": 0.8661495245997731, "grad_norm": 0.1234988421201706, "learning_rate": 0.0005, "loss": 2.1153, "step": 227560 }, { "epoch": 0.8661875870678958, "grad_norm": 0.1281185895204544, "learning_rate": 0.0005, "loss": 2.1284, "step": 227570 }, { "epoch": 0.8662256495360185, "grad_norm": 0.12379458546638489, "learning_rate": 0.0005, "loss": 2.1112, "step": 227580 }, { "epoch": 0.8662637120041412, "grad_norm": 0.12381254881620407, "learning_rate": 0.0005, "loss": 2.0958, "step": 227590 }, { "epoch": 0.8663017744722639, "grad_norm": 0.12239328026771545, "learning_rate": 0.0005, "loss": 2.0999, "step": 227600 }, { "epoch": 0.8663398369403865, "grad_norm": 0.12826788425445557, "learning_rate": 0.0005, "loss": 2.0941, "step": 227610 }, { "epoch": 0.8663778994085093, "grad_norm": 0.11990267038345337, "learning_rate": 0.0005, "loss": 2.104, "step": 227620 }, { "epoch": 0.866415961876632, "grad_norm": 0.12026792019605637, "learning_rate": 0.0005, "loss": 2.1286, "step": 227630 }, { "epoch": 0.8664540243447546, "grad_norm": 0.12082263827323914, "learning_rate": 0.0005, "loss": 2.1054, "step": 227640 }, { "epoch": 0.8664920868128773, "grad_norm": 0.13680067658424377, "learning_rate": 0.0005, "loss": 2.0892, "step": 227650 }, { "epoch": 0.866530149281, "grad_norm": 0.13015596568584442, "learning_rate": 0.0005, "loss": 2.1084, "step": 227660 }, { "epoch": 0.8665682117491227, "grad_norm": 0.1317087709903717, "learning_rate": 0.0005, "loss": 2.1016, "step": 227670 }, { "epoch": 0.8666062742172453, "grad_norm": 0.13107813894748688, "learning_rate": 0.0005, "loss": 2.103, "step": 227680 }, { "epoch": 0.866644336685368, "grad_norm": 0.11237580329179764, "learning_rate": 0.0005, "loss": 2.101, "step": 227690 }, { "epoch": 0.8666823991534907, "grad_norm": 0.13148245215415955, "learning_rate": 0.0005, "loss": 2.122, "step": 227700 }, { "epoch": 0.8667204616216134, "grad_norm": 0.12727142870426178, "learning_rate": 0.0005, "loss": 2.0995, "step": 227710 }, { "epoch": 0.8667585240897361, "grad_norm": 0.12946079671382904, "learning_rate": 0.0005, "loss": 2.1166, "step": 227720 }, { "epoch": 0.8667965865578587, "grad_norm": 0.1307932585477829, "learning_rate": 0.0005, "loss": 2.0848, "step": 227730 }, { "epoch": 0.8668346490259814, "grad_norm": 0.13611574470996857, "learning_rate": 0.0005, "loss": 2.1055, "step": 227740 }, { "epoch": 0.8668727114941042, "grad_norm": 0.12899921834468842, "learning_rate": 0.0005, "loss": 2.1027, "step": 227750 }, { "epoch": 0.8669107739622268, "grad_norm": 0.14597873389720917, "learning_rate": 0.0005, "loss": 2.1127, "step": 227760 }, { "epoch": 0.8669488364303495, "grad_norm": 0.13091064989566803, "learning_rate": 0.0005, "loss": 2.1104, "step": 227770 }, { "epoch": 0.8669868988984721, "grad_norm": 0.13377290964126587, "learning_rate": 0.0005, "loss": 2.1062, "step": 227780 }, { "epoch": 0.8670249613665949, "grad_norm": 0.12073376029729843, "learning_rate": 0.0005, "loss": 2.0993, "step": 227790 }, { "epoch": 0.8670630238347176, "grad_norm": 0.12615399062633514, "learning_rate": 0.0005, "loss": 2.1087, "step": 227800 }, { "epoch": 0.8671010863028402, "grad_norm": 0.12113891541957855, "learning_rate": 0.0005, "loss": 2.0971, "step": 227810 }, { "epoch": 0.8671391487709629, "grad_norm": 0.1192716658115387, "learning_rate": 0.0005, "loss": 2.0954, "step": 227820 }, { "epoch": 0.8671772112390855, "grad_norm": 0.1256159394979477, "learning_rate": 0.0005, "loss": 2.1029, "step": 227830 }, { "epoch": 0.8672152737072083, "grad_norm": 0.12360091507434845, "learning_rate": 0.0005, "loss": 2.1101, "step": 227840 }, { "epoch": 0.867253336175331, "grad_norm": 0.11907059699296951, "learning_rate": 0.0005, "loss": 2.1016, "step": 227850 }, { "epoch": 0.8672913986434536, "grad_norm": 0.12359027564525604, "learning_rate": 0.0005, "loss": 2.1018, "step": 227860 }, { "epoch": 0.8673294611115763, "grad_norm": 0.12925948202610016, "learning_rate": 0.0005, "loss": 2.0875, "step": 227870 }, { "epoch": 0.867367523579699, "grad_norm": 0.1352248638868332, "learning_rate": 0.0005, "loss": 2.0974, "step": 227880 }, { "epoch": 0.8674055860478217, "grad_norm": 0.1331569403409958, "learning_rate": 0.0005, "loss": 2.0831, "step": 227890 }, { "epoch": 0.8674436485159444, "grad_norm": 0.12908479571342468, "learning_rate": 0.0005, "loss": 2.1174, "step": 227900 }, { "epoch": 0.867481710984067, "grad_norm": 0.1242067888379097, "learning_rate": 0.0005, "loss": 2.0978, "step": 227910 }, { "epoch": 0.8675197734521898, "grad_norm": 0.13277634978294373, "learning_rate": 0.0005, "loss": 2.1008, "step": 227920 }, { "epoch": 0.8675578359203124, "grad_norm": 0.12008011341094971, "learning_rate": 0.0005, "loss": 2.1114, "step": 227930 }, { "epoch": 0.8675958983884351, "grad_norm": 0.11581023037433624, "learning_rate": 0.0005, "loss": 2.0985, "step": 227940 }, { "epoch": 0.8676339608565578, "grad_norm": 0.12725763022899628, "learning_rate": 0.0005, "loss": 2.1005, "step": 227950 }, { "epoch": 0.8676720233246805, "grad_norm": 0.11594830453395844, "learning_rate": 0.0005, "loss": 2.1051, "step": 227960 }, { "epoch": 0.8677100857928032, "grad_norm": 0.12017028033733368, "learning_rate": 0.0005, "loss": 2.0935, "step": 227970 }, { "epoch": 0.8677481482609258, "grad_norm": 0.1227264478802681, "learning_rate": 0.0005, "loss": 2.1008, "step": 227980 }, { "epoch": 0.8677862107290485, "grad_norm": 0.12182033061981201, "learning_rate": 0.0005, "loss": 2.1059, "step": 227990 }, { "epoch": 0.8678242731971711, "grad_norm": 0.13833528757095337, "learning_rate": 0.0005, "loss": 2.1079, "step": 228000 }, { "epoch": 0.8678623356652939, "grad_norm": 0.13079838454723358, "learning_rate": 0.0005, "loss": 2.103, "step": 228010 }, { "epoch": 0.8679003981334166, "grad_norm": 0.13576842844486237, "learning_rate": 0.0005, "loss": 2.111, "step": 228020 }, { "epoch": 0.8679384606015392, "grad_norm": 0.12961263954639435, "learning_rate": 0.0005, "loss": 2.0894, "step": 228030 }, { "epoch": 0.8679765230696619, "grad_norm": 0.12877993285655975, "learning_rate": 0.0005, "loss": 2.1088, "step": 228040 }, { "epoch": 0.8680145855377847, "grad_norm": 0.12145891785621643, "learning_rate": 0.0005, "loss": 2.0949, "step": 228050 }, { "epoch": 0.8680526480059073, "grad_norm": 0.11703445017337799, "learning_rate": 0.0005, "loss": 2.1172, "step": 228060 }, { "epoch": 0.86809071047403, "grad_norm": 0.12900608777999878, "learning_rate": 0.0005, "loss": 2.1032, "step": 228070 }, { "epoch": 0.8681287729421526, "grad_norm": 0.11951258033514023, "learning_rate": 0.0005, "loss": 2.0884, "step": 228080 }, { "epoch": 0.8681668354102754, "grad_norm": 0.1197548508644104, "learning_rate": 0.0005, "loss": 2.0922, "step": 228090 }, { "epoch": 0.868204897878398, "grad_norm": 0.12547695636749268, "learning_rate": 0.0005, "loss": 2.1062, "step": 228100 }, { "epoch": 0.8682429603465207, "grad_norm": 0.12415210902690887, "learning_rate": 0.0005, "loss": 2.0972, "step": 228110 }, { "epoch": 0.8682810228146434, "grad_norm": 0.12451770156621933, "learning_rate": 0.0005, "loss": 2.097, "step": 228120 }, { "epoch": 0.868319085282766, "grad_norm": 0.12714098393917084, "learning_rate": 0.0005, "loss": 2.0968, "step": 228130 }, { "epoch": 0.8683571477508888, "grad_norm": 0.12449847161769867, "learning_rate": 0.0005, "loss": 2.1054, "step": 228140 }, { "epoch": 0.8683952102190114, "grad_norm": 0.11371887475252151, "learning_rate": 0.0005, "loss": 2.1075, "step": 228150 }, { "epoch": 0.8684332726871341, "grad_norm": 0.11736097186803818, "learning_rate": 0.0005, "loss": 2.1079, "step": 228160 }, { "epoch": 0.8684713351552568, "grad_norm": 0.1239897757768631, "learning_rate": 0.0005, "loss": 2.1054, "step": 228170 }, { "epoch": 0.8685093976233795, "grad_norm": 0.13431617617607117, "learning_rate": 0.0005, "loss": 2.11, "step": 228180 }, { "epoch": 0.8685474600915022, "grad_norm": 0.1289248764514923, "learning_rate": 0.0005, "loss": 2.1025, "step": 228190 }, { "epoch": 0.8685855225596248, "grad_norm": 0.11664478480815887, "learning_rate": 0.0005, "loss": 2.1096, "step": 228200 }, { "epoch": 0.8686235850277475, "grad_norm": 0.13370691239833832, "learning_rate": 0.0005, "loss": 2.1118, "step": 228210 }, { "epoch": 0.8686616474958703, "grad_norm": 0.12480060011148453, "learning_rate": 0.0005, "loss": 2.1067, "step": 228220 }, { "epoch": 0.8686997099639929, "grad_norm": 0.14454995095729828, "learning_rate": 0.0005, "loss": 2.1044, "step": 228230 }, { "epoch": 0.8687377724321156, "grad_norm": 0.13794377446174622, "learning_rate": 0.0005, "loss": 2.1122, "step": 228240 }, { "epoch": 0.8687758349002382, "grad_norm": 0.13001668453216553, "learning_rate": 0.0005, "loss": 2.1098, "step": 228250 }, { "epoch": 0.8688138973683609, "grad_norm": 0.1631646305322647, "learning_rate": 0.0005, "loss": 2.1033, "step": 228260 }, { "epoch": 0.8688519598364837, "grad_norm": 0.13760943710803986, "learning_rate": 0.0005, "loss": 2.0969, "step": 228270 }, { "epoch": 0.8688900223046063, "grad_norm": 0.12570780515670776, "learning_rate": 0.0005, "loss": 2.1117, "step": 228280 }, { "epoch": 0.868928084772729, "grad_norm": 0.1366773247718811, "learning_rate": 0.0005, "loss": 2.1004, "step": 228290 }, { "epoch": 0.8689661472408516, "grad_norm": 0.12486300617456436, "learning_rate": 0.0005, "loss": 2.102, "step": 228300 }, { "epoch": 0.8690042097089744, "grad_norm": 0.12038671225309372, "learning_rate": 0.0005, "loss": 2.0962, "step": 228310 }, { "epoch": 0.8690422721770971, "grad_norm": 0.12719301879405975, "learning_rate": 0.0005, "loss": 2.0913, "step": 228320 }, { "epoch": 0.8690803346452197, "grad_norm": 0.1270996630191803, "learning_rate": 0.0005, "loss": 2.0907, "step": 228330 }, { "epoch": 0.8691183971133424, "grad_norm": 0.13092835247516632, "learning_rate": 0.0005, "loss": 2.0882, "step": 228340 }, { "epoch": 0.8691564595814651, "grad_norm": 0.12825457751750946, "learning_rate": 0.0005, "loss": 2.0839, "step": 228350 }, { "epoch": 0.8691945220495878, "grad_norm": 0.12021090090274811, "learning_rate": 0.0005, "loss": 2.1146, "step": 228360 }, { "epoch": 0.8692325845177105, "grad_norm": 0.13270549476146698, "learning_rate": 0.0005, "loss": 2.1046, "step": 228370 }, { "epoch": 0.8692706469858331, "grad_norm": 0.11613058298826218, "learning_rate": 0.0005, "loss": 2.1143, "step": 228380 }, { "epoch": 0.8693087094539559, "grad_norm": 0.12812206149101257, "learning_rate": 0.0005, "loss": 2.1086, "step": 228390 }, { "epoch": 0.8693467719220785, "grad_norm": 0.13474640250205994, "learning_rate": 0.0005, "loss": 2.1171, "step": 228400 }, { "epoch": 0.8693848343902012, "grad_norm": 0.12155559659004211, "learning_rate": 0.0005, "loss": 2.0865, "step": 228410 }, { "epoch": 0.8694228968583239, "grad_norm": 0.5138123035430908, "learning_rate": 0.0005, "loss": 2.1166, "step": 228420 }, { "epoch": 0.8694609593264465, "grad_norm": 0.12644684314727783, "learning_rate": 0.0005, "loss": 2.0947, "step": 228430 }, { "epoch": 0.8694990217945693, "grad_norm": 0.1481925994157791, "learning_rate": 0.0005, "loss": 2.0812, "step": 228440 }, { "epoch": 0.8695370842626919, "grad_norm": 0.11693347245454788, "learning_rate": 0.0005, "loss": 2.1022, "step": 228450 }, { "epoch": 0.8695751467308146, "grad_norm": 0.13215351104736328, "learning_rate": 0.0005, "loss": 2.107, "step": 228460 }, { "epoch": 0.8696132091989373, "grad_norm": 0.11924117803573608, "learning_rate": 0.0005, "loss": 2.0976, "step": 228470 }, { "epoch": 0.86965127166706, "grad_norm": 0.13626554608345032, "learning_rate": 0.0005, "loss": 2.1044, "step": 228480 }, { "epoch": 0.8696893341351827, "grad_norm": 0.13347983360290527, "learning_rate": 0.0005, "loss": 2.096, "step": 228490 }, { "epoch": 0.8697273966033053, "grad_norm": 0.13828565180301666, "learning_rate": 0.0005, "loss": 2.0969, "step": 228500 }, { "epoch": 0.869765459071428, "grad_norm": 0.1184941977262497, "learning_rate": 0.0005, "loss": 2.1137, "step": 228510 }, { "epoch": 0.8698035215395508, "grad_norm": 0.13307888805866241, "learning_rate": 0.0005, "loss": 2.0961, "step": 228520 }, { "epoch": 0.8698415840076734, "grad_norm": 0.13183127343654633, "learning_rate": 0.0005, "loss": 2.0983, "step": 228530 }, { "epoch": 0.8698796464757961, "grad_norm": 0.13290202617645264, "learning_rate": 0.0005, "loss": 2.0999, "step": 228540 }, { "epoch": 0.8699177089439187, "grad_norm": 0.12918685376644135, "learning_rate": 0.0005, "loss": 2.0986, "step": 228550 }, { "epoch": 0.8699557714120414, "grad_norm": 0.12262940406799316, "learning_rate": 0.0005, "loss": 2.1073, "step": 228560 }, { "epoch": 0.8699938338801642, "grad_norm": 0.12436863780021667, "learning_rate": 0.0005, "loss": 2.1061, "step": 228570 }, { "epoch": 0.8700318963482868, "grad_norm": 0.12879203259944916, "learning_rate": 0.0005, "loss": 2.1023, "step": 228580 }, { "epoch": 0.8700699588164095, "grad_norm": 0.12833668291568756, "learning_rate": 0.0005, "loss": 2.1001, "step": 228590 }, { "epoch": 0.8701080212845321, "grad_norm": 0.13610157370567322, "learning_rate": 0.0005, "loss": 2.0854, "step": 228600 }, { "epoch": 0.8701460837526549, "grad_norm": 0.12968987226486206, "learning_rate": 0.0005, "loss": 2.1096, "step": 228610 }, { "epoch": 0.8701841462207776, "grad_norm": 0.1264941394329071, "learning_rate": 0.0005, "loss": 2.1117, "step": 228620 }, { "epoch": 0.8702222086889002, "grad_norm": 0.12434609234333038, "learning_rate": 0.0005, "loss": 2.1111, "step": 228630 }, { "epoch": 0.8702602711570229, "grad_norm": 0.12505097687244415, "learning_rate": 0.0005, "loss": 2.1146, "step": 228640 }, { "epoch": 0.8702983336251456, "grad_norm": 0.12018077075481415, "learning_rate": 0.0005, "loss": 2.1052, "step": 228650 }, { "epoch": 0.8703363960932683, "grad_norm": 0.11911716312170029, "learning_rate": 0.0005, "loss": 2.1047, "step": 228660 }, { "epoch": 0.870374458561391, "grad_norm": 0.12822499871253967, "learning_rate": 0.0005, "loss": 2.1287, "step": 228670 }, { "epoch": 0.8704125210295136, "grad_norm": 0.11791371554136276, "learning_rate": 0.0005, "loss": 2.1008, "step": 228680 }, { "epoch": 0.8704505834976363, "grad_norm": 0.12105327099561691, "learning_rate": 0.0005, "loss": 2.1028, "step": 228690 }, { "epoch": 0.870488645965759, "grad_norm": 0.13159845769405365, "learning_rate": 0.0005, "loss": 2.1049, "step": 228700 }, { "epoch": 0.8705267084338817, "grad_norm": 0.12858182191848755, "learning_rate": 0.0005, "loss": 2.0934, "step": 228710 }, { "epoch": 0.8705647709020043, "grad_norm": 0.12227248400449753, "learning_rate": 0.0005, "loss": 2.1099, "step": 228720 }, { "epoch": 0.870602833370127, "grad_norm": 0.13533693552017212, "learning_rate": 0.0005, "loss": 2.1047, "step": 228730 }, { "epoch": 0.8706408958382498, "grad_norm": 0.128210186958313, "learning_rate": 0.0005, "loss": 2.0864, "step": 228740 }, { "epoch": 0.8706789583063724, "grad_norm": 0.33541253209114075, "learning_rate": 0.0005, "loss": 2.1087, "step": 228750 }, { "epoch": 0.8707170207744951, "grad_norm": 0.13576674461364746, "learning_rate": 0.0005, "loss": 2.1212, "step": 228760 }, { "epoch": 0.8707550832426177, "grad_norm": 0.11649937182664871, "learning_rate": 0.0005, "loss": 2.1126, "step": 228770 }, { "epoch": 0.8707931457107405, "grad_norm": 0.12344325333833694, "learning_rate": 0.0005, "loss": 2.1094, "step": 228780 }, { "epoch": 0.8708312081788632, "grad_norm": 0.12784738838672638, "learning_rate": 0.0005, "loss": 2.0903, "step": 228790 }, { "epoch": 0.8708692706469858, "grad_norm": 0.1322634369134903, "learning_rate": 0.0005, "loss": 2.0983, "step": 228800 }, { "epoch": 0.8709073331151085, "grad_norm": 0.1282651573419571, "learning_rate": 0.0005, "loss": 2.1135, "step": 228810 }, { "epoch": 0.8709453955832313, "grad_norm": 0.13180986046791077, "learning_rate": 0.0005, "loss": 2.1137, "step": 228820 }, { "epoch": 0.8709834580513539, "grad_norm": 0.1403912752866745, "learning_rate": 0.0005, "loss": 2.1062, "step": 228830 }, { "epoch": 0.8710215205194766, "grad_norm": 0.12029944360256195, "learning_rate": 0.0005, "loss": 2.1038, "step": 228840 }, { "epoch": 0.8710595829875992, "grad_norm": 0.13395288586616516, "learning_rate": 0.0005, "loss": 2.1073, "step": 228850 }, { "epoch": 0.8710976454557219, "grad_norm": 0.13040871918201447, "learning_rate": 0.0005, "loss": 2.1011, "step": 228860 }, { "epoch": 0.8711357079238446, "grad_norm": 0.12356771528720856, "learning_rate": 0.0005, "loss": 2.1018, "step": 228870 }, { "epoch": 0.8711737703919673, "grad_norm": 0.12945036590099335, "learning_rate": 0.0005, "loss": 2.0986, "step": 228880 }, { "epoch": 0.87121183286009, "grad_norm": 0.12240960448980331, "learning_rate": 0.0005, "loss": 2.1158, "step": 228890 }, { "epoch": 0.8712498953282126, "grad_norm": 0.12477682530879974, "learning_rate": 0.0005, "loss": 2.1066, "step": 228900 }, { "epoch": 0.8712879577963354, "grad_norm": 0.15387369692325592, "learning_rate": 0.0005, "loss": 2.1126, "step": 228910 }, { "epoch": 0.871326020264458, "grad_norm": 0.14190644025802612, "learning_rate": 0.0005, "loss": 2.1081, "step": 228920 }, { "epoch": 0.8713640827325807, "grad_norm": 0.1381376087665558, "learning_rate": 0.0005, "loss": 2.1104, "step": 228930 }, { "epoch": 0.8714021452007034, "grad_norm": 0.16371963918209076, "learning_rate": 0.0005, "loss": 2.1051, "step": 228940 }, { "epoch": 0.8714402076688261, "grad_norm": 0.13157308101654053, "learning_rate": 0.0005, "loss": 2.1122, "step": 228950 }, { "epoch": 0.8714782701369488, "grad_norm": 0.14220169186592102, "learning_rate": 0.0005, "loss": 2.1214, "step": 228960 }, { "epoch": 0.8715163326050714, "grad_norm": 0.13458815217018127, "learning_rate": 0.0005, "loss": 2.1087, "step": 228970 }, { "epoch": 0.8715543950731941, "grad_norm": 0.13978664577007294, "learning_rate": 0.0005, "loss": 2.093, "step": 228980 }, { "epoch": 0.8715924575413168, "grad_norm": 0.11965165287256241, "learning_rate": 0.0005, "loss": 2.106, "step": 228990 }, { "epoch": 0.8716305200094395, "grad_norm": 0.1364961862564087, "learning_rate": 0.0005, "loss": 2.0934, "step": 229000 }, { "epoch": 0.8716685824775622, "grad_norm": 0.12621454894542694, "learning_rate": 0.0005, "loss": 2.1131, "step": 229010 }, { "epoch": 0.8717066449456848, "grad_norm": 0.12990035116672516, "learning_rate": 0.0005, "loss": 2.1051, "step": 229020 }, { "epoch": 0.8717447074138075, "grad_norm": 0.13059154152870178, "learning_rate": 0.0005, "loss": 2.1021, "step": 229030 }, { "epoch": 0.8717827698819303, "grad_norm": 0.13521890342235565, "learning_rate": 0.0005, "loss": 2.1083, "step": 229040 }, { "epoch": 0.8718208323500529, "grad_norm": 0.12604497373104095, "learning_rate": 0.0005, "loss": 2.1054, "step": 229050 }, { "epoch": 0.8718588948181756, "grad_norm": 0.13183411955833435, "learning_rate": 0.0005, "loss": 2.1054, "step": 229060 }, { "epoch": 0.8718969572862982, "grad_norm": 0.13138261437416077, "learning_rate": 0.0005, "loss": 2.0977, "step": 229070 }, { "epoch": 0.871935019754421, "grad_norm": 0.1265527904033661, "learning_rate": 0.0005, "loss": 2.1147, "step": 229080 }, { "epoch": 0.8719730822225437, "grad_norm": 0.14647045731544495, "learning_rate": 0.0005, "loss": 2.0997, "step": 229090 }, { "epoch": 0.8720111446906663, "grad_norm": 0.12061312049627304, "learning_rate": 0.0005, "loss": 2.0964, "step": 229100 }, { "epoch": 0.872049207158789, "grad_norm": 0.1324048638343811, "learning_rate": 0.0005, "loss": 2.1069, "step": 229110 }, { "epoch": 0.8720872696269116, "grad_norm": 0.12923435866832733, "learning_rate": 0.0005, "loss": 2.111, "step": 229120 }, { "epoch": 0.8721253320950344, "grad_norm": 0.13202157616615295, "learning_rate": 0.0005, "loss": 2.0987, "step": 229130 }, { "epoch": 0.8721633945631571, "grad_norm": 0.12037435173988342, "learning_rate": 0.0005, "loss": 2.122, "step": 229140 }, { "epoch": 0.8722014570312797, "grad_norm": 0.12143620103597641, "learning_rate": 0.0005, "loss": 2.0953, "step": 229150 }, { "epoch": 0.8722395194994024, "grad_norm": 0.1236945390701294, "learning_rate": 0.0005, "loss": 2.0912, "step": 229160 }, { "epoch": 0.8722775819675251, "grad_norm": 0.13332204520702362, "learning_rate": 0.0005, "loss": 2.1042, "step": 229170 }, { "epoch": 0.8723156444356478, "grad_norm": 0.14076372981071472, "learning_rate": 0.0005, "loss": 2.1119, "step": 229180 }, { "epoch": 0.8723537069037705, "grad_norm": 0.13058961927890778, "learning_rate": 0.0005, "loss": 2.0999, "step": 229190 }, { "epoch": 0.8723917693718931, "grad_norm": 0.13476161658763885, "learning_rate": 0.0005, "loss": 2.1062, "step": 229200 }, { "epoch": 0.8724298318400159, "grad_norm": 0.1271485835313797, "learning_rate": 0.0005, "loss": 2.0999, "step": 229210 }, { "epoch": 0.8724678943081385, "grad_norm": 0.12276792526245117, "learning_rate": 0.0005, "loss": 2.0919, "step": 229220 }, { "epoch": 0.8725059567762612, "grad_norm": 0.12720341980457306, "learning_rate": 0.0005, "loss": 2.1121, "step": 229230 }, { "epoch": 0.8725440192443839, "grad_norm": 0.13597077131271362, "learning_rate": 0.0005, "loss": 2.1097, "step": 229240 }, { "epoch": 0.8725820817125066, "grad_norm": 0.1279052197933197, "learning_rate": 0.0005, "loss": 2.0927, "step": 229250 }, { "epoch": 0.8726201441806293, "grad_norm": 0.10936145484447479, "learning_rate": 0.0005, "loss": 2.1162, "step": 229260 }, { "epoch": 0.8726582066487519, "grad_norm": 0.12631534039974213, "learning_rate": 0.0005, "loss": 2.1055, "step": 229270 }, { "epoch": 0.8726962691168746, "grad_norm": 0.12847156822681427, "learning_rate": 0.0005, "loss": 2.1087, "step": 229280 }, { "epoch": 0.8727343315849972, "grad_norm": 0.12804313004016876, "learning_rate": 0.0005, "loss": 2.0898, "step": 229290 }, { "epoch": 0.87277239405312, "grad_norm": 0.1265999674797058, "learning_rate": 0.0005, "loss": 2.1022, "step": 229300 }, { "epoch": 0.8728104565212427, "grad_norm": 0.11968325823545456, "learning_rate": 0.0005, "loss": 2.1042, "step": 229310 }, { "epoch": 0.8728485189893653, "grad_norm": 0.11957523971796036, "learning_rate": 0.0005, "loss": 2.1001, "step": 229320 }, { "epoch": 0.872886581457488, "grad_norm": 0.12701158225536346, "learning_rate": 0.0005, "loss": 2.1061, "step": 229330 }, { "epoch": 0.8729246439256108, "grad_norm": 0.11046332120895386, "learning_rate": 0.0005, "loss": 2.102, "step": 229340 }, { "epoch": 0.8729627063937334, "grad_norm": 0.1372259110212326, "learning_rate": 0.0005, "loss": 2.1079, "step": 229350 }, { "epoch": 0.8730007688618561, "grad_norm": 0.12271147966384888, "learning_rate": 0.0005, "loss": 2.1138, "step": 229360 }, { "epoch": 0.8730388313299787, "grad_norm": 0.11774443835020065, "learning_rate": 0.0005, "loss": 2.1076, "step": 229370 }, { "epoch": 0.8730768937981015, "grad_norm": 0.11719627678394318, "learning_rate": 0.0005, "loss": 2.0932, "step": 229380 }, { "epoch": 0.8731149562662242, "grad_norm": 0.11640815436840057, "learning_rate": 0.0005, "loss": 2.1111, "step": 229390 }, { "epoch": 0.8731530187343468, "grad_norm": 0.1204354465007782, "learning_rate": 0.0005, "loss": 2.0933, "step": 229400 }, { "epoch": 0.8731910812024695, "grad_norm": 0.12202809005975723, "learning_rate": 0.0005, "loss": 2.1066, "step": 229410 }, { "epoch": 0.8732291436705921, "grad_norm": 0.13924041390419006, "learning_rate": 0.0005, "loss": 2.1064, "step": 229420 }, { "epoch": 0.8732672061387149, "grad_norm": 0.11984602361917496, "learning_rate": 0.0005, "loss": 2.0983, "step": 229430 }, { "epoch": 0.8733052686068375, "grad_norm": 0.11162258684635162, "learning_rate": 0.0005, "loss": 2.1003, "step": 229440 }, { "epoch": 0.8733433310749602, "grad_norm": 0.11645910888910294, "learning_rate": 0.0005, "loss": 2.1069, "step": 229450 }, { "epoch": 0.8733813935430829, "grad_norm": 0.1287803053855896, "learning_rate": 0.0005, "loss": 2.1123, "step": 229460 }, { "epoch": 0.8734194560112056, "grad_norm": 0.1326991766691208, "learning_rate": 0.0005, "loss": 2.098, "step": 229470 }, { "epoch": 0.8734575184793283, "grad_norm": 0.13610665500164032, "learning_rate": 0.0005, "loss": 2.0989, "step": 229480 }, { "epoch": 0.873495580947451, "grad_norm": 0.13597998023033142, "learning_rate": 0.0005, "loss": 2.0971, "step": 229490 }, { "epoch": 0.8735336434155736, "grad_norm": 0.14227288961410522, "learning_rate": 0.0005, "loss": 2.1144, "step": 229500 }, { "epoch": 0.8735717058836964, "grad_norm": 0.12824498116970062, "learning_rate": 0.0005, "loss": 2.1035, "step": 229510 }, { "epoch": 0.873609768351819, "grad_norm": 0.1353910118341446, "learning_rate": 0.0005, "loss": 2.1149, "step": 229520 }, { "epoch": 0.8736478308199417, "grad_norm": 0.13321471214294434, "learning_rate": 0.0005, "loss": 2.1089, "step": 229530 }, { "epoch": 0.8736858932880643, "grad_norm": 0.12848351895809174, "learning_rate": 0.0005, "loss": 2.0995, "step": 229540 }, { "epoch": 0.8737239557561871, "grad_norm": 0.12468912452459335, "learning_rate": 0.0005, "loss": 2.095, "step": 229550 }, { "epoch": 0.8737620182243098, "grad_norm": 0.13091306388378143, "learning_rate": 0.0005, "loss": 2.1084, "step": 229560 }, { "epoch": 0.8738000806924324, "grad_norm": 0.12601301074028015, "learning_rate": 0.0005, "loss": 2.1096, "step": 229570 }, { "epoch": 0.8738381431605551, "grad_norm": 0.1305581033229828, "learning_rate": 0.0005, "loss": 2.1127, "step": 229580 }, { "epoch": 0.8738762056286777, "grad_norm": 0.14347653090953827, "learning_rate": 0.0005, "loss": 2.0863, "step": 229590 }, { "epoch": 0.8739142680968005, "grad_norm": 0.12171580642461777, "learning_rate": 0.0005, "loss": 2.1046, "step": 229600 }, { "epoch": 0.8739523305649232, "grad_norm": 0.11917494237422943, "learning_rate": 0.0005, "loss": 2.1178, "step": 229610 }, { "epoch": 0.8739903930330458, "grad_norm": 0.11979630589485168, "learning_rate": 0.0005, "loss": 2.1127, "step": 229620 }, { "epoch": 0.8740284555011685, "grad_norm": 0.12840092182159424, "learning_rate": 0.0005, "loss": 2.103, "step": 229630 }, { "epoch": 0.8740665179692912, "grad_norm": 0.1223602220416069, "learning_rate": 0.0005, "loss": 2.1049, "step": 229640 }, { "epoch": 0.8741045804374139, "grad_norm": 0.12229134887456894, "learning_rate": 0.0005, "loss": 2.1125, "step": 229650 }, { "epoch": 0.8741426429055366, "grad_norm": 0.16060106456279755, "learning_rate": 0.0005, "loss": 2.1146, "step": 229660 }, { "epoch": 0.8741807053736592, "grad_norm": 0.11981988698244095, "learning_rate": 0.0005, "loss": 2.106, "step": 229670 }, { "epoch": 0.874218767841782, "grad_norm": 0.1297769695520401, "learning_rate": 0.0005, "loss": 2.0997, "step": 229680 }, { "epoch": 0.8742568303099046, "grad_norm": 0.12590380012989044, "learning_rate": 0.0005, "loss": 2.098, "step": 229690 }, { "epoch": 0.8742948927780273, "grad_norm": 0.1368873566389084, "learning_rate": 0.0005, "loss": 2.1133, "step": 229700 }, { "epoch": 0.87433295524615, "grad_norm": 0.1381053775548935, "learning_rate": 0.0005, "loss": 2.1146, "step": 229710 }, { "epoch": 0.8743710177142726, "grad_norm": 0.11610874533653259, "learning_rate": 0.0005, "loss": 2.1013, "step": 229720 }, { "epoch": 0.8744090801823954, "grad_norm": 0.12096839398145676, "learning_rate": 0.0005, "loss": 2.1077, "step": 229730 }, { "epoch": 0.874447142650518, "grad_norm": 0.130154088139534, "learning_rate": 0.0005, "loss": 2.1011, "step": 229740 }, { "epoch": 0.8744852051186407, "grad_norm": 0.13154588639736176, "learning_rate": 0.0005, "loss": 2.1013, "step": 229750 }, { "epoch": 0.8745232675867634, "grad_norm": 0.13115628063678741, "learning_rate": 0.0005, "loss": 2.1049, "step": 229760 }, { "epoch": 0.8745613300548861, "grad_norm": 0.1279299259185791, "learning_rate": 0.0005, "loss": 2.0815, "step": 229770 }, { "epoch": 0.8745993925230088, "grad_norm": 0.11625447869300842, "learning_rate": 0.0005, "loss": 2.0931, "step": 229780 }, { "epoch": 0.8746374549911314, "grad_norm": 0.12501411139965057, "learning_rate": 0.0005, "loss": 2.0983, "step": 229790 }, { "epoch": 0.8746755174592541, "grad_norm": 0.12068720906972885, "learning_rate": 0.0005, "loss": 2.1044, "step": 229800 }, { "epoch": 0.8747135799273769, "grad_norm": 0.12535758316516876, "learning_rate": 0.0005, "loss": 2.0978, "step": 229810 }, { "epoch": 0.8747516423954995, "grad_norm": 0.12687882781028748, "learning_rate": 0.0005, "loss": 2.1243, "step": 229820 }, { "epoch": 0.8747897048636222, "grad_norm": 0.127311110496521, "learning_rate": 0.0005, "loss": 2.1063, "step": 229830 }, { "epoch": 0.8748277673317448, "grad_norm": 0.12320202589035034, "learning_rate": 0.0005, "loss": 2.0978, "step": 229840 }, { "epoch": 0.8748658297998675, "grad_norm": 0.12253516167402267, "learning_rate": 0.0005, "loss": 2.1127, "step": 229850 }, { "epoch": 0.8749038922679903, "grad_norm": 0.12297092378139496, "learning_rate": 0.0005, "loss": 2.0947, "step": 229860 }, { "epoch": 0.8749419547361129, "grad_norm": 0.11488507688045502, "learning_rate": 0.0005, "loss": 2.1038, "step": 229870 }, { "epoch": 0.8749800172042356, "grad_norm": 0.12407977879047394, "learning_rate": 0.0005, "loss": 2.1081, "step": 229880 }, { "epoch": 0.8750180796723582, "grad_norm": 0.11904379725456238, "learning_rate": 0.0005, "loss": 2.1028, "step": 229890 }, { "epoch": 0.875056142140481, "grad_norm": 0.13884297013282776, "learning_rate": 0.0005, "loss": 2.1087, "step": 229900 }, { "epoch": 0.8750942046086037, "grad_norm": 0.1380092203617096, "learning_rate": 0.0005, "loss": 2.1019, "step": 229910 }, { "epoch": 0.8751322670767263, "grad_norm": 0.1314605325460434, "learning_rate": 0.0005, "loss": 2.1008, "step": 229920 }, { "epoch": 0.875170329544849, "grad_norm": 0.13077495992183685, "learning_rate": 0.0005, "loss": 2.1132, "step": 229930 }, { "epoch": 0.8752083920129717, "grad_norm": 0.11464505642652512, "learning_rate": 0.0005, "loss": 2.1081, "step": 229940 }, { "epoch": 0.8752464544810944, "grad_norm": 0.1231277585029602, "learning_rate": 0.0005, "loss": 2.107, "step": 229950 }, { "epoch": 0.875284516949217, "grad_norm": 0.11799775063991547, "learning_rate": 0.0005, "loss": 2.0939, "step": 229960 }, { "epoch": 0.8753225794173397, "grad_norm": 0.1273152232170105, "learning_rate": 0.0005, "loss": 2.0995, "step": 229970 }, { "epoch": 0.8753606418854625, "grad_norm": 0.12103879451751709, "learning_rate": 0.0005, "loss": 2.1058, "step": 229980 }, { "epoch": 0.8753987043535851, "grad_norm": 0.12625528872013092, "learning_rate": 0.0005, "loss": 2.1032, "step": 229990 }, { "epoch": 0.8754367668217078, "grad_norm": 0.14012883603572845, "learning_rate": 0.0005, "loss": 2.118, "step": 230000 }, { "epoch": 0.8754748292898304, "grad_norm": 0.14058808982372284, "learning_rate": 0.0005, "loss": 2.1024, "step": 230010 }, { "epoch": 0.8755128917579531, "grad_norm": 0.1262475699186325, "learning_rate": 0.0005, "loss": 2.1048, "step": 230020 }, { "epoch": 0.8755509542260759, "grad_norm": 0.12014681100845337, "learning_rate": 0.0005, "loss": 2.0959, "step": 230030 }, { "epoch": 0.8755890166941985, "grad_norm": 0.12667715549468994, "learning_rate": 0.0005, "loss": 2.1003, "step": 230040 }, { "epoch": 0.8756270791623212, "grad_norm": 0.12279102951288223, "learning_rate": 0.0005, "loss": 2.1008, "step": 230050 }, { "epoch": 0.8756651416304438, "grad_norm": 0.13368308544158936, "learning_rate": 0.0005, "loss": 2.1147, "step": 230060 }, { "epoch": 0.8757032040985666, "grad_norm": 0.12176341563463211, "learning_rate": 0.0005, "loss": 2.113, "step": 230070 }, { "epoch": 0.8757412665666893, "grad_norm": 0.12322124093770981, "learning_rate": 0.0005, "loss": 2.1037, "step": 230080 }, { "epoch": 0.8757793290348119, "grad_norm": 0.12963128089904785, "learning_rate": 0.0005, "loss": 2.0983, "step": 230090 }, { "epoch": 0.8758173915029346, "grad_norm": 0.1318719983100891, "learning_rate": 0.0005, "loss": 2.1024, "step": 230100 }, { "epoch": 0.8758554539710574, "grad_norm": 0.12382566928863525, "learning_rate": 0.0005, "loss": 2.1011, "step": 230110 }, { "epoch": 0.87589351643918, "grad_norm": 0.13396726548671722, "learning_rate": 0.0005, "loss": 2.1058, "step": 230120 }, { "epoch": 0.8759315789073027, "grad_norm": 0.12979789078235626, "learning_rate": 0.0005, "loss": 2.0943, "step": 230130 }, { "epoch": 0.8759696413754253, "grad_norm": 0.12729598581790924, "learning_rate": 0.0005, "loss": 2.0814, "step": 230140 }, { "epoch": 0.876007703843548, "grad_norm": 0.12816545367240906, "learning_rate": 0.0005, "loss": 2.1039, "step": 230150 }, { "epoch": 0.8760457663116707, "grad_norm": 0.1208256185054779, "learning_rate": 0.0005, "loss": 2.0937, "step": 230160 }, { "epoch": 0.8760838287797934, "grad_norm": 0.13537472486495972, "learning_rate": 0.0005, "loss": 2.1224, "step": 230170 }, { "epoch": 0.8761218912479161, "grad_norm": 0.133370503783226, "learning_rate": 0.0005, "loss": 2.114, "step": 230180 }, { "epoch": 0.8761599537160387, "grad_norm": 0.13083863258361816, "learning_rate": 0.0005, "loss": 2.1035, "step": 230190 }, { "epoch": 0.8761980161841615, "grad_norm": 0.13987453281879425, "learning_rate": 0.0005, "loss": 2.0903, "step": 230200 }, { "epoch": 0.8762360786522841, "grad_norm": 0.1147368848323822, "learning_rate": 0.0005, "loss": 2.1126, "step": 230210 }, { "epoch": 0.8762741411204068, "grad_norm": 0.1245139092206955, "learning_rate": 0.0005, "loss": 2.1145, "step": 230220 }, { "epoch": 0.8763122035885295, "grad_norm": 0.13482628762722015, "learning_rate": 0.0005, "loss": 2.1128, "step": 230230 }, { "epoch": 0.8763502660566522, "grad_norm": 0.129679337143898, "learning_rate": 0.0005, "loss": 2.101, "step": 230240 }, { "epoch": 0.8763883285247749, "grad_norm": 0.15952397882938385, "learning_rate": 0.0005, "loss": 2.0997, "step": 230250 }, { "epoch": 0.8764263909928975, "grad_norm": 1.1382399797439575, "learning_rate": 0.0005, "loss": 2.0845, "step": 230260 }, { "epoch": 0.8764644534610202, "grad_norm": 0.14629729092121124, "learning_rate": 0.0005, "loss": 2.1232, "step": 230270 }, { "epoch": 0.8765025159291429, "grad_norm": 0.12604254484176636, "learning_rate": 0.0005, "loss": 2.1067, "step": 230280 }, { "epoch": 0.8765405783972656, "grad_norm": 0.1218118742108345, "learning_rate": 0.0005, "loss": 2.1011, "step": 230290 }, { "epoch": 0.8765786408653883, "grad_norm": 0.13148868083953857, "learning_rate": 0.0005, "loss": 2.0949, "step": 230300 }, { "epoch": 0.8766167033335109, "grad_norm": 0.11712725460529327, "learning_rate": 0.0005, "loss": 2.1163, "step": 230310 }, { "epoch": 0.8766547658016336, "grad_norm": 0.14665967226028442, "learning_rate": 0.0005, "loss": 2.1021, "step": 230320 }, { "epoch": 0.8766928282697564, "grad_norm": 0.12293502688407898, "learning_rate": 0.0005, "loss": 2.1051, "step": 230330 }, { "epoch": 0.876730890737879, "grad_norm": 0.13239432871341705, "learning_rate": 0.0005, "loss": 2.1123, "step": 230340 }, { "epoch": 0.8767689532060017, "grad_norm": 0.11775525659322739, "learning_rate": 0.0005, "loss": 2.1018, "step": 230350 }, { "epoch": 0.8768070156741243, "grad_norm": 0.14179232716560364, "learning_rate": 0.0005, "loss": 2.0927, "step": 230360 }, { "epoch": 0.8768450781422471, "grad_norm": 0.12616828083992004, "learning_rate": 0.0005, "loss": 2.0923, "step": 230370 }, { "epoch": 0.8768831406103698, "grad_norm": 0.12409175932407379, "learning_rate": 0.0005, "loss": 2.0993, "step": 230380 }, { "epoch": 0.8769212030784924, "grad_norm": 0.1291017383337021, "learning_rate": 0.0005, "loss": 2.0999, "step": 230390 }, { "epoch": 0.8769592655466151, "grad_norm": 0.1327333301305771, "learning_rate": 0.0005, "loss": 2.1017, "step": 230400 }, { "epoch": 0.8769973280147378, "grad_norm": 0.12073655426502228, "learning_rate": 0.0005, "loss": 2.1109, "step": 230410 }, { "epoch": 0.8770353904828605, "grad_norm": 0.12345512956380844, "learning_rate": 0.0005, "loss": 2.1137, "step": 230420 }, { "epoch": 0.8770734529509832, "grad_norm": 0.12081936746835709, "learning_rate": 0.0005, "loss": 2.0913, "step": 230430 }, { "epoch": 0.8771115154191058, "grad_norm": 0.1453080177307129, "learning_rate": 0.0005, "loss": 2.1082, "step": 230440 }, { "epoch": 0.8771495778872285, "grad_norm": 0.13501416146755219, "learning_rate": 0.0005, "loss": 2.1121, "step": 230450 }, { "epoch": 0.8771876403553512, "grad_norm": 0.12920066714286804, "learning_rate": 0.0005, "loss": 2.1069, "step": 230460 }, { "epoch": 0.8772257028234739, "grad_norm": 0.13471481204032898, "learning_rate": 0.0005, "loss": 2.1088, "step": 230470 }, { "epoch": 0.8772637652915966, "grad_norm": 0.12981899082660675, "learning_rate": 0.0005, "loss": 2.1185, "step": 230480 }, { "epoch": 0.8773018277597192, "grad_norm": 0.13292816281318665, "learning_rate": 0.0005, "loss": 2.1119, "step": 230490 }, { "epoch": 0.877339890227842, "grad_norm": 0.12929858267307281, "learning_rate": 0.0005, "loss": 2.0955, "step": 230500 }, { "epoch": 0.8773779526959646, "grad_norm": 0.13031207025051117, "learning_rate": 0.0005, "loss": 2.1025, "step": 230510 }, { "epoch": 0.8774160151640873, "grad_norm": 0.11181312799453735, "learning_rate": 0.0005, "loss": 2.1075, "step": 230520 }, { "epoch": 0.87745407763221, "grad_norm": 0.13998934626579285, "learning_rate": 0.0005, "loss": 2.1047, "step": 230530 }, { "epoch": 0.8774921401003327, "grad_norm": 0.13008730113506317, "learning_rate": 0.0005, "loss": 2.1159, "step": 230540 }, { "epoch": 0.8775302025684554, "grad_norm": 0.13119496405124664, "learning_rate": 0.0005, "loss": 2.1051, "step": 230550 }, { "epoch": 0.877568265036578, "grad_norm": 0.12499556690454483, "learning_rate": 0.0005, "loss": 2.0987, "step": 230560 }, { "epoch": 0.8776063275047007, "grad_norm": 0.1330575793981552, "learning_rate": 0.0005, "loss": 2.083, "step": 230570 }, { "epoch": 0.8776443899728233, "grad_norm": 0.13125604391098022, "learning_rate": 0.0005, "loss": 2.1172, "step": 230580 }, { "epoch": 0.8776824524409461, "grad_norm": 0.13973161578178406, "learning_rate": 0.0005, "loss": 2.0932, "step": 230590 }, { "epoch": 0.8777205149090688, "grad_norm": 0.12531033158302307, "learning_rate": 0.0005, "loss": 2.1082, "step": 230600 }, { "epoch": 0.8777585773771914, "grad_norm": 0.14181895554065704, "learning_rate": 0.0005, "loss": 2.0942, "step": 230610 }, { "epoch": 0.8777966398453141, "grad_norm": 0.14905595779418945, "learning_rate": 0.0005, "loss": 2.1135, "step": 230620 }, { "epoch": 0.8778347023134369, "grad_norm": 0.1413782238960266, "learning_rate": 0.0005, "loss": 2.1049, "step": 230630 }, { "epoch": 0.8778727647815595, "grad_norm": 0.12952205538749695, "learning_rate": 0.0005, "loss": 2.1131, "step": 230640 }, { "epoch": 0.8779108272496822, "grad_norm": 0.12465883791446686, "learning_rate": 0.0005, "loss": 2.1128, "step": 230650 }, { "epoch": 0.8779488897178048, "grad_norm": 0.13702189922332764, "learning_rate": 0.0005, "loss": 2.1005, "step": 230660 }, { "epoch": 0.8779869521859276, "grad_norm": 0.15412265062332153, "learning_rate": 0.0005, "loss": 2.1005, "step": 230670 }, { "epoch": 0.8780250146540503, "grad_norm": 0.13058574497699738, "learning_rate": 0.0005, "loss": 2.1026, "step": 230680 }, { "epoch": 0.8780630771221729, "grad_norm": 0.12237841635942459, "learning_rate": 0.0005, "loss": 2.1053, "step": 230690 }, { "epoch": 0.8781011395902956, "grad_norm": 0.13826708495616913, "learning_rate": 0.0005, "loss": 2.1055, "step": 230700 }, { "epoch": 0.8781392020584182, "grad_norm": 0.13739526271820068, "learning_rate": 0.0005, "loss": 2.0878, "step": 230710 }, { "epoch": 0.878177264526541, "grad_norm": 0.12953074276447296, "learning_rate": 0.0005, "loss": 2.0939, "step": 230720 }, { "epoch": 0.8782153269946636, "grad_norm": 0.13328810036182404, "learning_rate": 0.0005, "loss": 2.1017, "step": 230730 }, { "epoch": 0.8782533894627863, "grad_norm": 0.11889463663101196, "learning_rate": 0.0005, "loss": 2.1154, "step": 230740 }, { "epoch": 0.878291451930909, "grad_norm": 0.12260544300079346, "learning_rate": 0.0005, "loss": 2.1054, "step": 230750 }, { "epoch": 0.8783295143990317, "grad_norm": 0.12881432473659515, "learning_rate": 0.0005, "loss": 2.0973, "step": 230760 }, { "epoch": 0.8783675768671544, "grad_norm": 0.12546837329864502, "learning_rate": 0.0005, "loss": 2.1022, "step": 230770 }, { "epoch": 0.878405639335277, "grad_norm": 0.13221587240695953, "learning_rate": 0.0005, "loss": 2.1059, "step": 230780 }, { "epoch": 0.8784437018033997, "grad_norm": 0.128005713224411, "learning_rate": 0.0005, "loss": 2.0956, "step": 230790 }, { "epoch": 0.8784817642715225, "grad_norm": 0.11627799272537231, "learning_rate": 0.0005, "loss": 2.0746, "step": 230800 }, { "epoch": 0.8785198267396451, "grad_norm": 0.13420972228050232, "learning_rate": 0.0005, "loss": 2.1057, "step": 230810 }, { "epoch": 0.8785578892077678, "grad_norm": 0.14359724521636963, "learning_rate": 0.0005, "loss": 2.1017, "step": 230820 }, { "epoch": 0.8785959516758904, "grad_norm": 0.12914758920669556, "learning_rate": 0.0005, "loss": 2.0951, "step": 230830 }, { "epoch": 0.8786340141440132, "grad_norm": 0.13227564096450806, "learning_rate": 0.0005, "loss": 2.1098, "step": 230840 }, { "epoch": 0.8786720766121359, "grad_norm": 0.12736381590366364, "learning_rate": 0.0005, "loss": 2.1206, "step": 230850 }, { "epoch": 0.8787101390802585, "grad_norm": 0.13434436917304993, "learning_rate": 0.0005, "loss": 2.1028, "step": 230860 }, { "epoch": 0.8787482015483812, "grad_norm": 0.12227039039134979, "learning_rate": 0.0005, "loss": 2.1049, "step": 230870 }, { "epoch": 0.8787862640165038, "grad_norm": 0.12851709127426147, "learning_rate": 0.0005, "loss": 2.0935, "step": 230880 }, { "epoch": 0.8788243264846266, "grad_norm": 0.12752105295658112, "learning_rate": 0.0005, "loss": 2.1136, "step": 230890 }, { "epoch": 0.8788623889527493, "grad_norm": 0.12396000325679779, "learning_rate": 0.0005, "loss": 2.1066, "step": 230900 }, { "epoch": 0.8789004514208719, "grad_norm": 0.12684553861618042, "learning_rate": 0.0005, "loss": 2.0909, "step": 230910 }, { "epoch": 0.8789385138889946, "grad_norm": 0.1198529377579689, "learning_rate": 0.0005, "loss": 2.1122, "step": 230920 }, { "epoch": 0.8789765763571173, "grad_norm": 0.13882936537265778, "learning_rate": 0.0005, "loss": 2.0874, "step": 230930 }, { "epoch": 0.87901463882524, "grad_norm": 0.12034550309181213, "learning_rate": 0.0005, "loss": 2.0992, "step": 230940 }, { "epoch": 0.8790527012933627, "grad_norm": 0.12130250036716461, "learning_rate": 0.0005, "loss": 2.1278, "step": 230950 }, { "epoch": 0.8790907637614853, "grad_norm": 0.13701075315475464, "learning_rate": 0.0005, "loss": 2.0874, "step": 230960 }, { "epoch": 0.8791288262296081, "grad_norm": 0.1234058365225792, "learning_rate": 0.0005, "loss": 2.0886, "step": 230970 }, { "epoch": 0.8791668886977307, "grad_norm": 0.13145093619823456, "learning_rate": 0.0005, "loss": 2.1064, "step": 230980 }, { "epoch": 0.8792049511658534, "grad_norm": 0.12291737645864487, "learning_rate": 0.0005, "loss": 2.1075, "step": 230990 }, { "epoch": 0.8792430136339761, "grad_norm": 0.13090135157108307, "learning_rate": 0.0005, "loss": 2.0976, "step": 231000 }, { "epoch": 0.8792810761020987, "grad_norm": 0.1326054185628891, "learning_rate": 0.0005, "loss": 2.1042, "step": 231010 }, { "epoch": 0.8793191385702215, "grad_norm": 0.1226671040058136, "learning_rate": 0.0005, "loss": 2.1175, "step": 231020 }, { "epoch": 0.8793572010383441, "grad_norm": 0.1419045627117157, "learning_rate": 0.0005, "loss": 2.0936, "step": 231030 }, { "epoch": 0.8793952635064668, "grad_norm": 0.1294616311788559, "learning_rate": 0.0005, "loss": 2.1118, "step": 231040 }, { "epoch": 0.8794333259745895, "grad_norm": 0.12092513591051102, "learning_rate": 0.0005, "loss": 2.0836, "step": 231050 }, { "epoch": 0.8794713884427122, "grad_norm": 0.12082849442958832, "learning_rate": 0.0005, "loss": 2.1128, "step": 231060 }, { "epoch": 0.8795094509108349, "grad_norm": 0.14724799990653992, "learning_rate": 0.0005, "loss": 2.1174, "step": 231070 }, { "epoch": 0.8795475133789575, "grad_norm": 0.1306437999010086, "learning_rate": 0.0005, "loss": 2.1111, "step": 231080 }, { "epoch": 0.8795855758470802, "grad_norm": 0.13486473262310028, "learning_rate": 0.0005, "loss": 2.1065, "step": 231090 }, { "epoch": 0.879623638315203, "grad_norm": 0.12858137488365173, "learning_rate": 0.0005, "loss": 2.1025, "step": 231100 }, { "epoch": 0.8796617007833256, "grad_norm": 0.12715476751327515, "learning_rate": 0.0005, "loss": 2.0971, "step": 231110 }, { "epoch": 0.8796997632514483, "grad_norm": 0.11756472289562225, "learning_rate": 0.0005, "loss": 2.0902, "step": 231120 }, { "epoch": 0.8797378257195709, "grad_norm": 0.11899585276842117, "learning_rate": 0.0005, "loss": 2.1003, "step": 231130 }, { "epoch": 0.8797758881876936, "grad_norm": 0.12455402314662933, "learning_rate": 0.0005, "loss": 2.1013, "step": 231140 }, { "epoch": 0.8798139506558164, "grad_norm": 0.12440775334835052, "learning_rate": 0.0005, "loss": 2.1083, "step": 231150 }, { "epoch": 0.879852013123939, "grad_norm": 0.13117524981498718, "learning_rate": 0.0005, "loss": 2.1004, "step": 231160 }, { "epoch": 0.8798900755920617, "grad_norm": 0.11392761021852493, "learning_rate": 0.0005, "loss": 2.1157, "step": 231170 }, { "epoch": 0.8799281380601843, "grad_norm": 0.15131865441799164, "learning_rate": 0.0005, "loss": 2.108, "step": 231180 }, { "epoch": 0.8799662005283071, "grad_norm": 0.1557011753320694, "learning_rate": 0.0005, "loss": 2.0987, "step": 231190 }, { "epoch": 0.8800042629964298, "grad_norm": 0.1395118534564972, "learning_rate": 0.0005, "loss": 2.1021, "step": 231200 }, { "epoch": 0.8800423254645524, "grad_norm": 0.13964758813381195, "learning_rate": 0.0005, "loss": 2.1078, "step": 231210 }, { "epoch": 0.8800803879326751, "grad_norm": 0.13237391412258148, "learning_rate": 0.0005, "loss": 2.1026, "step": 231220 }, { "epoch": 0.8801184504007978, "grad_norm": 0.12277648597955704, "learning_rate": 0.0005, "loss": 2.1071, "step": 231230 }, { "epoch": 0.8801565128689205, "grad_norm": 0.12859274446964264, "learning_rate": 0.0005, "loss": 2.1071, "step": 231240 }, { "epoch": 0.8801945753370432, "grad_norm": 0.13040199875831604, "learning_rate": 0.0005, "loss": 2.1136, "step": 231250 }, { "epoch": 0.8802326378051658, "grad_norm": 0.12296934425830841, "learning_rate": 0.0005, "loss": 2.1035, "step": 231260 }, { "epoch": 0.8802707002732886, "grad_norm": 0.12698037922382355, "learning_rate": 0.0005, "loss": 2.0972, "step": 231270 }, { "epoch": 0.8803087627414112, "grad_norm": 0.11731021106243134, "learning_rate": 0.0005, "loss": 2.0915, "step": 231280 }, { "epoch": 0.8803468252095339, "grad_norm": 0.13041014969348907, "learning_rate": 0.0005, "loss": 2.1008, "step": 231290 }, { "epoch": 0.8803848876776565, "grad_norm": 0.11453888565301895, "learning_rate": 0.0005, "loss": 2.1024, "step": 231300 }, { "epoch": 0.8804229501457792, "grad_norm": 0.14653709530830383, "learning_rate": 0.0005, "loss": 2.0992, "step": 231310 }, { "epoch": 0.880461012613902, "grad_norm": 0.13944950699806213, "learning_rate": 0.0005, "loss": 2.1226, "step": 231320 }, { "epoch": 0.8804990750820246, "grad_norm": 0.11421007663011551, "learning_rate": 0.0005, "loss": 2.0897, "step": 231330 }, { "epoch": 0.8805371375501473, "grad_norm": 0.11948207765817642, "learning_rate": 0.0005, "loss": 2.1012, "step": 231340 }, { "epoch": 0.8805752000182699, "grad_norm": 0.12842802703380585, "learning_rate": 0.0005, "loss": 2.0914, "step": 231350 }, { "epoch": 0.8806132624863927, "grad_norm": 0.12044843286275864, "learning_rate": 0.0005, "loss": 2.0998, "step": 231360 }, { "epoch": 0.8806513249545154, "grad_norm": 0.12985649704933167, "learning_rate": 0.0005, "loss": 2.122, "step": 231370 }, { "epoch": 0.880689387422638, "grad_norm": 0.13624177873134613, "learning_rate": 0.0005, "loss": 2.1144, "step": 231380 }, { "epoch": 0.8807274498907607, "grad_norm": 0.1136452928185463, "learning_rate": 0.0005, "loss": 2.1015, "step": 231390 }, { "epoch": 0.8807655123588835, "grad_norm": 0.13304318487644196, "learning_rate": 0.0005, "loss": 2.092, "step": 231400 }, { "epoch": 0.8808035748270061, "grad_norm": 0.13884195685386658, "learning_rate": 0.0005, "loss": 2.1107, "step": 231410 }, { "epoch": 0.8808416372951288, "grad_norm": 0.13247938454151154, "learning_rate": 0.0005, "loss": 2.1031, "step": 231420 }, { "epoch": 0.8808796997632514, "grad_norm": 0.1284450888633728, "learning_rate": 0.0005, "loss": 2.1043, "step": 231430 }, { "epoch": 0.8809177622313741, "grad_norm": 0.12137272953987122, "learning_rate": 0.0005, "loss": 2.1041, "step": 231440 }, { "epoch": 0.8809558246994968, "grad_norm": 0.13511404395103455, "learning_rate": 0.0005, "loss": 2.1104, "step": 231450 }, { "epoch": 0.8809938871676195, "grad_norm": 0.12272574752569199, "learning_rate": 0.0005, "loss": 2.1172, "step": 231460 }, { "epoch": 0.8810319496357422, "grad_norm": 0.14283962547779083, "learning_rate": 0.0005, "loss": 2.1226, "step": 231470 }, { "epoch": 0.8810700121038648, "grad_norm": 0.11777593195438385, "learning_rate": 0.0005, "loss": 2.1064, "step": 231480 }, { "epoch": 0.8811080745719876, "grad_norm": 0.12992756068706512, "learning_rate": 0.0005, "loss": 2.0946, "step": 231490 }, { "epoch": 0.8811461370401102, "grad_norm": 0.11626632511615753, "learning_rate": 0.0005, "loss": 2.0943, "step": 231500 }, { "epoch": 0.8811841995082329, "grad_norm": 0.12404317408800125, "learning_rate": 0.0005, "loss": 2.0916, "step": 231510 }, { "epoch": 0.8812222619763556, "grad_norm": 0.12410927563905716, "learning_rate": 0.0005, "loss": 2.1086, "step": 231520 }, { "epoch": 0.8812603244444783, "grad_norm": 0.1409444361925125, "learning_rate": 0.0005, "loss": 2.1187, "step": 231530 }, { "epoch": 0.881298386912601, "grad_norm": 0.12675026059150696, "learning_rate": 0.0005, "loss": 2.1013, "step": 231540 }, { "epoch": 0.8813364493807236, "grad_norm": 0.13229568302631378, "learning_rate": 0.0005, "loss": 2.0949, "step": 231550 }, { "epoch": 0.8813745118488463, "grad_norm": 0.12156939506530762, "learning_rate": 0.0005, "loss": 2.1003, "step": 231560 }, { "epoch": 0.881412574316969, "grad_norm": 0.12610271573066711, "learning_rate": 0.0005, "loss": 2.1173, "step": 231570 }, { "epoch": 0.8814506367850917, "grad_norm": 0.12162590771913528, "learning_rate": 0.0005, "loss": 2.0958, "step": 231580 }, { "epoch": 0.8814886992532144, "grad_norm": 0.16064129769802094, "learning_rate": 0.0005, "loss": 2.0996, "step": 231590 }, { "epoch": 0.881526761721337, "grad_norm": 0.13760524988174438, "learning_rate": 0.0005, "loss": 2.113, "step": 231600 }, { "epoch": 0.8815648241894597, "grad_norm": 0.13056804239749908, "learning_rate": 0.0005, "loss": 2.1073, "step": 231610 }, { "epoch": 0.8816028866575825, "grad_norm": 0.1253260225057602, "learning_rate": 0.0005, "loss": 2.1019, "step": 231620 }, { "epoch": 0.8816409491257051, "grad_norm": 0.12314766645431519, "learning_rate": 0.0005, "loss": 2.0978, "step": 231630 }, { "epoch": 0.8816790115938278, "grad_norm": 0.12430446594953537, "learning_rate": 0.0005, "loss": 2.1115, "step": 231640 }, { "epoch": 0.8817170740619504, "grad_norm": 0.12304916977882385, "learning_rate": 0.0005, "loss": 2.095, "step": 231650 }, { "epoch": 0.8817551365300732, "grad_norm": 0.12238744646310806, "learning_rate": 0.0005, "loss": 2.0945, "step": 231660 }, { "epoch": 0.8817931989981959, "grad_norm": 0.12784691154956818, "learning_rate": 0.0005, "loss": 2.0979, "step": 231670 }, { "epoch": 0.8818312614663185, "grad_norm": 0.12692983448505402, "learning_rate": 0.0005, "loss": 2.104, "step": 231680 }, { "epoch": 0.8818693239344412, "grad_norm": 0.11969311535358429, "learning_rate": 0.0005, "loss": 2.1167, "step": 231690 }, { "epoch": 0.8819073864025639, "grad_norm": 0.24011379480361938, "learning_rate": 0.0005, "loss": 2.1055, "step": 231700 }, { "epoch": 0.8819454488706866, "grad_norm": 0.11667758971452713, "learning_rate": 0.0005, "loss": 2.1166, "step": 231710 }, { "epoch": 0.8819835113388093, "grad_norm": 0.12892790138721466, "learning_rate": 0.0005, "loss": 2.1062, "step": 231720 }, { "epoch": 0.8820215738069319, "grad_norm": 0.12426768988370895, "learning_rate": 0.0005, "loss": 2.1147, "step": 231730 }, { "epoch": 0.8820596362750546, "grad_norm": 0.13126088678836823, "learning_rate": 0.0005, "loss": 2.1129, "step": 231740 }, { "epoch": 0.8820976987431773, "grad_norm": 0.13232356309890747, "learning_rate": 0.0005, "loss": 2.1103, "step": 231750 }, { "epoch": 0.8821357612113, "grad_norm": 0.13249552249908447, "learning_rate": 0.0005, "loss": 2.1205, "step": 231760 }, { "epoch": 0.8821738236794227, "grad_norm": 0.11961726099252701, "learning_rate": 0.0005, "loss": 2.0971, "step": 231770 }, { "epoch": 0.8822118861475453, "grad_norm": 0.1333591789007187, "learning_rate": 0.0005, "loss": 2.1156, "step": 231780 }, { "epoch": 0.8822499486156681, "grad_norm": 0.13824038207530975, "learning_rate": 0.0005, "loss": 2.114, "step": 231790 }, { "epoch": 0.8822880110837907, "grad_norm": 0.1226683259010315, "learning_rate": 0.0005, "loss": 2.0986, "step": 231800 }, { "epoch": 0.8823260735519134, "grad_norm": 0.16337880492210388, "learning_rate": 0.0005, "loss": 2.1055, "step": 231810 }, { "epoch": 0.882364136020036, "grad_norm": 0.1357499063014984, "learning_rate": 0.0005, "loss": 2.0927, "step": 231820 }, { "epoch": 0.8824021984881588, "grad_norm": 0.12789271771907806, "learning_rate": 0.0005, "loss": 2.1182, "step": 231830 }, { "epoch": 0.8824402609562815, "grad_norm": 0.12225945293903351, "learning_rate": 0.0005, "loss": 2.1153, "step": 231840 }, { "epoch": 0.8824783234244041, "grad_norm": 0.11710258573293686, "learning_rate": 0.0005, "loss": 2.1065, "step": 231850 }, { "epoch": 0.8825163858925268, "grad_norm": 0.11904674023389816, "learning_rate": 0.0005, "loss": 2.1195, "step": 231860 }, { "epoch": 0.8825544483606494, "grad_norm": 0.12898290157318115, "learning_rate": 0.0005, "loss": 2.1045, "step": 231870 }, { "epoch": 0.8825925108287722, "grad_norm": 0.1373780220746994, "learning_rate": 0.0005, "loss": 2.1069, "step": 231880 }, { "epoch": 0.8826305732968949, "grad_norm": 0.12841899693012238, "learning_rate": 0.0005, "loss": 2.1004, "step": 231890 }, { "epoch": 0.8826686357650175, "grad_norm": 0.12314864248037338, "learning_rate": 0.0005, "loss": 2.0986, "step": 231900 }, { "epoch": 0.8827066982331402, "grad_norm": 0.13329792022705078, "learning_rate": 0.0005, "loss": 2.0985, "step": 231910 }, { "epoch": 0.882744760701263, "grad_norm": 0.14434537291526794, "learning_rate": 0.0005, "loss": 2.1001, "step": 231920 }, { "epoch": 0.8827828231693856, "grad_norm": 0.11754970997571945, "learning_rate": 0.0005, "loss": 2.0996, "step": 231930 }, { "epoch": 0.8828208856375083, "grad_norm": 0.12650670111179352, "learning_rate": 0.0005, "loss": 2.114, "step": 231940 }, { "epoch": 0.8828589481056309, "grad_norm": 0.121844083070755, "learning_rate": 0.0005, "loss": 2.0899, "step": 231950 }, { "epoch": 0.8828970105737537, "grad_norm": 0.11975842714309692, "learning_rate": 0.0005, "loss": 2.0966, "step": 231960 }, { "epoch": 0.8829350730418764, "grad_norm": 0.14225952327251434, "learning_rate": 0.0005, "loss": 2.109, "step": 231970 }, { "epoch": 0.882973135509999, "grad_norm": 0.12241010367870331, "learning_rate": 0.0005, "loss": 2.1043, "step": 231980 }, { "epoch": 0.8830111979781217, "grad_norm": 0.12553659081459045, "learning_rate": 0.0005, "loss": 2.0906, "step": 231990 }, { "epoch": 0.8830492604462443, "grad_norm": 0.14470550417900085, "learning_rate": 0.0005, "loss": 2.1102, "step": 232000 }, { "epoch": 0.8830873229143671, "grad_norm": 0.11787319928407669, "learning_rate": 0.0005, "loss": 2.1052, "step": 232010 }, { "epoch": 0.8831253853824897, "grad_norm": 0.12110399454832077, "learning_rate": 0.0005, "loss": 2.1061, "step": 232020 }, { "epoch": 0.8831634478506124, "grad_norm": 0.14320997893810272, "learning_rate": 0.0005, "loss": 2.0868, "step": 232030 }, { "epoch": 0.8832015103187351, "grad_norm": 0.1214723140001297, "learning_rate": 0.0005, "loss": 2.1177, "step": 232040 }, { "epoch": 0.8832395727868578, "grad_norm": 0.13799428939819336, "learning_rate": 0.0005, "loss": 2.1146, "step": 232050 }, { "epoch": 0.8832776352549805, "grad_norm": 0.13196426630020142, "learning_rate": 0.0005, "loss": 2.1115, "step": 232060 }, { "epoch": 0.8833156977231031, "grad_norm": 0.11245911568403244, "learning_rate": 0.0005, "loss": 2.095, "step": 232070 }, { "epoch": 0.8833537601912258, "grad_norm": 0.12483595311641693, "learning_rate": 0.0005, "loss": 2.1031, "step": 232080 }, { "epoch": 0.8833918226593486, "grad_norm": 0.12437473982572556, "learning_rate": 0.0005, "loss": 2.0902, "step": 232090 }, { "epoch": 0.8834298851274712, "grad_norm": 0.1243564561009407, "learning_rate": 0.0005, "loss": 2.1091, "step": 232100 }, { "epoch": 0.8834679475955939, "grad_norm": 0.12160544097423553, "learning_rate": 0.0005, "loss": 2.1113, "step": 232110 }, { "epoch": 0.8835060100637165, "grad_norm": 0.13572929799556732, "learning_rate": 0.0005, "loss": 2.1056, "step": 232120 }, { "epoch": 0.8835440725318393, "grad_norm": 0.12826408445835114, "learning_rate": 0.0005, "loss": 2.0908, "step": 232130 }, { "epoch": 0.883582134999962, "grad_norm": 0.12957674264907837, "learning_rate": 0.0005, "loss": 2.1036, "step": 232140 }, { "epoch": 0.8836201974680846, "grad_norm": 0.12144125998020172, "learning_rate": 0.0005, "loss": 2.1132, "step": 232150 }, { "epoch": 0.8836582599362073, "grad_norm": 0.12279727309942245, "learning_rate": 0.0005, "loss": 2.1198, "step": 232160 }, { "epoch": 0.8836963224043299, "grad_norm": 0.13412638008594513, "learning_rate": 0.0005, "loss": 2.1251, "step": 232170 }, { "epoch": 0.8837343848724527, "grad_norm": 0.12279703468084335, "learning_rate": 0.0005, "loss": 2.088, "step": 232180 }, { "epoch": 0.8837724473405754, "grad_norm": 0.12408280372619629, "learning_rate": 0.0005, "loss": 2.1013, "step": 232190 }, { "epoch": 0.883810509808698, "grad_norm": 0.1378985494375229, "learning_rate": 0.0005, "loss": 2.1064, "step": 232200 }, { "epoch": 0.8838485722768207, "grad_norm": 0.12556681036949158, "learning_rate": 0.0005, "loss": 2.1126, "step": 232210 }, { "epoch": 0.8838866347449434, "grad_norm": 4.76461935043335, "learning_rate": 0.0005, "loss": 2.1026, "step": 232220 }, { "epoch": 0.8839246972130661, "grad_norm": 0.22819869220256805, "learning_rate": 0.0005, "loss": 2.1196, "step": 232230 }, { "epoch": 0.8839627596811888, "grad_norm": 0.13506510853767395, "learning_rate": 0.0005, "loss": 2.1075, "step": 232240 }, { "epoch": 0.8840008221493114, "grad_norm": 0.12306538969278336, "learning_rate": 0.0005, "loss": 2.1241, "step": 232250 }, { "epoch": 0.8840388846174342, "grad_norm": 0.1268211454153061, "learning_rate": 0.0005, "loss": 2.1023, "step": 232260 }, { "epoch": 0.8840769470855568, "grad_norm": 0.13606832921504974, "learning_rate": 0.0005, "loss": 2.11, "step": 232270 }, { "epoch": 0.8841150095536795, "grad_norm": 0.1291336715221405, "learning_rate": 0.0005, "loss": 2.1195, "step": 232280 }, { "epoch": 0.8841530720218022, "grad_norm": 0.11495401710271835, "learning_rate": 0.0005, "loss": 2.112, "step": 232290 }, { "epoch": 0.8841911344899248, "grad_norm": 0.12901656329631805, "learning_rate": 0.0005, "loss": 2.11, "step": 232300 }, { "epoch": 0.8842291969580476, "grad_norm": 0.14306919276714325, "learning_rate": 0.0005, "loss": 2.1025, "step": 232310 }, { "epoch": 0.8842672594261702, "grad_norm": 0.1347128301858902, "learning_rate": 0.0005, "loss": 2.1199, "step": 232320 }, { "epoch": 0.8843053218942929, "grad_norm": 0.13565176725387573, "learning_rate": 0.0005, "loss": 2.1087, "step": 232330 }, { "epoch": 0.8843433843624156, "grad_norm": 0.12219851464033127, "learning_rate": 0.0005, "loss": 2.105, "step": 232340 }, { "epoch": 0.8843814468305383, "grad_norm": 0.12421177327632904, "learning_rate": 0.0005, "loss": 2.1187, "step": 232350 }, { "epoch": 0.884419509298661, "grad_norm": 0.1276005059480667, "learning_rate": 0.0005, "loss": 2.1252, "step": 232360 }, { "epoch": 0.8844575717667836, "grad_norm": 0.12903755903244019, "learning_rate": 0.0005, "loss": 2.1012, "step": 232370 }, { "epoch": 0.8844956342349063, "grad_norm": 0.12208840996026993, "learning_rate": 0.0005, "loss": 2.1034, "step": 232380 }, { "epoch": 0.8845336967030291, "grad_norm": 0.1322821080684662, "learning_rate": 0.0005, "loss": 2.1143, "step": 232390 }, { "epoch": 0.8845717591711517, "grad_norm": 0.14976076781749725, "learning_rate": 0.0005, "loss": 2.1109, "step": 232400 }, { "epoch": 0.8846098216392744, "grad_norm": 0.12396979331970215, "learning_rate": 0.0005, "loss": 2.1125, "step": 232410 }, { "epoch": 0.884647884107397, "grad_norm": 0.13356992602348328, "learning_rate": 0.0005, "loss": 2.0891, "step": 232420 }, { "epoch": 0.8846859465755197, "grad_norm": 0.11742783337831497, "learning_rate": 0.0005, "loss": 2.1039, "step": 232430 }, { "epoch": 0.8847240090436425, "grad_norm": 0.12508390843868256, "learning_rate": 0.0005, "loss": 2.0971, "step": 232440 }, { "epoch": 0.8847620715117651, "grad_norm": 0.11658743023872375, "learning_rate": 0.0005, "loss": 2.119, "step": 232450 }, { "epoch": 0.8848001339798878, "grad_norm": 0.11587092280387878, "learning_rate": 0.0005, "loss": 2.0923, "step": 232460 }, { "epoch": 0.8848381964480104, "grad_norm": 0.12888342142105103, "learning_rate": 0.0005, "loss": 2.0918, "step": 232470 }, { "epoch": 0.8848762589161332, "grad_norm": 0.12616628408432007, "learning_rate": 0.0005, "loss": 2.1083, "step": 232480 }, { "epoch": 0.8849143213842559, "grad_norm": 0.13656558096408844, "learning_rate": 0.0005, "loss": 2.0967, "step": 232490 }, { "epoch": 0.8849523838523785, "grad_norm": 0.14839287102222443, "learning_rate": 0.0005, "loss": 2.118, "step": 232500 }, { "epoch": 0.8849904463205012, "grad_norm": 0.1278732717037201, "learning_rate": 0.0005, "loss": 2.1062, "step": 232510 }, { "epoch": 0.8850285087886239, "grad_norm": 0.5294134020805359, "learning_rate": 0.0005, "loss": 2.1079, "step": 232520 }, { "epoch": 0.8850665712567466, "grad_norm": 0.14765897393226624, "learning_rate": 0.0005, "loss": 2.111, "step": 232530 }, { "epoch": 0.8851046337248692, "grad_norm": 0.12571071088314056, "learning_rate": 0.0005, "loss": 2.1164, "step": 232540 }, { "epoch": 0.8851426961929919, "grad_norm": 0.12959067523479462, "learning_rate": 0.0005, "loss": 2.1124, "step": 232550 }, { "epoch": 0.8851807586611147, "grad_norm": 0.14037704467773438, "learning_rate": 0.0005, "loss": 2.0978, "step": 232560 }, { "epoch": 0.8852188211292373, "grad_norm": 0.12762996554374695, "learning_rate": 0.0005, "loss": 2.102, "step": 232570 }, { "epoch": 0.88525688359736, "grad_norm": 0.12175919860601425, "learning_rate": 0.0005, "loss": 2.1067, "step": 232580 }, { "epoch": 0.8852949460654826, "grad_norm": 0.1340930014848709, "learning_rate": 0.0005, "loss": 2.0877, "step": 232590 }, { "epoch": 0.8853330085336053, "grad_norm": 0.12818540632724762, "learning_rate": 0.0005, "loss": 2.1065, "step": 232600 }, { "epoch": 0.8853710710017281, "grad_norm": 0.11314734071493149, "learning_rate": 0.0005, "loss": 2.1038, "step": 232610 }, { "epoch": 0.8854091334698507, "grad_norm": 0.1258586347103119, "learning_rate": 0.0005, "loss": 2.0868, "step": 232620 }, { "epoch": 0.8854471959379734, "grad_norm": 0.131071999669075, "learning_rate": 0.0005, "loss": 2.1077, "step": 232630 }, { "epoch": 0.885485258406096, "grad_norm": 0.1313721090555191, "learning_rate": 0.0005, "loss": 2.1163, "step": 232640 }, { "epoch": 0.8855233208742188, "grad_norm": 0.11949295550584793, "learning_rate": 0.0005, "loss": 2.1011, "step": 232650 }, { "epoch": 0.8855613833423415, "grad_norm": 0.13298510015010834, "learning_rate": 0.0005, "loss": 2.0999, "step": 232660 }, { "epoch": 0.8855994458104641, "grad_norm": 0.12202002853155136, "learning_rate": 0.0005, "loss": 2.0873, "step": 232670 }, { "epoch": 0.8856375082785868, "grad_norm": 0.11990275233983994, "learning_rate": 0.0005, "loss": 2.102, "step": 232680 }, { "epoch": 0.8856755707467096, "grad_norm": 0.11966723203659058, "learning_rate": 0.0005, "loss": 2.1184, "step": 232690 }, { "epoch": 0.8857136332148322, "grad_norm": 0.12311986833810806, "learning_rate": 0.0005, "loss": 2.1118, "step": 232700 }, { "epoch": 0.8857516956829549, "grad_norm": 0.1322653889656067, "learning_rate": 0.0005, "loss": 2.09, "step": 232710 }, { "epoch": 0.8857897581510775, "grad_norm": 0.1241132989525795, "learning_rate": 0.0005, "loss": 2.112, "step": 232720 }, { "epoch": 0.8858278206192002, "grad_norm": 0.112840935587883, "learning_rate": 0.0005, "loss": 2.0986, "step": 232730 }, { "epoch": 0.885865883087323, "grad_norm": 0.14808683097362518, "learning_rate": 0.0005, "loss": 2.1035, "step": 232740 }, { "epoch": 0.8859039455554456, "grad_norm": 0.1410990208387375, "learning_rate": 0.0005, "loss": 2.0967, "step": 232750 }, { "epoch": 0.8859420080235683, "grad_norm": 0.12336340546607971, "learning_rate": 0.0005, "loss": 2.1047, "step": 232760 }, { "epoch": 0.8859800704916909, "grad_norm": 0.1287340223789215, "learning_rate": 0.0005, "loss": 2.108, "step": 232770 }, { "epoch": 0.8860181329598137, "grad_norm": 0.14114344120025635, "learning_rate": 0.0005, "loss": 2.0966, "step": 232780 }, { "epoch": 0.8860561954279363, "grad_norm": 0.13871510326862335, "learning_rate": 0.0005, "loss": 2.1214, "step": 232790 }, { "epoch": 0.886094257896059, "grad_norm": 0.11456874012947083, "learning_rate": 0.0005, "loss": 2.0962, "step": 232800 }, { "epoch": 0.8861323203641817, "grad_norm": 0.13572253286838531, "learning_rate": 0.0005, "loss": 2.1093, "step": 232810 }, { "epoch": 0.8861703828323044, "grad_norm": 0.12444180250167847, "learning_rate": 0.0005, "loss": 2.0842, "step": 232820 }, { "epoch": 0.8862084453004271, "grad_norm": 0.134059876203537, "learning_rate": 0.0005, "loss": 2.1013, "step": 232830 }, { "epoch": 0.8862465077685497, "grad_norm": 0.13002780079841614, "learning_rate": 0.0005, "loss": 2.1112, "step": 232840 }, { "epoch": 0.8862845702366724, "grad_norm": 0.1284944862127304, "learning_rate": 0.0005, "loss": 2.0962, "step": 232850 }, { "epoch": 0.886322632704795, "grad_norm": 0.3809811770915985, "learning_rate": 0.0005, "loss": 2.111, "step": 232860 }, { "epoch": 0.8863606951729178, "grad_norm": 0.13519035279750824, "learning_rate": 0.0005, "loss": 2.0949, "step": 232870 }, { "epoch": 0.8863987576410405, "grad_norm": 0.12790334224700928, "learning_rate": 0.0005, "loss": 2.0823, "step": 232880 }, { "epoch": 0.8864368201091631, "grad_norm": 0.12451233714818954, "learning_rate": 0.0005, "loss": 2.1163, "step": 232890 }, { "epoch": 0.8864748825772858, "grad_norm": 0.12516069412231445, "learning_rate": 0.0005, "loss": 2.1045, "step": 232900 }, { "epoch": 0.8865129450454086, "grad_norm": 0.15014207363128662, "learning_rate": 0.0005, "loss": 2.1003, "step": 232910 }, { "epoch": 0.8865510075135312, "grad_norm": 0.126219242811203, "learning_rate": 0.0005, "loss": 2.1157, "step": 232920 }, { "epoch": 0.8865890699816539, "grad_norm": 0.1344812512397766, "learning_rate": 0.0005, "loss": 2.1014, "step": 232930 }, { "epoch": 0.8866271324497765, "grad_norm": 0.12141722440719604, "learning_rate": 0.0005, "loss": 2.1051, "step": 232940 }, { "epoch": 0.8866651949178993, "grad_norm": 0.1323108971118927, "learning_rate": 0.0005, "loss": 2.1046, "step": 232950 }, { "epoch": 0.886703257386022, "grad_norm": 0.12559595704078674, "learning_rate": 0.0005, "loss": 2.0891, "step": 232960 }, { "epoch": 0.8867413198541446, "grad_norm": 0.11912598460912704, "learning_rate": 0.0005, "loss": 2.1047, "step": 232970 }, { "epoch": 0.8867793823222673, "grad_norm": 0.12086867541074753, "learning_rate": 0.0005, "loss": 2.1191, "step": 232980 }, { "epoch": 0.88681744479039, "grad_norm": 0.13172176480293274, "learning_rate": 0.0005, "loss": 2.1016, "step": 232990 }, { "epoch": 0.8868555072585127, "grad_norm": 0.12233125418424606, "learning_rate": 0.0005, "loss": 2.1009, "step": 233000 }, { "epoch": 0.8868935697266354, "grad_norm": 0.11781726777553558, "learning_rate": 0.0005, "loss": 2.1017, "step": 233010 }, { "epoch": 0.886931632194758, "grad_norm": 0.126663938164711, "learning_rate": 0.0005, "loss": 2.1024, "step": 233020 }, { "epoch": 0.8869696946628807, "grad_norm": 0.11784598231315613, "learning_rate": 0.0005, "loss": 2.0906, "step": 233030 }, { "epoch": 0.8870077571310034, "grad_norm": 0.12991993129253387, "learning_rate": 0.0005, "loss": 2.1134, "step": 233040 }, { "epoch": 0.8870458195991261, "grad_norm": 0.11740230768918991, "learning_rate": 0.0005, "loss": 2.1169, "step": 233050 }, { "epoch": 0.8870838820672488, "grad_norm": 0.1238425225019455, "learning_rate": 0.0005, "loss": 2.1092, "step": 233060 }, { "epoch": 0.8871219445353714, "grad_norm": 0.13130146265029907, "learning_rate": 0.0005, "loss": 2.0911, "step": 233070 }, { "epoch": 0.8871600070034942, "grad_norm": 0.12228170037269592, "learning_rate": 0.0005, "loss": 2.0954, "step": 233080 }, { "epoch": 0.8871980694716168, "grad_norm": 0.13704465329647064, "learning_rate": 0.0005, "loss": 2.1156, "step": 233090 }, { "epoch": 0.8872361319397395, "grad_norm": 0.15340986847877502, "learning_rate": 0.0005, "loss": 2.1087, "step": 233100 }, { "epoch": 0.8872741944078621, "grad_norm": 0.12370522320270538, "learning_rate": 0.0005, "loss": 2.1039, "step": 233110 }, { "epoch": 0.8873122568759849, "grad_norm": 0.11676563322544098, "learning_rate": 0.0005, "loss": 2.1195, "step": 233120 }, { "epoch": 0.8873503193441076, "grad_norm": 0.12067238241434097, "learning_rate": 0.0005, "loss": 2.1128, "step": 233130 }, { "epoch": 0.8873883818122302, "grad_norm": 0.1201990470290184, "learning_rate": 0.0005, "loss": 2.0893, "step": 233140 }, { "epoch": 0.8874264442803529, "grad_norm": 0.11820308119058609, "learning_rate": 0.0005, "loss": 2.0933, "step": 233150 }, { "epoch": 0.8874645067484755, "grad_norm": 0.13470560312271118, "learning_rate": 0.0005, "loss": 2.0986, "step": 233160 }, { "epoch": 0.8875025692165983, "grad_norm": 0.13587632775306702, "learning_rate": 0.0005, "loss": 2.1131, "step": 233170 }, { "epoch": 0.887540631684721, "grad_norm": 0.13131999969482422, "learning_rate": 0.0005, "loss": 2.1061, "step": 233180 }, { "epoch": 0.8875786941528436, "grad_norm": 0.1288503259420395, "learning_rate": 0.0005, "loss": 2.0842, "step": 233190 }, { "epoch": 0.8876167566209663, "grad_norm": 0.1321592479944229, "learning_rate": 0.0005, "loss": 2.1116, "step": 233200 }, { "epoch": 0.887654819089089, "grad_norm": 0.8052067160606384, "learning_rate": 0.0005, "loss": 2.1078, "step": 233210 }, { "epoch": 0.8876928815572117, "grad_norm": 0.13146033883094788, "learning_rate": 0.0005, "loss": 2.1021, "step": 233220 }, { "epoch": 0.8877309440253344, "grad_norm": 0.11876517534255981, "learning_rate": 0.0005, "loss": 2.0993, "step": 233230 }, { "epoch": 0.887769006493457, "grad_norm": 0.12811189889907837, "learning_rate": 0.0005, "loss": 2.1018, "step": 233240 }, { "epoch": 0.8878070689615798, "grad_norm": 0.13559778034687042, "learning_rate": 0.0005, "loss": 2.0977, "step": 233250 }, { "epoch": 0.8878451314297024, "grad_norm": 0.12814271450042725, "learning_rate": 0.0005, "loss": 2.0921, "step": 233260 }, { "epoch": 0.8878831938978251, "grad_norm": 0.1480410397052765, "learning_rate": 0.0005, "loss": 2.0954, "step": 233270 }, { "epoch": 0.8879212563659478, "grad_norm": 0.12418772280216217, "learning_rate": 0.0005, "loss": 2.1003, "step": 233280 }, { "epoch": 0.8879593188340704, "grad_norm": 0.1289658099412918, "learning_rate": 0.0005, "loss": 2.101, "step": 233290 }, { "epoch": 0.8879973813021932, "grad_norm": 0.12939070165157318, "learning_rate": 0.0005, "loss": 2.1247, "step": 233300 }, { "epoch": 0.8880354437703158, "grad_norm": 0.11741060763597488, "learning_rate": 0.0005, "loss": 2.1141, "step": 233310 }, { "epoch": 0.8880735062384385, "grad_norm": 0.12273950129747391, "learning_rate": 0.0005, "loss": 2.0916, "step": 233320 }, { "epoch": 0.8881115687065612, "grad_norm": 0.12378660589456558, "learning_rate": 0.0005, "loss": 2.0973, "step": 233330 }, { "epoch": 0.8881496311746839, "grad_norm": 0.1443643718957901, "learning_rate": 0.0005, "loss": 2.0957, "step": 233340 }, { "epoch": 0.8881876936428066, "grad_norm": 0.1268627941608429, "learning_rate": 0.0005, "loss": 2.0959, "step": 233350 }, { "epoch": 0.8882257561109292, "grad_norm": 0.1410587728023529, "learning_rate": 0.0005, "loss": 2.1195, "step": 233360 }, { "epoch": 0.8882638185790519, "grad_norm": 0.14428554475307465, "learning_rate": 0.0005, "loss": 2.1108, "step": 233370 }, { "epoch": 0.8883018810471747, "grad_norm": 0.1303177773952484, "learning_rate": 0.0005, "loss": 2.1082, "step": 233380 }, { "epoch": 0.8883399435152973, "grad_norm": 0.1261586993932724, "learning_rate": 0.0005, "loss": 2.1004, "step": 233390 }, { "epoch": 0.88837800598342, "grad_norm": 0.13833171129226685, "learning_rate": 0.0005, "loss": 2.0917, "step": 233400 }, { "epoch": 0.8884160684515426, "grad_norm": 0.12124264240264893, "learning_rate": 0.0005, "loss": 2.0948, "step": 233410 }, { "epoch": 0.8884541309196654, "grad_norm": 0.11245989799499512, "learning_rate": 0.0005, "loss": 2.0989, "step": 233420 }, { "epoch": 0.8884921933877881, "grad_norm": 0.11601608991622925, "learning_rate": 0.0005, "loss": 2.0987, "step": 233430 }, { "epoch": 0.8885302558559107, "grad_norm": 0.1283676028251648, "learning_rate": 0.0005, "loss": 2.1007, "step": 233440 }, { "epoch": 0.8885683183240334, "grad_norm": 0.1229342520236969, "learning_rate": 0.0005, "loss": 2.114, "step": 233450 }, { "epoch": 0.888606380792156, "grad_norm": 0.1284010112285614, "learning_rate": 0.0005, "loss": 2.1244, "step": 233460 }, { "epoch": 0.8886444432602788, "grad_norm": 0.12559211254119873, "learning_rate": 0.0005, "loss": 2.1162, "step": 233470 }, { "epoch": 0.8886825057284015, "grad_norm": 0.13134633004665375, "learning_rate": 0.0005, "loss": 2.1016, "step": 233480 }, { "epoch": 0.8887205681965241, "grad_norm": 0.1325225532054901, "learning_rate": 0.0005, "loss": 2.1046, "step": 233490 }, { "epoch": 0.8887586306646468, "grad_norm": 0.1136227697134018, "learning_rate": 0.0005, "loss": 2.1117, "step": 233500 }, { "epoch": 0.8887966931327695, "grad_norm": 0.12274109572172165, "learning_rate": 0.0005, "loss": 2.099, "step": 233510 }, { "epoch": 0.8888347556008922, "grad_norm": 0.14137963950634003, "learning_rate": 0.0005, "loss": 2.0909, "step": 233520 }, { "epoch": 0.8888728180690149, "grad_norm": 0.13006393611431122, "learning_rate": 0.0005, "loss": 2.0874, "step": 233530 }, { "epoch": 0.8889108805371375, "grad_norm": 0.13273341953754425, "learning_rate": 0.0005, "loss": 2.1079, "step": 233540 }, { "epoch": 0.8889489430052603, "grad_norm": 0.12141282111406326, "learning_rate": 0.0005, "loss": 2.1085, "step": 233550 }, { "epoch": 0.8889870054733829, "grad_norm": 0.13110850751399994, "learning_rate": 0.0005, "loss": 2.1001, "step": 233560 }, { "epoch": 0.8890250679415056, "grad_norm": 0.1146354153752327, "learning_rate": 0.0005, "loss": 2.0922, "step": 233570 }, { "epoch": 0.8890631304096283, "grad_norm": 0.13466612994670868, "learning_rate": 0.0005, "loss": 2.0943, "step": 233580 }, { "epoch": 0.8891011928777509, "grad_norm": 0.1334793120622635, "learning_rate": 0.0005, "loss": 2.0902, "step": 233590 }, { "epoch": 0.8891392553458737, "grad_norm": 0.14149747788906097, "learning_rate": 0.0005, "loss": 2.0949, "step": 233600 }, { "epoch": 0.8891773178139963, "grad_norm": 0.13406780362129211, "learning_rate": 0.0005, "loss": 2.1051, "step": 233610 }, { "epoch": 0.889215380282119, "grad_norm": 0.12005721032619476, "learning_rate": 0.0005, "loss": 2.0909, "step": 233620 }, { "epoch": 0.8892534427502417, "grad_norm": 0.1475459784269333, "learning_rate": 0.0005, "loss": 2.1087, "step": 233630 }, { "epoch": 0.8892915052183644, "grad_norm": 0.12235203385353088, "learning_rate": 0.0005, "loss": 2.1035, "step": 233640 }, { "epoch": 0.8893295676864871, "grad_norm": 0.12408211827278137, "learning_rate": 0.0005, "loss": 2.0776, "step": 233650 }, { "epoch": 0.8893676301546097, "grad_norm": 0.12142856419086456, "learning_rate": 0.0005, "loss": 2.0987, "step": 233660 }, { "epoch": 0.8894056926227324, "grad_norm": 0.13211260735988617, "learning_rate": 0.0005, "loss": 2.0975, "step": 233670 }, { "epoch": 0.8894437550908552, "grad_norm": 0.13942740857601166, "learning_rate": 0.0005, "loss": 2.1018, "step": 233680 }, { "epoch": 0.8894818175589778, "grad_norm": 0.12578365206718445, "learning_rate": 0.0005, "loss": 2.1029, "step": 233690 }, { "epoch": 0.8895198800271005, "grad_norm": 0.11925357580184937, "learning_rate": 0.0005, "loss": 2.1027, "step": 233700 }, { "epoch": 0.8895579424952231, "grad_norm": 0.13358432054519653, "learning_rate": 0.0005, "loss": 2.1158, "step": 233710 }, { "epoch": 0.8895960049633458, "grad_norm": 0.13842090964317322, "learning_rate": 0.0005, "loss": 2.0865, "step": 233720 }, { "epoch": 0.8896340674314686, "grad_norm": 0.13777171075344086, "learning_rate": 0.0005, "loss": 2.1056, "step": 233730 }, { "epoch": 0.8896721298995912, "grad_norm": 0.12120426446199417, "learning_rate": 0.0005, "loss": 2.1055, "step": 233740 }, { "epoch": 0.8897101923677139, "grad_norm": 0.12940014898777008, "learning_rate": 0.0005, "loss": 2.1023, "step": 233750 }, { "epoch": 0.8897482548358365, "grad_norm": 0.11755038052797318, "learning_rate": 0.0005, "loss": 2.093, "step": 233760 }, { "epoch": 0.8897863173039593, "grad_norm": 0.11646275222301483, "learning_rate": 0.0005, "loss": 2.1083, "step": 233770 }, { "epoch": 0.889824379772082, "grad_norm": 0.13029074668884277, "learning_rate": 0.0005, "loss": 2.1, "step": 233780 }, { "epoch": 0.8898624422402046, "grad_norm": 0.12717968225479126, "learning_rate": 0.0005, "loss": 2.0935, "step": 233790 }, { "epoch": 0.8899005047083273, "grad_norm": 0.11478909105062485, "learning_rate": 0.0005, "loss": 2.0916, "step": 233800 }, { "epoch": 0.88993856717645, "grad_norm": 0.11571209132671356, "learning_rate": 0.0005, "loss": 2.0975, "step": 233810 }, { "epoch": 0.8899766296445727, "grad_norm": 0.13696546852588654, "learning_rate": 0.0005, "loss": 2.1138, "step": 233820 }, { "epoch": 0.8900146921126953, "grad_norm": 0.11886905133724213, "learning_rate": 0.0005, "loss": 2.0937, "step": 233830 }, { "epoch": 0.890052754580818, "grad_norm": 0.1315298080444336, "learning_rate": 0.0005, "loss": 2.1115, "step": 233840 }, { "epoch": 0.8900908170489408, "grad_norm": 0.11898674815893173, "learning_rate": 0.0005, "loss": 2.1072, "step": 233850 }, { "epoch": 0.8901288795170634, "grad_norm": 0.124471515417099, "learning_rate": 0.0005, "loss": 2.1034, "step": 233860 }, { "epoch": 0.8901669419851861, "grad_norm": 0.13916394114494324, "learning_rate": 0.0005, "loss": 2.1058, "step": 233870 }, { "epoch": 0.8902050044533087, "grad_norm": 0.13877750933170319, "learning_rate": 0.0005, "loss": 2.1085, "step": 233880 }, { "epoch": 0.8902430669214314, "grad_norm": 0.12235158681869507, "learning_rate": 0.0005, "loss": 2.1065, "step": 233890 }, { "epoch": 0.8902811293895542, "grad_norm": 0.12139211595058441, "learning_rate": 0.0005, "loss": 2.0987, "step": 233900 }, { "epoch": 0.8903191918576768, "grad_norm": 0.12321995943784714, "learning_rate": 0.0005, "loss": 2.0903, "step": 233910 }, { "epoch": 0.8903572543257995, "grad_norm": 0.1337067186832428, "learning_rate": 0.0005, "loss": 2.098, "step": 233920 }, { "epoch": 0.8903953167939221, "grad_norm": 0.17027568817138672, "learning_rate": 0.0005, "loss": 2.102, "step": 233930 }, { "epoch": 0.8904333792620449, "grad_norm": 0.14118517935276031, "learning_rate": 0.0005, "loss": 2.1197, "step": 233940 }, { "epoch": 0.8904714417301676, "grad_norm": 0.1275780349969864, "learning_rate": 0.0005, "loss": 2.0945, "step": 233950 }, { "epoch": 0.8905095041982902, "grad_norm": 0.16922014951705933, "learning_rate": 0.0005, "loss": 2.0891, "step": 233960 }, { "epoch": 0.8905475666664129, "grad_norm": 0.13354507088661194, "learning_rate": 0.0005, "loss": 2.1143, "step": 233970 }, { "epoch": 0.8905856291345357, "grad_norm": 0.1327764391899109, "learning_rate": 0.0005, "loss": 2.1037, "step": 233980 }, { "epoch": 0.8906236916026583, "grad_norm": 0.12903861701488495, "learning_rate": 0.0005, "loss": 2.0952, "step": 233990 }, { "epoch": 0.890661754070781, "grad_norm": 0.13357609510421753, "learning_rate": 0.0005, "loss": 2.0967, "step": 234000 }, { "epoch": 0.8906998165389036, "grad_norm": 0.15990236401557922, "learning_rate": 0.0005, "loss": 2.0917, "step": 234010 }, { "epoch": 0.8907378790070263, "grad_norm": 0.13477084040641785, "learning_rate": 0.0005, "loss": 2.1194, "step": 234020 }, { "epoch": 0.890775941475149, "grad_norm": 0.12353883683681488, "learning_rate": 0.0005, "loss": 2.101, "step": 234030 }, { "epoch": 0.8908140039432717, "grad_norm": 0.12096250802278519, "learning_rate": 0.0005, "loss": 2.0869, "step": 234040 }, { "epoch": 0.8908520664113944, "grad_norm": 0.12207420915365219, "learning_rate": 0.0005, "loss": 2.09, "step": 234050 }, { "epoch": 0.890890128879517, "grad_norm": 0.13806971907615662, "learning_rate": 0.0005, "loss": 2.1141, "step": 234060 }, { "epoch": 0.8909281913476398, "grad_norm": 0.12487991154193878, "learning_rate": 0.0005, "loss": 2.1147, "step": 234070 }, { "epoch": 0.8909662538157624, "grad_norm": 0.12291319668292999, "learning_rate": 0.0005, "loss": 2.1077, "step": 234080 }, { "epoch": 0.8910043162838851, "grad_norm": 0.12147248536348343, "learning_rate": 0.0005, "loss": 2.0934, "step": 234090 }, { "epoch": 0.8910423787520078, "grad_norm": 0.120960533618927, "learning_rate": 0.0005, "loss": 2.0961, "step": 234100 }, { "epoch": 0.8910804412201305, "grad_norm": 0.11734280735254288, "learning_rate": 0.0005, "loss": 2.0945, "step": 234110 }, { "epoch": 0.8911185036882532, "grad_norm": 0.11988954246044159, "learning_rate": 0.0005, "loss": 2.1092, "step": 234120 }, { "epoch": 0.8911565661563758, "grad_norm": 0.11701469123363495, "learning_rate": 0.0005, "loss": 2.1209, "step": 234130 }, { "epoch": 0.8911946286244985, "grad_norm": 0.12386269122362137, "learning_rate": 0.0005, "loss": 2.1079, "step": 234140 }, { "epoch": 0.8912326910926213, "grad_norm": 0.14417067170143127, "learning_rate": 0.0005, "loss": 2.1095, "step": 234150 }, { "epoch": 0.8912707535607439, "grad_norm": 0.14032790064811707, "learning_rate": 0.0005, "loss": 2.0941, "step": 234160 }, { "epoch": 0.8913088160288666, "grad_norm": 0.1238948404788971, "learning_rate": 0.0005, "loss": 2.1135, "step": 234170 }, { "epoch": 0.8913468784969892, "grad_norm": 0.13059444725513458, "learning_rate": 0.0005, "loss": 2.0998, "step": 234180 }, { "epoch": 0.8913849409651119, "grad_norm": 0.1279069483280182, "learning_rate": 0.0005, "loss": 2.0986, "step": 234190 }, { "epoch": 0.8914230034332347, "grad_norm": 0.12548679113388062, "learning_rate": 0.0005, "loss": 2.1085, "step": 234200 }, { "epoch": 0.8914610659013573, "grad_norm": 0.12830105423927307, "learning_rate": 0.0005, "loss": 2.1018, "step": 234210 }, { "epoch": 0.89149912836948, "grad_norm": 0.12883508205413818, "learning_rate": 0.0005, "loss": 2.1227, "step": 234220 }, { "epoch": 0.8915371908376026, "grad_norm": 0.12640762329101562, "learning_rate": 0.0005, "loss": 2.0874, "step": 234230 }, { "epoch": 0.8915752533057254, "grad_norm": 0.12490357458591461, "learning_rate": 0.0005, "loss": 2.096, "step": 234240 }, { "epoch": 0.8916133157738481, "grad_norm": 0.11947716027498245, "learning_rate": 0.0005, "loss": 2.0975, "step": 234250 }, { "epoch": 0.8916513782419707, "grad_norm": 0.11541569977998734, "learning_rate": 0.0005, "loss": 2.1139, "step": 234260 }, { "epoch": 0.8916894407100934, "grad_norm": 0.1117173358798027, "learning_rate": 0.0005, "loss": 2.0908, "step": 234270 }, { "epoch": 0.8917275031782161, "grad_norm": 0.13849619030952454, "learning_rate": 0.0005, "loss": 2.0917, "step": 234280 }, { "epoch": 0.8917655656463388, "grad_norm": 0.12830138206481934, "learning_rate": 0.0005, "loss": 2.0959, "step": 234290 }, { "epoch": 0.8918036281144615, "grad_norm": 0.13390886783599854, "learning_rate": 0.0005, "loss": 2.1012, "step": 234300 }, { "epoch": 0.8918416905825841, "grad_norm": 0.11898814886808395, "learning_rate": 0.0005, "loss": 2.1053, "step": 234310 }, { "epoch": 0.8918797530507068, "grad_norm": 0.12488134950399399, "learning_rate": 0.0005, "loss": 2.1045, "step": 234320 }, { "epoch": 0.8919178155188295, "grad_norm": 0.1258251667022705, "learning_rate": 0.0005, "loss": 2.1022, "step": 234330 }, { "epoch": 0.8919558779869522, "grad_norm": 0.1283978968858719, "learning_rate": 0.0005, "loss": 2.0971, "step": 234340 }, { "epoch": 0.8919939404550749, "grad_norm": 0.11223854869604111, "learning_rate": 0.0005, "loss": 2.1218, "step": 234350 }, { "epoch": 0.8920320029231975, "grad_norm": 0.13979001343250275, "learning_rate": 0.0005, "loss": 2.0949, "step": 234360 }, { "epoch": 0.8920700653913203, "grad_norm": 0.1531330645084381, "learning_rate": 0.0005, "loss": 2.0913, "step": 234370 }, { "epoch": 0.8921081278594429, "grad_norm": 0.12277109175920486, "learning_rate": 0.0005, "loss": 2.0968, "step": 234380 }, { "epoch": 0.8921461903275656, "grad_norm": 0.12363363057374954, "learning_rate": 0.0005, "loss": 2.1055, "step": 234390 }, { "epoch": 0.8921842527956882, "grad_norm": 0.14143170416355133, "learning_rate": 0.0005, "loss": 2.1173, "step": 234400 }, { "epoch": 0.892222315263811, "grad_norm": 0.12829793989658356, "learning_rate": 0.0005, "loss": 2.0973, "step": 234410 }, { "epoch": 0.8922603777319337, "grad_norm": 0.13423499464988708, "learning_rate": 0.0005, "loss": 2.0968, "step": 234420 }, { "epoch": 0.8922984402000563, "grad_norm": 0.12552112340927124, "learning_rate": 0.0005, "loss": 2.1039, "step": 234430 }, { "epoch": 0.892336502668179, "grad_norm": 0.14071422815322876, "learning_rate": 0.0005, "loss": 2.1136, "step": 234440 }, { "epoch": 0.8923745651363016, "grad_norm": 0.1370924413204193, "learning_rate": 0.0005, "loss": 2.0879, "step": 234450 }, { "epoch": 0.8924126276044244, "grad_norm": 0.12983131408691406, "learning_rate": 0.0005, "loss": 2.1053, "step": 234460 }, { "epoch": 0.8924506900725471, "grad_norm": 0.1345546841621399, "learning_rate": 0.0005, "loss": 2.1058, "step": 234470 }, { "epoch": 0.8924887525406697, "grad_norm": 0.1273956000804901, "learning_rate": 0.0005, "loss": 2.1095, "step": 234480 }, { "epoch": 0.8925268150087924, "grad_norm": 0.14083106815814972, "learning_rate": 0.0005, "loss": 2.0981, "step": 234490 }, { "epoch": 0.8925648774769152, "grad_norm": 0.1267554610967636, "learning_rate": 0.0005, "loss": 2.1059, "step": 234500 }, { "epoch": 0.8926029399450378, "grad_norm": 0.1280146986246109, "learning_rate": 0.0005, "loss": 2.1045, "step": 234510 }, { "epoch": 0.8926410024131605, "grad_norm": 0.1163170114159584, "learning_rate": 0.0005, "loss": 2.0969, "step": 234520 }, { "epoch": 0.8926790648812831, "grad_norm": 0.14170005917549133, "learning_rate": 0.0005, "loss": 2.1146, "step": 234530 }, { "epoch": 0.8927171273494059, "grad_norm": 0.12691031396389008, "learning_rate": 0.0005, "loss": 2.1125, "step": 234540 }, { "epoch": 0.8927551898175285, "grad_norm": 0.1331014186143875, "learning_rate": 0.0005, "loss": 2.1171, "step": 234550 }, { "epoch": 0.8927932522856512, "grad_norm": 0.1329735368490219, "learning_rate": 0.0005, "loss": 2.0899, "step": 234560 }, { "epoch": 0.8928313147537739, "grad_norm": 0.11880216747522354, "learning_rate": 0.0005, "loss": 2.1125, "step": 234570 }, { "epoch": 0.8928693772218966, "grad_norm": 0.1179804652929306, "learning_rate": 0.0005, "loss": 2.111, "step": 234580 }, { "epoch": 0.8929074396900193, "grad_norm": 0.12276770919561386, "learning_rate": 0.0005, "loss": 2.0959, "step": 234590 }, { "epoch": 0.892945502158142, "grad_norm": 0.13497260212898254, "learning_rate": 0.0005, "loss": 2.1069, "step": 234600 }, { "epoch": 0.8929835646262646, "grad_norm": 0.12063473463058472, "learning_rate": 0.0005, "loss": 2.1114, "step": 234610 }, { "epoch": 0.8930216270943873, "grad_norm": 0.13163186609745026, "learning_rate": 0.0005, "loss": 2.0942, "step": 234620 }, { "epoch": 0.89305968956251, "grad_norm": 0.14808395504951477, "learning_rate": 0.0005, "loss": 2.0989, "step": 234630 }, { "epoch": 0.8930977520306327, "grad_norm": 0.11621870845556259, "learning_rate": 0.0005, "loss": 2.1032, "step": 234640 }, { "epoch": 0.8931358144987553, "grad_norm": 0.1356925368309021, "learning_rate": 0.0005, "loss": 2.1159, "step": 234650 }, { "epoch": 0.893173876966878, "grad_norm": 0.12438897788524628, "learning_rate": 0.0005, "loss": 2.1234, "step": 234660 }, { "epoch": 0.8932119394350008, "grad_norm": 0.12834861874580383, "learning_rate": 0.0005, "loss": 2.1085, "step": 234670 }, { "epoch": 0.8932500019031234, "grad_norm": 0.12933696806430817, "learning_rate": 0.0005, "loss": 2.1136, "step": 234680 }, { "epoch": 0.8932880643712461, "grad_norm": 0.11973818391561508, "learning_rate": 0.0005, "loss": 2.0867, "step": 234690 }, { "epoch": 0.8933261268393687, "grad_norm": 0.12239322066307068, "learning_rate": 0.0005, "loss": 2.1156, "step": 234700 }, { "epoch": 0.8933641893074915, "grad_norm": 0.12869210541248322, "learning_rate": 0.0005, "loss": 2.1119, "step": 234710 }, { "epoch": 0.8934022517756142, "grad_norm": 0.12452254444360733, "learning_rate": 0.0005, "loss": 2.1041, "step": 234720 }, { "epoch": 0.8934403142437368, "grad_norm": 0.1302383542060852, "learning_rate": 0.0005, "loss": 2.1073, "step": 234730 }, { "epoch": 0.8934783767118595, "grad_norm": 0.15368464589118958, "learning_rate": 0.0005, "loss": 2.1108, "step": 234740 }, { "epoch": 0.8935164391799821, "grad_norm": 0.17728041112422943, "learning_rate": 0.0005, "loss": 2.0895, "step": 234750 }, { "epoch": 0.8935545016481049, "grad_norm": 0.13167928159236908, "learning_rate": 0.0005, "loss": 2.0899, "step": 234760 }, { "epoch": 0.8935925641162276, "grad_norm": 0.13544803857803345, "learning_rate": 0.0005, "loss": 2.1193, "step": 234770 }, { "epoch": 0.8936306265843502, "grad_norm": 0.1211390420794487, "learning_rate": 0.0005, "loss": 2.0968, "step": 234780 }, { "epoch": 0.8936686890524729, "grad_norm": 0.13180460035800934, "learning_rate": 0.0005, "loss": 2.1027, "step": 234790 }, { "epoch": 0.8937067515205956, "grad_norm": 0.12960641086101532, "learning_rate": 0.0005, "loss": 2.0915, "step": 234800 }, { "epoch": 0.8937448139887183, "grad_norm": 0.12918585538864136, "learning_rate": 0.0005, "loss": 2.0959, "step": 234810 }, { "epoch": 0.893782876456841, "grad_norm": 0.18785136938095093, "learning_rate": 0.0005, "loss": 2.102, "step": 234820 }, { "epoch": 0.8938209389249636, "grad_norm": 0.11553603410720825, "learning_rate": 0.0005, "loss": 2.0971, "step": 234830 }, { "epoch": 0.8938590013930864, "grad_norm": 0.12703922390937805, "learning_rate": 0.0005, "loss": 2.1051, "step": 234840 }, { "epoch": 0.893897063861209, "grad_norm": 0.13607758283615112, "learning_rate": 0.0005, "loss": 2.1036, "step": 234850 }, { "epoch": 0.8939351263293317, "grad_norm": 0.12318916618824005, "learning_rate": 0.0005, "loss": 2.1106, "step": 234860 }, { "epoch": 0.8939731887974544, "grad_norm": 0.13668863475322723, "learning_rate": 0.0005, "loss": 2.0971, "step": 234870 }, { "epoch": 0.894011251265577, "grad_norm": 0.1230866089463234, "learning_rate": 0.0005, "loss": 2.0942, "step": 234880 }, { "epoch": 0.8940493137336998, "grad_norm": 0.1301426887512207, "learning_rate": 0.0005, "loss": 2.1039, "step": 234890 }, { "epoch": 0.8940873762018224, "grad_norm": 0.2735763490200043, "learning_rate": 0.0005, "loss": 2.1024, "step": 234900 }, { "epoch": 0.8941254386699451, "grad_norm": 0.12629179656505585, "learning_rate": 0.0005, "loss": 2.1052, "step": 234910 }, { "epoch": 0.8941635011380678, "grad_norm": 0.1262362003326416, "learning_rate": 0.0005, "loss": 2.1142, "step": 234920 }, { "epoch": 0.8942015636061905, "grad_norm": 0.1381853222846985, "learning_rate": 0.0005, "loss": 2.1075, "step": 234930 }, { "epoch": 0.8942396260743132, "grad_norm": 0.12422885000705719, "learning_rate": 0.0005, "loss": 2.104, "step": 234940 }, { "epoch": 0.8942776885424358, "grad_norm": 0.15908923745155334, "learning_rate": 0.0005, "loss": 2.1181, "step": 234950 }, { "epoch": 0.8943157510105585, "grad_norm": 0.12250334769487381, "learning_rate": 0.0005, "loss": 2.113, "step": 234960 }, { "epoch": 0.8943538134786813, "grad_norm": 0.1374390572309494, "learning_rate": 0.0005, "loss": 2.0923, "step": 234970 }, { "epoch": 0.8943918759468039, "grad_norm": 0.1280045360326767, "learning_rate": 0.0005, "loss": 2.1043, "step": 234980 }, { "epoch": 0.8944299384149266, "grad_norm": 0.13371485471725464, "learning_rate": 0.0005, "loss": 2.0907, "step": 234990 }, { "epoch": 0.8944680008830492, "grad_norm": 0.14468924701213837, "learning_rate": 0.0005, "loss": 2.085, "step": 235000 }, { "epoch": 0.894506063351172, "grad_norm": 0.12231716513633728, "learning_rate": 0.0005, "loss": 2.1011, "step": 235010 }, { "epoch": 0.8945441258192947, "grad_norm": 0.12150655686855316, "learning_rate": 0.0005, "loss": 2.0854, "step": 235020 }, { "epoch": 0.8945821882874173, "grad_norm": 0.13228917121887207, "learning_rate": 0.0005, "loss": 2.1248, "step": 235030 }, { "epoch": 0.89462025075554, "grad_norm": 0.12393835932016373, "learning_rate": 0.0005, "loss": 2.0931, "step": 235040 }, { "epoch": 0.8946583132236626, "grad_norm": 0.13002344965934753, "learning_rate": 0.0005, "loss": 2.1003, "step": 235050 }, { "epoch": 0.8946963756917854, "grad_norm": 0.12541408836841583, "learning_rate": 0.0005, "loss": 2.1122, "step": 235060 }, { "epoch": 0.894734438159908, "grad_norm": 0.12563663721084595, "learning_rate": 0.0005, "loss": 2.0954, "step": 235070 }, { "epoch": 0.8947725006280307, "grad_norm": 0.12658190727233887, "learning_rate": 0.0005, "loss": 2.0949, "step": 235080 }, { "epoch": 0.8948105630961534, "grad_norm": 0.13006722927093506, "learning_rate": 0.0005, "loss": 2.1109, "step": 235090 }, { "epoch": 0.8948486255642761, "grad_norm": 0.12455962598323822, "learning_rate": 0.0005, "loss": 2.1, "step": 235100 }, { "epoch": 0.8948866880323988, "grad_norm": 0.13397879898548126, "learning_rate": 0.0005, "loss": 2.1165, "step": 235110 }, { "epoch": 0.8949247505005214, "grad_norm": 0.13239069283008575, "learning_rate": 0.0005, "loss": 2.1059, "step": 235120 }, { "epoch": 0.8949628129686441, "grad_norm": 0.13646048307418823, "learning_rate": 0.0005, "loss": 2.0966, "step": 235130 }, { "epoch": 0.8950008754367669, "grad_norm": 0.11922208964824677, "learning_rate": 0.0005, "loss": 2.0844, "step": 235140 }, { "epoch": 0.8950389379048895, "grad_norm": 0.13137061893939972, "learning_rate": 0.0005, "loss": 2.1066, "step": 235150 }, { "epoch": 0.8950770003730122, "grad_norm": 0.13950665295124054, "learning_rate": 0.0005, "loss": 2.0981, "step": 235160 }, { "epoch": 0.8951150628411348, "grad_norm": 0.1411396861076355, "learning_rate": 0.0005, "loss": 2.1069, "step": 235170 }, { "epoch": 0.8951531253092575, "grad_norm": 0.13766488432884216, "learning_rate": 0.0005, "loss": 2.1269, "step": 235180 }, { "epoch": 0.8951911877773803, "grad_norm": 0.13621293008327484, "learning_rate": 0.0005, "loss": 2.1086, "step": 235190 }, { "epoch": 0.8952292502455029, "grad_norm": 0.13348594307899475, "learning_rate": 0.0005, "loss": 2.1044, "step": 235200 }, { "epoch": 0.8952673127136256, "grad_norm": 0.2628757357597351, "learning_rate": 0.0005, "loss": 2.1101, "step": 235210 }, { "epoch": 0.8953053751817482, "grad_norm": 0.1402920037508011, "learning_rate": 0.0005, "loss": 2.117, "step": 235220 }, { "epoch": 0.895343437649871, "grad_norm": 0.11941851675510406, "learning_rate": 0.0005, "loss": 2.1116, "step": 235230 }, { "epoch": 0.8953815001179937, "grad_norm": 0.1464409977197647, "learning_rate": 0.0005, "loss": 2.1095, "step": 235240 }, { "epoch": 0.8954195625861163, "grad_norm": 0.11677876114845276, "learning_rate": 0.0005, "loss": 2.1034, "step": 235250 }, { "epoch": 0.895457625054239, "grad_norm": 0.11923077702522278, "learning_rate": 0.0005, "loss": 2.1008, "step": 235260 }, { "epoch": 0.8954956875223617, "grad_norm": 0.13116028904914856, "learning_rate": 0.0005, "loss": 2.1058, "step": 235270 }, { "epoch": 0.8955337499904844, "grad_norm": 0.12417915463447571, "learning_rate": 0.0005, "loss": 2.0887, "step": 235280 }, { "epoch": 0.8955718124586071, "grad_norm": 0.12923891842365265, "learning_rate": 0.0005, "loss": 2.1196, "step": 235290 }, { "epoch": 0.8956098749267297, "grad_norm": 0.13769130408763885, "learning_rate": 0.0005, "loss": 2.1092, "step": 235300 }, { "epoch": 0.8956479373948524, "grad_norm": 0.13752809166908264, "learning_rate": 0.0005, "loss": 2.0907, "step": 235310 }, { "epoch": 0.8956859998629751, "grad_norm": 0.12434390932321548, "learning_rate": 0.0005, "loss": 2.1038, "step": 235320 }, { "epoch": 0.8957240623310978, "grad_norm": 0.1192469522356987, "learning_rate": 0.0005, "loss": 2.1057, "step": 235330 }, { "epoch": 0.8957621247992205, "grad_norm": 0.13500575721263885, "learning_rate": 0.0005, "loss": 2.0928, "step": 235340 }, { "epoch": 0.8958001872673431, "grad_norm": 0.11191660910844803, "learning_rate": 0.0005, "loss": 2.1077, "step": 235350 }, { "epoch": 0.8958382497354659, "grad_norm": 0.1157965213060379, "learning_rate": 0.0005, "loss": 2.0919, "step": 235360 }, { "epoch": 0.8958763122035885, "grad_norm": 0.14252077043056488, "learning_rate": 0.0005, "loss": 2.0914, "step": 235370 }, { "epoch": 0.8959143746717112, "grad_norm": 0.1264662891626358, "learning_rate": 0.0005, "loss": 2.0957, "step": 235380 }, { "epoch": 0.8959524371398339, "grad_norm": 0.13862596452236176, "learning_rate": 0.0005, "loss": 2.1039, "step": 235390 }, { "epoch": 0.8959904996079566, "grad_norm": 0.14109665155410767, "learning_rate": 0.0005, "loss": 2.101, "step": 235400 }, { "epoch": 0.8960285620760793, "grad_norm": 0.1267188936471939, "learning_rate": 0.0005, "loss": 2.115, "step": 235410 }, { "epoch": 0.8960666245442019, "grad_norm": 0.121976338326931, "learning_rate": 0.0005, "loss": 2.1042, "step": 235420 }, { "epoch": 0.8961046870123246, "grad_norm": 0.12344034016132355, "learning_rate": 0.0005, "loss": 2.1091, "step": 235430 }, { "epoch": 0.8961427494804474, "grad_norm": 0.12784519791603088, "learning_rate": 0.0005, "loss": 2.1082, "step": 235440 }, { "epoch": 0.89618081194857, "grad_norm": 0.1280445009469986, "learning_rate": 0.0005, "loss": 2.0979, "step": 235450 }, { "epoch": 0.8962188744166927, "grad_norm": 0.11634043604135513, "learning_rate": 0.0005, "loss": 2.0942, "step": 235460 }, { "epoch": 0.8962569368848153, "grad_norm": 0.12300094217061996, "learning_rate": 0.0005, "loss": 2.0836, "step": 235470 }, { "epoch": 0.896294999352938, "grad_norm": 0.1236441358923912, "learning_rate": 0.0005, "loss": 2.1138, "step": 235480 }, { "epoch": 0.8963330618210608, "grad_norm": 0.14748676121234894, "learning_rate": 0.0005, "loss": 2.1002, "step": 235490 }, { "epoch": 0.8963711242891834, "grad_norm": 0.11680328845977783, "learning_rate": 0.0005, "loss": 2.1059, "step": 235500 }, { "epoch": 0.8964091867573061, "grad_norm": 0.12132323533296585, "learning_rate": 0.0005, "loss": 2.1138, "step": 235510 }, { "epoch": 0.8964472492254287, "grad_norm": 0.1263308972120285, "learning_rate": 0.0005, "loss": 2.1091, "step": 235520 }, { "epoch": 0.8964853116935515, "grad_norm": 0.13237321376800537, "learning_rate": 0.0005, "loss": 2.1112, "step": 235530 }, { "epoch": 0.8965233741616742, "grad_norm": 0.1214015781879425, "learning_rate": 0.0005, "loss": 2.1029, "step": 235540 }, { "epoch": 0.8965614366297968, "grad_norm": 0.11785169690847397, "learning_rate": 0.0005, "loss": 2.1051, "step": 235550 }, { "epoch": 0.8965994990979195, "grad_norm": 0.13144147396087646, "learning_rate": 0.0005, "loss": 2.1017, "step": 235560 }, { "epoch": 0.8966375615660422, "grad_norm": 0.12580817937850952, "learning_rate": 0.0005, "loss": 2.1045, "step": 235570 }, { "epoch": 0.8966756240341649, "grad_norm": 0.12729611992835999, "learning_rate": 0.0005, "loss": 2.0839, "step": 235580 }, { "epoch": 0.8967136865022876, "grad_norm": 0.13856258988380432, "learning_rate": 0.0005, "loss": 2.0889, "step": 235590 }, { "epoch": 0.8967517489704102, "grad_norm": 0.13701985776424408, "learning_rate": 0.0005, "loss": 2.0944, "step": 235600 }, { "epoch": 0.8967898114385329, "grad_norm": 0.12763841450214386, "learning_rate": 0.0005, "loss": 2.0996, "step": 235610 }, { "epoch": 0.8968278739066556, "grad_norm": 0.12884309887886047, "learning_rate": 0.0005, "loss": 2.092, "step": 235620 }, { "epoch": 0.8968659363747783, "grad_norm": 0.10915088653564453, "learning_rate": 0.0005, "loss": 2.1043, "step": 235630 }, { "epoch": 0.896903998842901, "grad_norm": 0.1206916868686676, "learning_rate": 0.0005, "loss": 2.1065, "step": 235640 }, { "epoch": 0.8969420613110236, "grad_norm": 0.13930000364780426, "learning_rate": 0.0005, "loss": 2.1049, "step": 235650 }, { "epoch": 0.8969801237791464, "grad_norm": 0.13301430642604828, "learning_rate": 0.0005, "loss": 2.1069, "step": 235660 }, { "epoch": 0.897018186247269, "grad_norm": 0.12809635698795319, "learning_rate": 0.0005, "loss": 2.1081, "step": 235670 }, { "epoch": 0.8970562487153917, "grad_norm": 0.13747407495975494, "learning_rate": 0.0005, "loss": 2.108, "step": 235680 }, { "epoch": 0.8970943111835143, "grad_norm": 0.12985946238040924, "learning_rate": 0.0005, "loss": 2.107, "step": 235690 }, { "epoch": 0.8971323736516371, "grad_norm": 0.12377568334341049, "learning_rate": 0.0005, "loss": 2.1068, "step": 235700 }, { "epoch": 0.8971704361197598, "grad_norm": 0.14152388274669647, "learning_rate": 0.0005, "loss": 2.1063, "step": 235710 }, { "epoch": 0.8972084985878824, "grad_norm": 0.11914193630218506, "learning_rate": 0.0005, "loss": 2.1011, "step": 235720 }, { "epoch": 0.8972465610560051, "grad_norm": 0.12625829875469208, "learning_rate": 0.0005, "loss": 2.0972, "step": 235730 }, { "epoch": 0.8972846235241277, "grad_norm": 0.11790535598993301, "learning_rate": 0.0005, "loss": 2.1002, "step": 235740 }, { "epoch": 0.8973226859922505, "grad_norm": 0.11743942648172379, "learning_rate": 0.0005, "loss": 2.1045, "step": 235750 }, { "epoch": 0.8973607484603732, "grad_norm": 0.1302875429391861, "learning_rate": 0.0005, "loss": 2.1101, "step": 235760 }, { "epoch": 0.8973988109284958, "grad_norm": 0.1282423734664917, "learning_rate": 0.0005, "loss": 2.1074, "step": 235770 }, { "epoch": 0.8974368733966185, "grad_norm": 0.13597077131271362, "learning_rate": 0.0005, "loss": 2.0918, "step": 235780 }, { "epoch": 0.8974749358647413, "grad_norm": 0.13141882419586182, "learning_rate": 0.0005, "loss": 2.0927, "step": 235790 }, { "epoch": 0.8975129983328639, "grad_norm": 0.12061822414398193, "learning_rate": 0.0005, "loss": 2.1139, "step": 235800 }, { "epoch": 0.8975510608009866, "grad_norm": 0.138809934258461, "learning_rate": 0.0005, "loss": 2.0989, "step": 235810 }, { "epoch": 0.8975891232691092, "grad_norm": 0.1366889476776123, "learning_rate": 0.0005, "loss": 2.118, "step": 235820 }, { "epoch": 0.897627185737232, "grad_norm": 0.12433522194623947, "learning_rate": 0.0005, "loss": 2.1019, "step": 235830 }, { "epoch": 0.8976652482053546, "grad_norm": 0.12865471839904785, "learning_rate": 0.0005, "loss": 2.1066, "step": 235840 }, { "epoch": 0.8977033106734773, "grad_norm": 0.12241709977388382, "learning_rate": 0.0005, "loss": 2.1154, "step": 235850 }, { "epoch": 0.8977413731416, "grad_norm": 0.11950081586837769, "learning_rate": 0.0005, "loss": 2.1021, "step": 235860 }, { "epoch": 0.8977794356097227, "grad_norm": 0.12112652510404587, "learning_rate": 0.0005, "loss": 2.1132, "step": 235870 }, { "epoch": 0.8978174980778454, "grad_norm": 0.14162234961986542, "learning_rate": 0.0005, "loss": 2.109, "step": 235880 }, { "epoch": 0.897855560545968, "grad_norm": 0.12865613400936127, "learning_rate": 0.0005, "loss": 2.1113, "step": 235890 }, { "epoch": 0.8978936230140907, "grad_norm": 0.1257365494966507, "learning_rate": 0.0005, "loss": 2.1041, "step": 235900 }, { "epoch": 0.8979316854822134, "grad_norm": 0.11937166750431061, "learning_rate": 0.0005, "loss": 2.1011, "step": 235910 }, { "epoch": 0.8979697479503361, "grad_norm": 0.12168044596910477, "learning_rate": 0.0005, "loss": 2.1109, "step": 235920 }, { "epoch": 0.8980078104184588, "grad_norm": 0.1285332292318344, "learning_rate": 0.0005, "loss": 2.0979, "step": 235930 }, { "epoch": 0.8980458728865814, "grad_norm": 0.11787950992584229, "learning_rate": 0.0005, "loss": 2.1144, "step": 235940 }, { "epoch": 0.8980839353547041, "grad_norm": 0.13091526925563812, "learning_rate": 0.0005, "loss": 2.1016, "step": 235950 }, { "epoch": 0.8981219978228269, "grad_norm": 0.12218698859214783, "learning_rate": 0.0005, "loss": 2.1096, "step": 235960 }, { "epoch": 0.8981600602909495, "grad_norm": 0.12118630111217499, "learning_rate": 0.0005, "loss": 2.0933, "step": 235970 }, { "epoch": 0.8981981227590722, "grad_norm": 0.12844808399677277, "learning_rate": 0.0005, "loss": 2.1083, "step": 235980 }, { "epoch": 0.8982361852271948, "grad_norm": 0.11266063153743744, "learning_rate": 0.0005, "loss": 2.0973, "step": 235990 }, { "epoch": 0.8982742476953176, "grad_norm": 0.13366252183914185, "learning_rate": 0.0005, "loss": 2.0957, "step": 236000 }, { "epoch": 0.8983123101634403, "grad_norm": 0.12028734385967255, "learning_rate": 0.0005, "loss": 2.0935, "step": 236010 }, { "epoch": 0.8983503726315629, "grad_norm": 0.14484402537345886, "learning_rate": 0.0005, "loss": 2.1148, "step": 236020 }, { "epoch": 0.8983884350996856, "grad_norm": 0.13890573382377625, "learning_rate": 0.0005, "loss": 2.1087, "step": 236030 }, { "epoch": 0.8984264975678082, "grad_norm": 0.12853313982486725, "learning_rate": 0.0005, "loss": 2.0932, "step": 236040 }, { "epoch": 0.898464560035931, "grad_norm": 0.12683875858783722, "learning_rate": 0.0005, "loss": 2.1164, "step": 236050 }, { "epoch": 0.8985026225040537, "grad_norm": 0.12236892431974411, "learning_rate": 0.0005, "loss": 2.097, "step": 236060 }, { "epoch": 0.8985406849721763, "grad_norm": 0.13218963146209717, "learning_rate": 0.0005, "loss": 2.1066, "step": 236070 }, { "epoch": 0.898578747440299, "grad_norm": 0.12956587970256805, "learning_rate": 0.0005, "loss": 2.105, "step": 236080 }, { "epoch": 0.8986168099084217, "grad_norm": 0.12366285920143127, "learning_rate": 0.0005, "loss": 2.104, "step": 236090 }, { "epoch": 0.8986548723765444, "grad_norm": 0.12479374557733536, "learning_rate": 0.0005, "loss": 2.0983, "step": 236100 }, { "epoch": 0.8986929348446671, "grad_norm": 0.11998150497674942, "learning_rate": 0.0005, "loss": 2.1072, "step": 236110 }, { "epoch": 0.8987309973127897, "grad_norm": 0.12145683914422989, "learning_rate": 0.0005, "loss": 2.1041, "step": 236120 }, { "epoch": 0.8987690597809125, "grad_norm": 0.13119207322597504, "learning_rate": 0.0005, "loss": 2.1181, "step": 236130 }, { "epoch": 0.8988071222490351, "grad_norm": 0.12737643718719482, "learning_rate": 0.0005, "loss": 2.0835, "step": 236140 }, { "epoch": 0.8988451847171578, "grad_norm": 0.13847488164901733, "learning_rate": 0.0005, "loss": 2.1126, "step": 236150 }, { "epoch": 0.8988832471852805, "grad_norm": 0.12293814867734909, "learning_rate": 0.0005, "loss": 2.1009, "step": 236160 }, { "epoch": 0.8989213096534031, "grad_norm": 0.10784052312374115, "learning_rate": 0.0005, "loss": 2.1067, "step": 236170 }, { "epoch": 0.8989593721215259, "grad_norm": 0.13122645020484924, "learning_rate": 0.0005, "loss": 2.1073, "step": 236180 }, { "epoch": 0.8989974345896485, "grad_norm": 0.12222135812044144, "learning_rate": 0.0005, "loss": 2.1015, "step": 236190 }, { "epoch": 0.8990354970577712, "grad_norm": 0.1274719387292862, "learning_rate": 0.0005, "loss": 2.108, "step": 236200 }, { "epoch": 0.8990735595258939, "grad_norm": 0.12640856206417084, "learning_rate": 0.0005, "loss": 2.102, "step": 236210 }, { "epoch": 0.8991116219940166, "grad_norm": 0.13322730362415314, "learning_rate": 0.0005, "loss": 2.113, "step": 236220 }, { "epoch": 0.8991496844621393, "grad_norm": 0.11743305623531342, "learning_rate": 0.0005, "loss": 2.1151, "step": 236230 }, { "epoch": 0.8991877469302619, "grad_norm": 0.14595948159694672, "learning_rate": 0.0005, "loss": 2.1057, "step": 236240 }, { "epoch": 0.8992258093983846, "grad_norm": 0.12401731312274933, "learning_rate": 0.0005, "loss": 2.1017, "step": 236250 }, { "epoch": 0.8992638718665074, "grad_norm": 0.11821137368679047, "learning_rate": 0.0005, "loss": 2.1113, "step": 236260 }, { "epoch": 0.89930193433463, "grad_norm": 0.12861000001430511, "learning_rate": 0.0005, "loss": 2.1062, "step": 236270 }, { "epoch": 0.8993399968027527, "grad_norm": 0.12914568185806274, "learning_rate": 0.0005, "loss": 2.1021, "step": 236280 }, { "epoch": 0.8993780592708753, "grad_norm": 0.13119269907474518, "learning_rate": 0.0005, "loss": 2.1039, "step": 236290 }, { "epoch": 0.8994161217389981, "grad_norm": 0.14866691827774048, "learning_rate": 0.0005, "loss": 2.1034, "step": 236300 }, { "epoch": 0.8994541842071208, "grad_norm": 0.14841854572296143, "learning_rate": 0.0005, "loss": 2.101, "step": 236310 }, { "epoch": 0.8994922466752434, "grad_norm": 0.12997667491436005, "learning_rate": 0.0005, "loss": 2.0945, "step": 236320 }, { "epoch": 0.8995303091433661, "grad_norm": 0.118070587515831, "learning_rate": 0.0005, "loss": 2.115, "step": 236330 }, { "epoch": 0.8995683716114887, "grad_norm": 0.12368416786193848, "learning_rate": 0.0005, "loss": 2.1037, "step": 236340 }, { "epoch": 0.8996064340796115, "grad_norm": 0.1380939930677414, "learning_rate": 0.0005, "loss": 2.1056, "step": 236350 }, { "epoch": 0.8996444965477342, "grad_norm": 0.1202736422419548, "learning_rate": 0.0005, "loss": 2.1154, "step": 236360 }, { "epoch": 0.8996825590158568, "grad_norm": 0.12209721654653549, "learning_rate": 0.0005, "loss": 2.106, "step": 236370 }, { "epoch": 0.8997206214839795, "grad_norm": 0.12462722510099411, "learning_rate": 0.0005, "loss": 2.1046, "step": 236380 }, { "epoch": 0.8997586839521022, "grad_norm": 0.12407328188419342, "learning_rate": 0.0005, "loss": 2.104, "step": 236390 }, { "epoch": 0.8997967464202249, "grad_norm": 0.11674037575721741, "learning_rate": 0.0005, "loss": 2.1139, "step": 236400 }, { "epoch": 0.8998348088883475, "grad_norm": 0.12624992430210114, "learning_rate": 0.0005, "loss": 2.1199, "step": 236410 }, { "epoch": 0.8998728713564702, "grad_norm": 0.12524190545082092, "learning_rate": 0.0005, "loss": 2.1081, "step": 236420 }, { "epoch": 0.899910933824593, "grad_norm": 0.1276724636554718, "learning_rate": 0.0005, "loss": 2.0976, "step": 236430 }, { "epoch": 0.8999489962927156, "grad_norm": 0.12621335685253143, "learning_rate": 0.0005, "loss": 2.1033, "step": 236440 }, { "epoch": 0.8999870587608383, "grad_norm": 0.12861864268779755, "learning_rate": 0.0005, "loss": 2.1042, "step": 236450 }, { "epoch": 0.900025121228961, "grad_norm": 0.13620729744434357, "learning_rate": 0.0005, "loss": 2.1011, "step": 236460 }, { "epoch": 0.9000631836970836, "grad_norm": 0.13242807984352112, "learning_rate": 0.0005, "loss": 2.1038, "step": 236470 }, { "epoch": 0.9001012461652064, "grad_norm": 0.12402066588401794, "learning_rate": 0.0005, "loss": 2.1025, "step": 236480 }, { "epoch": 0.900139308633329, "grad_norm": 0.14862869679927826, "learning_rate": 0.0005, "loss": 2.1031, "step": 236490 }, { "epoch": 0.9001773711014517, "grad_norm": 0.11755998432636261, "learning_rate": 0.0005, "loss": 2.1067, "step": 236500 }, { "epoch": 0.9002154335695743, "grad_norm": 0.134490966796875, "learning_rate": 0.0005, "loss": 2.1062, "step": 236510 }, { "epoch": 0.9002534960376971, "grad_norm": 0.13029280304908752, "learning_rate": 0.0005, "loss": 2.1184, "step": 236520 }, { "epoch": 0.9002915585058198, "grad_norm": 0.13604643940925598, "learning_rate": 0.0005, "loss": 2.0962, "step": 236530 }, { "epoch": 0.9003296209739424, "grad_norm": 0.11948616057634354, "learning_rate": 0.0005, "loss": 2.1077, "step": 236540 }, { "epoch": 0.9003676834420651, "grad_norm": 0.1403426080942154, "learning_rate": 0.0005, "loss": 2.0941, "step": 236550 }, { "epoch": 0.9004057459101878, "grad_norm": 0.12944534420967102, "learning_rate": 0.0005, "loss": 2.0947, "step": 236560 }, { "epoch": 0.9004438083783105, "grad_norm": 0.13606569170951843, "learning_rate": 0.0005, "loss": 2.0843, "step": 236570 }, { "epoch": 0.9004818708464332, "grad_norm": 0.1257597804069519, "learning_rate": 0.0005, "loss": 2.1082, "step": 236580 }, { "epoch": 0.9005199333145558, "grad_norm": 0.13176938891410828, "learning_rate": 0.0005, "loss": 2.1053, "step": 236590 }, { "epoch": 0.9005579957826785, "grad_norm": 0.14309963583946228, "learning_rate": 0.0005, "loss": 2.102, "step": 236600 }, { "epoch": 0.9005960582508012, "grad_norm": 0.12653017044067383, "learning_rate": 0.0005, "loss": 2.1238, "step": 236610 }, { "epoch": 0.9006341207189239, "grad_norm": 0.13162977993488312, "learning_rate": 0.0005, "loss": 2.1073, "step": 236620 }, { "epoch": 0.9006721831870466, "grad_norm": 0.13455240428447723, "learning_rate": 0.0005, "loss": 2.1186, "step": 236630 }, { "epoch": 0.9007102456551692, "grad_norm": 0.13237057626247406, "learning_rate": 0.0005, "loss": 2.1064, "step": 236640 }, { "epoch": 0.900748308123292, "grad_norm": 0.12127892673015594, "learning_rate": 0.0005, "loss": 2.0899, "step": 236650 }, { "epoch": 0.9007863705914146, "grad_norm": 0.12416072934865952, "learning_rate": 0.0005, "loss": 2.101, "step": 236660 }, { "epoch": 0.9008244330595373, "grad_norm": 0.1350620687007904, "learning_rate": 0.0005, "loss": 2.0934, "step": 236670 }, { "epoch": 0.90086249552766, "grad_norm": 0.1300247311592102, "learning_rate": 0.0005, "loss": 2.1072, "step": 236680 }, { "epoch": 0.9009005579957827, "grad_norm": 0.12768010795116425, "learning_rate": 0.0005, "loss": 2.0963, "step": 236690 }, { "epoch": 0.9009386204639054, "grad_norm": 0.1444215327501297, "learning_rate": 0.0005, "loss": 2.0875, "step": 236700 }, { "epoch": 0.900976682932028, "grad_norm": 0.1194644421339035, "learning_rate": 0.0005, "loss": 2.0919, "step": 236710 }, { "epoch": 0.9010147454001507, "grad_norm": 0.1391426920890808, "learning_rate": 0.0005, "loss": 2.1102, "step": 236720 }, { "epoch": 0.9010528078682735, "grad_norm": 0.11720550805330276, "learning_rate": 0.0005, "loss": 2.1001, "step": 236730 }, { "epoch": 0.9010908703363961, "grad_norm": 0.12966321408748627, "learning_rate": 0.0005, "loss": 2.1224, "step": 236740 }, { "epoch": 0.9011289328045188, "grad_norm": 0.1279650330543518, "learning_rate": 0.0005, "loss": 2.1076, "step": 236750 }, { "epoch": 0.9011669952726414, "grad_norm": 0.11716605722904205, "learning_rate": 0.0005, "loss": 2.1033, "step": 236760 }, { "epoch": 0.9012050577407641, "grad_norm": 0.15291838347911835, "learning_rate": 0.0005, "loss": 2.0868, "step": 236770 }, { "epoch": 0.9012431202088869, "grad_norm": 0.12723152339458466, "learning_rate": 0.0005, "loss": 2.1203, "step": 236780 }, { "epoch": 0.9012811826770095, "grad_norm": 0.1147589385509491, "learning_rate": 0.0005, "loss": 2.1167, "step": 236790 }, { "epoch": 0.9013192451451322, "grad_norm": 0.13322225213050842, "learning_rate": 0.0005, "loss": 2.0919, "step": 236800 }, { "epoch": 0.9013573076132548, "grad_norm": 0.13659736514091492, "learning_rate": 0.0005, "loss": 2.1115, "step": 236810 }, { "epoch": 0.9013953700813776, "grad_norm": 0.13309742510318756, "learning_rate": 0.0005, "loss": 2.1071, "step": 236820 }, { "epoch": 0.9014334325495003, "grad_norm": 0.13266639411449432, "learning_rate": 0.0005, "loss": 2.1086, "step": 236830 }, { "epoch": 0.9014714950176229, "grad_norm": 0.12524521350860596, "learning_rate": 0.0005, "loss": 2.1122, "step": 236840 }, { "epoch": 0.9015095574857456, "grad_norm": 0.15074117481708527, "learning_rate": 0.0005, "loss": 2.1079, "step": 236850 }, { "epoch": 0.9015476199538683, "grad_norm": 0.12030625343322754, "learning_rate": 0.0005, "loss": 2.1064, "step": 236860 }, { "epoch": 0.901585682421991, "grad_norm": 0.13095256686210632, "learning_rate": 0.0005, "loss": 2.0951, "step": 236870 }, { "epoch": 0.9016237448901137, "grad_norm": 0.11550428718328476, "learning_rate": 0.0005, "loss": 2.0883, "step": 236880 }, { "epoch": 0.9016618073582363, "grad_norm": 0.11969823390245438, "learning_rate": 0.0005, "loss": 2.1039, "step": 236890 }, { "epoch": 0.901699869826359, "grad_norm": 0.11549341678619385, "learning_rate": 0.0005, "loss": 2.1084, "step": 236900 }, { "epoch": 0.9017379322944817, "grad_norm": 0.12132531404495239, "learning_rate": 0.0005, "loss": 2.0942, "step": 236910 }, { "epoch": 0.9017759947626044, "grad_norm": 0.11665021628141403, "learning_rate": 0.0005, "loss": 2.1042, "step": 236920 }, { "epoch": 0.901814057230727, "grad_norm": 0.1289794147014618, "learning_rate": 0.0005, "loss": 2.1018, "step": 236930 }, { "epoch": 0.9018521196988497, "grad_norm": 0.14646443724632263, "learning_rate": 0.0005, "loss": 2.1069, "step": 236940 }, { "epoch": 0.9018901821669725, "grad_norm": 0.1430707573890686, "learning_rate": 0.0005, "loss": 2.0956, "step": 236950 }, { "epoch": 0.9019282446350951, "grad_norm": 0.13452860713005066, "learning_rate": 0.0005, "loss": 2.1043, "step": 236960 }, { "epoch": 0.9019663071032178, "grad_norm": 0.12012603133916855, "learning_rate": 0.0005, "loss": 2.1004, "step": 236970 }, { "epoch": 0.9020043695713404, "grad_norm": 0.11895259469747543, "learning_rate": 0.0005, "loss": 2.0845, "step": 236980 }, { "epoch": 0.9020424320394632, "grad_norm": 0.1259036660194397, "learning_rate": 0.0005, "loss": 2.1016, "step": 236990 }, { "epoch": 0.9020804945075859, "grad_norm": 0.14254164695739746, "learning_rate": 0.0005, "loss": 2.098, "step": 237000 }, { "epoch": 0.9021185569757085, "grad_norm": 0.1326291710138321, "learning_rate": 0.0005, "loss": 2.1023, "step": 237010 }, { "epoch": 0.9021566194438312, "grad_norm": 0.12728415429592133, "learning_rate": 0.0005, "loss": 2.1019, "step": 237020 }, { "epoch": 0.9021946819119538, "grad_norm": 0.13301852345466614, "learning_rate": 0.0005, "loss": 2.1167, "step": 237030 }, { "epoch": 0.9022327443800766, "grad_norm": 0.14580120146274567, "learning_rate": 0.0005, "loss": 2.1138, "step": 237040 }, { "epoch": 0.9022708068481993, "grad_norm": 0.12128158658742905, "learning_rate": 0.0005, "loss": 2.0766, "step": 237050 }, { "epoch": 0.9023088693163219, "grad_norm": 0.14106401801109314, "learning_rate": 0.0005, "loss": 2.1005, "step": 237060 }, { "epoch": 0.9023469317844446, "grad_norm": 0.1238996684551239, "learning_rate": 0.0005, "loss": 2.1056, "step": 237070 }, { "epoch": 0.9023849942525674, "grad_norm": 0.12480529397726059, "learning_rate": 0.0005, "loss": 2.092, "step": 237080 }, { "epoch": 0.90242305672069, "grad_norm": 0.1321299970149994, "learning_rate": 0.0005, "loss": 2.0994, "step": 237090 }, { "epoch": 0.9024611191888127, "grad_norm": 0.1273770034313202, "learning_rate": 0.0005, "loss": 2.1098, "step": 237100 }, { "epoch": 0.9024991816569353, "grad_norm": 0.11618199944496155, "learning_rate": 0.0005, "loss": 2.0924, "step": 237110 }, { "epoch": 0.9025372441250581, "grad_norm": 0.13283830881118774, "learning_rate": 0.0005, "loss": 2.1042, "step": 237120 }, { "epoch": 0.9025753065931807, "grad_norm": 0.12719158828258514, "learning_rate": 0.0005, "loss": 2.1175, "step": 237130 }, { "epoch": 0.9026133690613034, "grad_norm": 0.11212098598480225, "learning_rate": 0.0005, "loss": 2.1008, "step": 237140 }, { "epoch": 0.9026514315294261, "grad_norm": 0.13038401305675507, "learning_rate": 0.0005, "loss": 2.1073, "step": 237150 }, { "epoch": 0.9026894939975488, "grad_norm": 0.1276264190673828, "learning_rate": 0.0005, "loss": 2.1042, "step": 237160 }, { "epoch": 0.9027275564656715, "grad_norm": 0.13317757844924927, "learning_rate": 0.0005, "loss": 2.1118, "step": 237170 }, { "epoch": 0.9027656189337941, "grad_norm": 0.13702648878097534, "learning_rate": 0.0005, "loss": 2.1086, "step": 237180 }, { "epoch": 0.9028036814019168, "grad_norm": 0.13050617277622223, "learning_rate": 0.0005, "loss": 2.1095, "step": 237190 }, { "epoch": 0.9028417438700395, "grad_norm": 0.12711739540100098, "learning_rate": 0.0005, "loss": 2.1055, "step": 237200 }, { "epoch": 0.9028798063381622, "grad_norm": 0.12161190062761307, "learning_rate": 0.0005, "loss": 2.1007, "step": 237210 }, { "epoch": 0.9029178688062849, "grad_norm": 0.12170030176639557, "learning_rate": 0.0005, "loss": 2.0887, "step": 237220 }, { "epoch": 0.9029559312744075, "grad_norm": 0.11940809339284897, "learning_rate": 0.0005, "loss": 2.11, "step": 237230 }, { "epoch": 0.9029939937425302, "grad_norm": 0.12572605907917023, "learning_rate": 0.0005, "loss": 2.1121, "step": 237240 }, { "epoch": 0.903032056210653, "grad_norm": 0.14780136942863464, "learning_rate": 0.0005, "loss": 2.1104, "step": 237250 }, { "epoch": 0.9030701186787756, "grad_norm": 0.13027071952819824, "learning_rate": 0.0005, "loss": 2.0981, "step": 237260 }, { "epoch": 0.9031081811468983, "grad_norm": 0.14059773087501526, "learning_rate": 0.0005, "loss": 2.0957, "step": 237270 }, { "epoch": 0.9031462436150209, "grad_norm": 0.13311892747879028, "learning_rate": 0.0005, "loss": 2.1094, "step": 237280 }, { "epoch": 0.9031843060831437, "grad_norm": 0.1414981335401535, "learning_rate": 0.0005, "loss": 2.0976, "step": 237290 }, { "epoch": 0.9032223685512664, "grad_norm": 0.13014385104179382, "learning_rate": 0.0005, "loss": 2.1043, "step": 237300 }, { "epoch": 0.903260431019389, "grad_norm": 0.12036735564470291, "learning_rate": 0.0005, "loss": 2.1001, "step": 237310 }, { "epoch": 0.9032984934875117, "grad_norm": 0.13092973828315735, "learning_rate": 0.0005, "loss": 2.1066, "step": 237320 }, { "epoch": 0.9033365559556343, "grad_norm": 0.11806660890579224, "learning_rate": 0.0005, "loss": 2.1102, "step": 237330 }, { "epoch": 0.9033746184237571, "grad_norm": 0.13571125268936157, "learning_rate": 0.0005, "loss": 2.0955, "step": 237340 }, { "epoch": 0.9034126808918798, "grad_norm": 0.1258023977279663, "learning_rate": 0.0005, "loss": 2.0916, "step": 237350 }, { "epoch": 0.9034507433600024, "grad_norm": 0.13625076413154602, "learning_rate": 0.0005, "loss": 2.1005, "step": 237360 }, { "epoch": 0.9034888058281251, "grad_norm": 0.12940800189971924, "learning_rate": 0.0005, "loss": 2.0991, "step": 237370 }, { "epoch": 0.9035268682962478, "grad_norm": 0.13219799101352692, "learning_rate": 0.0005, "loss": 2.1011, "step": 237380 }, { "epoch": 0.9035649307643705, "grad_norm": 0.13905245065689087, "learning_rate": 0.0005, "loss": 2.103, "step": 237390 }, { "epoch": 0.9036029932324932, "grad_norm": 0.13367201387882233, "learning_rate": 0.0005, "loss": 2.1112, "step": 237400 }, { "epoch": 0.9036410557006158, "grad_norm": 0.12304285168647766, "learning_rate": 0.0005, "loss": 2.1174, "step": 237410 }, { "epoch": 0.9036791181687386, "grad_norm": 0.13298951089382172, "learning_rate": 0.0005, "loss": 2.1018, "step": 237420 }, { "epoch": 0.9037171806368612, "grad_norm": 0.12021772563457489, "learning_rate": 0.0005, "loss": 2.0978, "step": 237430 }, { "epoch": 0.9037552431049839, "grad_norm": 0.12623293697834015, "learning_rate": 0.0005, "loss": 2.1211, "step": 237440 }, { "epoch": 0.9037933055731066, "grad_norm": 0.14011913537979126, "learning_rate": 0.0005, "loss": 2.1115, "step": 237450 }, { "epoch": 0.9038313680412292, "grad_norm": 0.1234155222773552, "learning_rate": 0.0005, "loss": 2.1007, "step": 237460 }, { "epoch": 0.903869430509352, "grad_norm": 0.12620636820793152, "learning_rate": 0.0005, "loss": 2.1084, "step": 237470 }, { "epoch": 0.9039074929774746, "grad_norm": 0.12419416755437851, "learning_rate": 0.0005, "loss": 2.1083, "step": 237480 }, { "epoch": 0.9039455554455973, "grad_norm": 0.13159045577049255, "learning_rate": 0.0005, "loss": 2.103, "step": 237490 }, { "epoch": 0.90398361791372, "grad_norm": 0.14370512962341309, "learning_rate": 0.0005, "loss": 2.1115, "step": 237500 }, { "epoch": 0.9040216803818427, "grad_norm": 0.14260618388652802, "learning_rate": 0.0005, "loss": 2.0962, "step": 237510 }, { "epoch": 0.9040597428499654, "grad_norm": 0.11814543604850769, "learning_rate": 0.0005, "loss": 2.1053, "step": 237520 }, { "epoch": 0.904097805318088, "grad_norm": 0.1222439780831337, "learning_rate": 0.0005, "loss": 2.1115, "step": 237530 }, { "epoch": 0.9041358677862107, "grad_norm": 0.140053853392601, "learning_rate": 0.0005, "loss": 2.1032, "step": 237540 }, { "epoch": 0.9041739302543335, "grad_norm": 0.1309879869222641, "learning_rate": 0.0005, "loss": 2.1016, "step": 237550 }, { "epoch": 0.9042119927224561, "grad_norm": 0.1283019781112671, "learning_rate": 0.0005, "loss": 2.1124, "step": 237560 }, { "epoch": 0.9042500551905788, "grad_norm": 0.13409188389778137, "learning_rate": 0.0005, "loss": 2.0908, "step": 237570 }, { "epoch": 0.9042881176587014, "grad_norm": 0.12143038213253021, "learning_rate": 0.0005, "loss": 2.1098, "step": 237580 }, { "epoch": 0.9043261801268242, "grad_norm": 0.11686935275793076, "learning_rate": 0.0005, "loss": 2.0949, "step": 237590 }, { "epoch": 0.9043642425949469, "grad_norm": 0.12011512368917465, "learning_rate": 0.0005, "loss": 2.1028, "step": 237600 }, { "epoch": 0.9044023050630695, "grad_norm": 0.11764641851186752, "learning_rate": 0.0005, "loss": 2.0903, "step": 237610 }, { "epoch": 0.9044403675311922, "grad_norm": 0.13327255845069885, "learning_rate": 0.0005, "loss": 2.0935, "step": 237620 }, { "epoch": 0.9044784299993148, "grad_norm": 0.12383707612752914, "learning_rate": 0.0005, "loss": 2.096, "step": 237630 }, { "epoch": 0.9045164924674376, "grad_norm": 0.14364486932754517, "learning_rate": 0.0005, "loss": 2.0974, "step": 237640 }, { "epoch": 0.9045545549355603, "grad_norm": 0.12070348113775253, "learning_rate": 0.0005, "loss": 2.1026, "step": 237650 }, { "epoch": 0.9045926174036829, "grad_norm": 0.12772414088249207, "learning_rate": 0.0005, "loss": 2.0856, "step": 237660 }, { "epoch": 0.9046306798718056, "grad_norm": 0.12472674250602722, "learning_rate": 0.0005, "loss": 2.0894, "step": 237670 }, { "epoch": 0.9046687423399283, "grad_norm": 0.14813371002674103, "learning_rate": 0.0005, "loss": 2.1046, "step": 237680 }, { "epoch": 0.904706804808051, "grad_norm": 0.1288592517375946, "learning_rate": 0.0005, "loss": 2.1077, "step": 237690 }, { "epoch": 0.9047448672761736, "grad_norm": 0.1278732568025589, "learning_rate": 0.0005, "loss": 2.0924, "step": 237700 }, { "epoch": 0.9047829297442963, "grad_norm": 0.12233841419219971, "learning_rate": 0.0005, "loss": 2.1058, "step": 237710 }, { "epoch": 0.9048209922124191, "grad_norm": 0.12657716870307922, "learning_rate": 0.0005, "loss": 2.1158, "step": 237720 }, { "epoch": 0.9048590546805417, "grad_norm": 0.1324680596590042, "learning_rate": 0.0005, "loss": 2.1075, "step": 237730 }, { "epoch": 0.9048971171486644, "grad_norm": 0.12798066437244415, "learning_rate": 0.0005, "loss": 2.0974, "step": 237740 }, { "epoch": 0.904935179616787, "grad_norm": 0.12066149711608887, "learning_rate": 0.0005, "loss": 2.1102, "step": 237750 }, { "epoch": 0.9049732420849097, "grad_norm": 0.1185355931520462, "learning_rate": 0.0005, "loss": 2.1193, "step": 237760 }, { "epoch": 0.9050113045530325, "grad_norm": 0.12513957917690277, "learning_rate": 0.0005, "loss": 2.1085, "step": 237770 }, { "epoch": 0.9050493670211551, "grad_norm": 0.12734851241111755, "learning_rate": 0.0005, "loss": 2.1011, "step": 237780 }, { "epoch": 0.9050874294892778, "grad_norm": 0.12355585396289825, "learning_rate": 0.0005, "loss": 2.1149, "step": 237790 }, { "epoch": 0.9051254919574004, "grad_norm": 0.1301615983247757, "learning_rate": 0.0005, "loss": 2.1055, "step": 237800 }, { "epoch": 0.9051635544255232, "grad_norm": 0.13630014657974243, "learning_rate": 0.0005, "loss": 2.1144, "step": 237810 }, { "epoch": 0.9052016168936459, "grad_norm": 0.14217840135097504, "learning_rate": 0.0005, "loss": 2.0922, "step": 237820 }, { "epoch": 0.9052396793617685, "grad_norm": 0.1299649029970169, "learning_rate": 0.0005, "loss": 2.0966, "step": 237830 }, { "epoch": 0.9052777418298912, "grad_norm": 0.11644427478313446, "learning_rate": 0.0005, "loss": 2.0961, "step": 237840 }, { "epoch": 0.905315804298014, "grad_norm": 0.12850260734558105, "learning_rate": 0.0005, "loss": 2.0986, "step": 237850 }, { "epoch": 0.9053538667661366, "grad_norm": 0.2166110724210739, "learning_rate": 0.0005, "loss": 2.1106, "step": 237860 }, { "epoch": 0.9053919292342593, "grad_norm": 0.12297400087118149, "learning_rate": 0.0005, "loss": 2.096, "step": 237870 }, { "epoch": 0.9054299917023819, "grad_norm": 0.12361843138933182, "learning_rate": 0.0005, "loss": 2.0971, "step": 237880 }, { "epoch": 0.9054680541705046, "grad_norm": 0.12942881882190704, "learning_rate": 0.0005, "loss": 2.1023, "step": 237890 }, { "epoch": 0.9055061166386273, "grad_norm": 0.12694664299488068, "learning_rate": 0.0005, "loss": 2.098, "step": 237900 }, { "epoch": 0.90554417910675, "grad_norm": 0.1355399787425995, "learning_rate": 0.0005, "loss": 2.113, "step": 237910 }, { "epoch": 0.9055822415748727, "grad_norm": 0.12172191590070724, "learning_rate": 0.0005, "loss": 2.082, "step": 237920 }, { "epoch": 0.9056203040429953, "grad_norm": 0.12276661396026611, "learning_rate": 0.0005, "loss": 2.1042, "step": 237930 }, { "epoch": 0.9056583665111181, "grad_norm": 0.12394405156373978, "learning_rate": 0.0005, "loss": 2.0988, "step": 237940 }, { "epoch": 0.9056964289792407, "grad_norm": 0.12693005800247192, "learning_rate": 0.0005, "loss": 2.1036, "step": 237950 }, { "epoch": 0.9057344914473634, "grad_norm": 0.12156771123409271, "learning_rate": 0.0005, "loss": 2.0988, "step": 237960 }, { "epoch": 0.9057725539154861, "grad_norm": 0.2160923331975937, "learning_rate": 0.0005, "loss": 2.0978, "step": 237970 }, { "epoch": 0.9058106163836088, "grad_norm": 0.127583846449852, "learning_rate": 0.0005, "loss": 2.1069, "step": 237980 }, { "epoch": 0.9058486788517315, "grad_norm": 0.1311924308538437, "learning_rate": 0.0005, "loss": 2.1153, "step": 237990 }, { "epoch": 0.9058867413198541, "grad_norm": 0.1340932697057724, "learning_rate": 0.0005, "loss": 2.1063, "step": 238000 }, { "epoch": 0.9059248037879768, "grad_norm": 0.13224120438098907, "learning_rate": 0.0005, "loss": 2.1011, "step": 238010 }, { "epoch": 0.9059628662560996, "grad_norm": 0.12841913104057312, "learning_rate": 0.0005, "loss": 2.126, "step": 238020 }, { "epoch": 0.9060009287242222, "grad_norm": 0.1256815791130066, "learning_rate": 0.0005, "loss": 2.0918, "step": 238030 }, { "epoch": 0.9060389911923449, "grad_norm": 0.12731921672821045, "learning_rate": 0.0005, "loss": 2.1067, "step": 238040 }, { "epoch": 0.9060770536604675, "grad_norm": 0.125631183385849, "learning_rate": 0.0005, "loss": 2.0843, "step": 238050 }, { "epoch": 0.9061151161285902, "grad_norm": 0.1256406456232071, "learning_rate": 0.0005, "loss": 2.0891, "step": 238060 }, { "epoch": 0.906153178596713, "grad_norm": 0.10923466831445694, "learning_rate": 0.0005, "loss": 2.1102, "step": 238070 }, { "epoch": 0.9061912410648356, "grad_norm": 0.12627013027668, "learning_rate": 0.0005, "loss": 2.107, "step": 238080 }, { "epoch": 0.9062293035329583, "grad_norm": 0.11679789423942566, "learning_rate": 0.0005, "loss": 2.0896, "step": 238090 }, { "epoch": 0.9062673660010809, "grad_norm": 0.12997809052467346, "learning_rate": 0.0005, "loss": 2.0994, "step": 238100 }, { "epoch": 0.9063054284692037, "grad_norm": 0.13287369906902313, "learning_rate": 0.0005, "loss": 2.1165, "step": 238110 }, { "epoch": 0.9063434909373264, "grad_norm": 0.12381280958652496, "learning_rate": 0.0005, "loss": 2.1027, "step": 238120 }, { "epoch": 0.906381553405449, "grad_norm": 0.12879888713359833, "learning_rate": 0.0005, "loss": 2.0966, "step": 238130 }, { "epoch": 0.9064196158735717, "grad_norm": 0.12576310336589813, "learning_rate": 0.0005, "loss": 2.1009, "step": 238140 }, { "epoch": 0.9064576783416944, "grad_norm": 0.12751945853233337, "learning_rate": 0.0005, "loss": 2.09, "step": 238150 }, { "epoch": 0.9064957408098171, "grad_norm": 0.11907423287630081, "learning_rate": 0.0005, "loss": 2.0893, "step": 238160 }, { "epoch": 0.9065338032779398, "grad_norm": 0.13061079382896423, "learning_rate": 0.0005, "loss": 2.1149, "step": 238170 }, { "epoch": 0.9065718657460624, "grad_norm": 0.13230901956558228, "learning_rate": 0.0005, "loss": 2.1071, "step": 238180 }, { "epoch": 0.9066099282141851, "grad_norm": 0.11777165532112122, "learning_rate": 0.0005, "loss": 2.1032, "step": 238190 }, { "epoch": 0.9066479906823078, "grad_norm": 0.12985430657863617, "learning_rate": 0.0005, "loss": 2.101, "step": 238200 }, { "epoch": 0.9066860531504305, "grad_norm": 0.12661513686180115, "learning_rate": 0.0005, "loss": 2.091, "step": 238210 }, { "epoch": 0.9067241156185532, "grad_norm": 0.1334647685289383, "learning_rate": 0.0005, "loss": 2.104, "step": 238220 }, { "epoch": 0.9067621780866758, "grad_norm": 0.12028210610151291, "learning_rate": 0.0005, "loss": 2.0865, "step": 238230 }, { "epoch": 0.9068002405547986, "grad_norm": 0.12762492895126343, "learning_rate": 0.0005, "loss": 2.1245, "step": 238240 }, { "epoch": 0.9068383030229212, "grad_norm": 0.13555355370044708, "learning_rate": 0.0005, "loss": 2.1083, "step": 238250 }, { "epoch": 0.9068763654910439, "grad_norm": 0.12440907210111618, "learning_rate": 0.0005, "loss": 2.1028, "step": 238260 }, { "epoch": 0.9069144279591665, "grad_norm": 0.13013915717601776, "learning_rate": 0.0005, "loss": 2.1034, "step": 238270 }, { "epoch": 0.9069524904272893, "grad_norm": 0.1273365169763565, "learning_rate": 0.0005, "loss": 2.0811, "step": 238280 }, { "epoch": 0.906990552895412, "grad_norm": 0.13053053617477417, "learning_rate": 0.0005, "loss": 2.1054, "step": 238290 }, { "epoch": 0.9070286153635346, "grad_norm": 0.12159951776266098, "learning_rate": 0.0005, "loss": 2.0798, "step": 238300 }, { "epoch": 0.9070666778316573, "grad_norm": 0.13156504929065704, "learning_rate": 0.0005, "loss": 2.0915, "step": 238310 }, { "epoch": 0.9071047402997799, "grad_norm": 0.12892203032970428, "learning_rate": 0.0005, "loss": 2.117, "step": 238320 }, { "epoch": 0.9071428027679027, "grad_norm": 0.1343413144350052, "learning_rate": 0.0005, "loss": 2.1057, "step": 238330 }, { "epoch": 0.9071808652360254, "grad_norm": 0.12478955090045929, "learning_rate": 0.0005, "loss": 2.1084, "step": 238340 }, { "epoch": 0.907218927704148, "grad_norm": 0.12486769258975983, "learning_rate": 0.0005, "loss": 2.1102, "step": 238350 }, { "epoch": 0.9072569901722707, "grad_norm": 0.12930084764957428, "learning_rate": 0.0005, "loss": 2.1181, "step": 238360 }, { "epoch": 0.9072950526403935, "grad_norm": 0.13934142887592316, "learning_rate": 0.0005, "loss": 2.1037, "step": 238370 }, { "epoch": 0.9073331151085161, "grad_norm": 0.1408960372209549, "learning_rate": 0.0005, "loss": 2.1128, "step": 238380 }, { "epoch": 0.9073711775766388, "grad_norm": 0.1512349545955658, "learning_rate": 0.0005, "loss": 2.1005, "step": 238390 }, { "epoch": 0.9074092400447614, "grad_norm": 0.12842883169651031, "learning_rate": 0.0005, "loss": 2.101, "step": 238400 }, { "epoch": 0.9074473025128842, "grad_norm": 0.1381392478942871, "learning_rate": 0.0005, "loss": 2.1021, "step": 238410 }, { "epoch": 0.9074853649810068, "grad_norm": 0.12094287574291229, "learning_rate": 0.0005, "loss": 2.096, "step": 238420 }, { "epoch": 0.9075234274491295, "grad_norm": 0.1263532191514969, "learning_rate": 0.0005, "loss": 2.1116, "step": 238430 }, { "epoch": 0.9075614899172522, "grad_norm": 0.14776651561260223, "learning_rate": 0.0005, "loss": 2.0833, "step": 238440 }, { "epoch": 0.9075995523853749, "grad_norm": 0.12728571891784668, "learning_rate": 0.0005, "loss": 2.1074, "step": 238450 }, { "epoch": 0.9076376148534976, "grad_norm": 0.13688482344150543, "learning_rate": 0.0005, "loss": 2.1038, "step": 238460 }, { "epoch": 0.9076756773216202, "grad_norm": 0.15688008069992065, "learning_rate": 0.0005, "loss": 2.1043, "step": 238470 }, { "epoch": 0.9077137397897429, "grad_norm": 0.12313451617956161, "learning_rate": 0.0005, "loss": 2.1051, "step": 238480 }, { "epoch": 0.9077518022578656, "grad_norm": 0.12344785779714584, "learning_rate": 0.0005, "loss": 2.1236, "step": 238490 }, { "epoch": 0.9077898647259883, "grad_norm": 0.13766978681087494, "learning_rate": 0.0005, "loss": 2.0984, "step": 238500 }, { "epoch": 0.907827927194111, "grad_norm": 0.14033202826976776, "learning_rate": 0.0005, "loss": 2.1033, "step": 238510 }, { "epoch": 0.9078659896622336, "grad_norm": 0.12385424226522446, "learning_rate": 0.0005, "loss": 2.1111, "step": 238520 }, { "epoch": 0.9079040521303563, "grad_norm": 0.12292386591434479, "learning_rate": 0.0005, "loss": 2.1084, "step": 238530 }, { "epoch": 0.9079421145984791, "grad_norm": 0.11781243979930878, "learning_rate": 0.0005, "loss": 2.1201, "step": 238540 }, { "epoch": 0.9079801770666017, "grad_norm": 0.12393909692764282, "learning_rate": 0.0005, "loss": 2.1071, "step": 238550 }, { "epoch": 0.9080182395347244, "grad_norm": 0.14330510795116425, "learning_rate": 0.0005, "loss": 2.1065, "step": 238560 }, { "epoch": 0.908056302002847, "grad_norm": 0.12844721972942352, "learning_rate": 0.0005, "loss": 2.1053, "step": 238570 }, { "epoch": 0.9080943644709698, "grad_norm": 0.13164971768856049, "learning_rate": 0.0005, "loss": 2.0821, "step": 238580 }, { "epoch": 0.9081324269390925, "grad_norm": 0.11971738189458847, "learning_rate": 0.0005, "loss": 2.0965, "step": 238590 }, { "epoch": 0.9081704894072151, "grad_norm": 0.12422315776348114, "learning_rate": 0.0005, "loss": 2.1041, "step": 238600 }, { "epoch": 0.9082085518753378, "grad_norm": 0.1429583877325058, "learning_rate": 0.0005, "loss": 2.1072, "step": 238610 }, { "epoch": 0.9082466143434604, "grad_norm": 0.12299343943595886, "learning_rate": 0.0005, "loss": 2.107, "step": 238620 }, { "epoch": 0.9082846768115832, "grad_norm": 0.13192951679229736, "learning_rate": 0.0005, "loss": 2.1036, "step": 238630 }, { "epoch": 0.9083227392797059, "grad_norm": 0.12766271829605103, "learning_rate": 0.0005, "loss": 2.0959, "step": 238640 }, { "epoch": 0.9083608017478285, "grad_norm": 0.13206838071346283, "learning_rate": 0.0005, "loss": 2.1073, "step": 238650 }, { "epoch": 0.9083988642159512, "grad_norm": 0.13054607808589935, "learning_rate": 0.0005, "loss": 2.0899, "step": 238660 }, { "epoch": 0.9084369266840739, "grad_norm": 0.1256096065044403, "learning_rate": 0.0005, "loss": 2.1171, "step": 238670 }, { "epoch": 0.9084749891521966, "grad_norm": 0.13709591329097748, "learning_rate": 0.0005, "loss": 2.1122, "step": 238680 }, { "epoch": 0.9085130516203193, "grad_norm": 0.1344529390335083, "learning_rate": 0.0005, "loss": 2.0868, "step": 238690 }, { "epoch": 0.9085511140884419, "grad_norm": 0.11625105142593384, "learning_rate": 0.0005, "loss": 2.1071, "step": 238700 }, { "epoch": 0.9085891765565647, "grad_norm": 0.13110487163066864, "learning_rate": 0.0005, "loss": 2.1136, "step": 238710 }, { "epoch": 0.9086272390246873, "grad_norm": 0.11854494363069534, "learning_rate": 0.0005, "loss": 2.1034, "step": 238720 }, { "epoch": 0.90866530149281, "grad_norm": 0.1250758171081543, "learning_rate": 0.0005, "loss": 2.0843, "step": 238730 }, { "epoch": 0.9087033639609327, "grad_norm": 0.11900221556425095, "learning_rate": 0.0005, "loss": 2.1087, "step": 238740 }, { "epoch": 0.9087414264290554, "grad_norm": 0.12195499986410141, "learning_rate": 0.0005, "loss": 2.0932, "step": 238750 }, { "epoch": 0.9087794888971781, "grad_norm": 0.14227333664894104, "learning_rate": 0.0005, "loss": 2.1044, "step": 238760 }, { "epoch": 0.9088175513653007, "grad_norm": 0.12002543359994888, "learning_rate": 0.0005, "loss": 2.1047, "step": 238770 }, { "epoch": 0.9088556138334234, "grad_norm": 0.11458992213010788, "learning_rate": 0.0005, "loss": 2.0972, "step": 238780 }, { "epoch": 0.908893676301546, "grad_norm": 0.12165806442499161, "learning_rate": 0.0005, "loss": 2.0972, "step": 238790 }, { "epoch": 0.9089317387696688, "grad_norm": 0.1254490166902542, "learning_rate": 0.0005, "loss": 2.0819, "step": 238800 }, { "epoch": 0.9089698012377915, "grad_norm": 0.12584739923477173, "learning_rate": 0.0005, "loss": 2.0887, "step": 238810 }, { "epoch": 0.9090078637059141, "grad_norm": 0.14241082966327667, "learning_rate": 0.0005, "loss": 2.113, "step": 238820 }, { "epoch": 0.9090459261740368, "grad_norm": 0.13195203244686127, "learning_rate": 0.0005, "loss": 2.1049, "step": 238830 }, { "epoch": 0.9090839886421596, "grad_norm": 0.12545497715473175, "learning_rate": 0.0005, "loss": 2.1243, "step": 238840 }, { "epoch": 0.9091220511102822, "grad_norm": 0.12716884911060333, "learning_rate": 0.0005, "loss": 2.0926, "step": 238850 }, { "epoch": 0.9091601135784049, "grad_norm": 0.1222204640507698, "learning_rate": 0.0005, "loss": 2.0983, "step": 238860 }, { "epoch": 0.9091981760465275, "grad_norm": 0.12676088511943817, "learning_rate": 0.0005, "loss": 2.0906, "step": 238870 }, { "epoch": 0.9092362385146503, "grad_norm": 0.14713385701179504, "learning_rate": 0.0005, "loss": 2.1142, "step": 238880 }, { "epoch": 0.909274300982773, "grad_norm": 0.1335698366165161, "learning_rate": 0.0005, "loss": 2.1106, "step": 238890 }, { "epoch": 0.9093123634508956, "grad_norm": 0.13682667911052704, "learning_rate": 0.0005, "loss": 2.1036, "step": 238900 }, { "epoch": 0.9093504259190183, "grad_norm": 0.1311189830303192, "learning_rate": 0.0005, "loss": 2.1067, "step": 238910 }, { "epoch": 0.9093884883871409, "grad_norm": 0.13310740888118744, "learning_rate": 0.0005, "loss": 2.0996, "step": 238920 }, { "epoch": 0.9094265508552637, "grad_norm": 0.1309422254562378, "learning_rate": 0.0005, "loss": 2.1146, "step": 238930 }, { "epoch": 0.9094646133233864, "grad_norm": 0.12251695990562439, "learning_rate": 0.0005, "loss": 2.1065, "step": 238940 }, { "epoch": 0.909502675791509, "grad_norm": 0.13915124535560608, "learning_rate": 0.0005, "loss": 2.1109, "step": 238950 }, { "epoch": 0.9095407382596317, "grad_norm": 0.1315782219171524, "learning_rate": 0.0005, "loss": 2.0938, "step": 238960 }, { "epoch": 0.9095788007277544, "grad_norm": 0.14065399765968323, "learning_rate": 0.0005, "loss": 2.0993, "step": 238970 }, { "epoch": 0.9096168631958771, "grad_norm": 0.14490070939064026, "learning_rate": 0.0005, "loss": 2.1123, "step": 238980 }, { "epoch": 0.9096549256639997, "grad_norm": 0.132430762052536, "learning_rate": 0.0005, "loss": 2.0868, "step": 238990 }, { "epoch": 0.9096929881321224, "grad_norm": 0.1288212686777115, "learning_rate": 0.0005, "loss": 2.106, "step": 239000 }, { "epoch": 0.9097310506002452, "grad_norm": 0.1340613067150116, "learning_rate": 0.0005, "loss": 2.111, "step": 239010 }, { "epoch": 0.9097691130683678, "grad_norm": 0.13787075877189636, "learning_rate": 0.0005, "loss": 2.1096, "step": 239020 }, { "epoch": 0.9098071755364905, "grad_norm": 0.13590595126152039, "learning_rate": 0.0005, "loss": 2.0955, "step": 239030 }, { "epoch": 0.9098452380046131, "grad_norm": 0.12815529108047485, "learning_rate": 0.0005, "loss": 2.1125, "step": 239040 }, { "epoch": 0.9098833004727358, "grad_norm": 0.12662175297737122, "learning_rate": 0.0005, "loss": 2.116, "step": 239050 }, { "epoch": 0.9099213629408586, "grad_norm": 0.13380499184131622, "learning_rate": 0.0005, "loss": 2.115, "step": 239060 }, { "epoch": 0.9099594254089812, "grad_norm": 0.11831115186214447, "learning_rate": 0.0005, "loss": 2.1013, "step": 239070 }, { "epoch": 0.9099974878771039, "grad_norm": 0.12954314053058624, "learning_rate": 0.0005, "loss": 2.113, "step": 239080 }, { "epoch": 0.9100355503452265, "grad_norm": 0.12330293655395508, "learning_rate": 0.0005, "loss": 2.1085, "step": 239090 }, { "epoch": 0.9100736128133493, "grad_norm": 0.12839968502521515, "learning_rate": 0.0005, "loss": 2.1088, "step": 239100 }, { "epoch": 0.910111675281472, "grad_norm": 0.13956953585147858, "learning_rate": 0.0005, "loss": 2.1129, "step": 239110 }, { "epoch": 0.9101497377495946, "grad_norm": 0.1369396448135376, "learning_rate": 0.0005, "loss": 2.079, "step": 239120 }, { "epoch": 0.9101878002177173, "grad_norm": 0.1352585405111313, "learning_rate": 0.0005, "loss": 2.1114, "step": 239130 }, { "epoch": 0.91022586268584, "grad_norm": 0.13882745802402496, "learning_rate": 0.0005, "loss": 2.095, "step": 239140 }, { "epoch": 0.9102639251539627, "grad_norm": 0.12504839897155762, "learning_rate": 0.0005, "loss": 2.095, "step": 239150 }, { "epoch": 0.9103019876220854, "grad_norm": 0.1293504387140274, "learning_rate": 0.0005, "loss": 2.0844, "step": 239160 }, { "epoch": 0.910340050090208, "grad_norm": 0.11434295773506165, "learning_rate": 0.0005, "loss": 2.1052, "step": 239170 }, { "epoch": 0.9103781125583308, "grad_norm": 0.12427566945552826, "learning_rate": 0.0005, "loss": 2.0976, "step": 239180 }, { "epoch": 0.9104161750264534, "grad_norm": 0.12932242453098297, "learning_rate": 0.0005, "loss": 2.1074, "step": 239190 }, { "epoch": 0.9104542374945761, "grad_norm": 0.13227252662181854, "learning_rate": 0.0005, "loss": 2.1123, "step": 239200 }, { "epoch": 0.9104922999626988, "grad_norm": 0.13035479187965393, "learning_rate": 0.0005, "loss": 2.1053, "step": 239210 }, { "epoch": 0.9105303624308214, "grad_norm": 0.12500816583633423, "learning_rate": 0.0005, "loss": 2.1072, "step": 239220 }, { "epoch": 0.9105684248989442, "grad_norm": 0.1311212033033371, "learning_rate": 0.0005, "loss": 2.1046, "step": 239230 }, { "epoch": 0.9106064873670668, "grad_norm": 0.14410890638828278, "learning_rate": 0.0005, "loss": 2.1137, "step": 239240 }, { "epoch": 0.9106445498351895, "grad_norm": 0.1327977031469345, "learning_rate": 0.0005, "loss": 2.101, "step": 239250 }, { "epoch": 0.9106826123033122, "grad_norm": 0.12608478963375092, "learning_rate": 0.0005, "loss": 2.099, "step": 239260 }, { "epoch": 0.9107206747714349, "grad_norm": 0.1240050345659256, "learning_rate": 0.0005, "loss": 2.1085, "step": 239270 }, { "epoch": 0.9107587372395576, "grad_norm": 0.1272159367799759, "learning_rate": 0.0005, "loss": 2.1003, "step": 239280 }, { "epoch": 0.9107967997076802, "grad_norm": 0.12495238333940506, "learning_rate": 0.0005, "loss": 2.0952, "step": 239290 }, { "epoch": 0.9108348621758029, "grad_norm": 0.13057975471019745, "learning_rate": 0.0005, "loss": 2.1039, "step": 239300 }, { "epoch": 0.9108729246439257, "grad_norm": 0.14395946264266968, "learning_rate": 0.0005, "loss": 2.1039, "step": 239310 }, { "epoch": 0.9109109871120483, "grad_norm": 0.1312178671360016, "learning_rate": 0.0005, "loss": 2.107, "step": 239320 }, { "epoch": 0.910949049580171, "grad_norm": 0.13741567730903625, "learning_rate": 0.0005, "loss": 2.0995, "step": 239330 }, { "epoch": 0.9109871120482936, "grad_norm": 0.13207745552062988, "learning_rate": 0.0005, "loss": 2.0937, "step": 239340 }, { "epoch": 0.9110251745164163, "grad_norm": 0.13339263200759888, "learning_rate": 0.0005, "loss": 2.1145, "step": 239350 }, { "epoch": 0.9110632369845391, "grad_norm": 0.1387874186038971, "learning_rate": 0.0005, "loss": 2.0911, "step": 239360 }, { "epoch": 0.9111012994526617, "grad_norm": 0.13634562492370605, "learning_rate": 0.0005, "loss": 2.1074, "step": 239370 }, { "epoch": 0.9111393619207844, "grad_norm": 0.13025477528572083, "learning_rate": 0.0005, "loss": 2.1155, "step": 239380 }, { "epoch": 0.911177424388907, "grad_norm": 0.12511643767356873, "learning_rate": 0.0005, "loss": 2.1145, "step": 239390 }, { "epoch": 0.9112154868570298, "grad_norm": 0.13627535104751587, "learning_rate": 0.0005, "loss": 2.1107, "step": 239400 }, { "epoch": 0.9112535493251525, "grad_norm": 0.1424589306116104, "learning_rate": 0.0005, "loss": 2.1108, "step": 239410 }, { "epoch": 0.9112916117932751, "grad_norm": 0.12316667288541794, "learning_rate": 0.0005, "loss": 2.1111, "step": 239420 }, { "epoch": 0.9113296742613978, "grad_norm": 0.12137805670499802, "learning_rate": 0.0005, "loss": 2.096, "step": 239430 }, { "epoch": 0.9113677367295205, "grad_norm": 0.12696003913879395, "learning_rate": 0.0005, "loss": 2.0904, "step": 239440 }, { "epoch": 0.9114057991976432, "grad_norm": 0.11263412237167358, "learning_rate": 0.0005, "loss": 2.09, "step": 239450 }, { "epoch": 0.9114438616657659, "grad_norm": 0.12840403616428375, "learning_rate": 0.0005, "loss": 2.1049, "step": 239460 }, { "epoch": 0.9114819241338885, "grad_norm": 0.13457255065441132, "learning_rate": 0.0005, "loss": 2.0978, "step": 239470 }, { "epoch": 0.9115199866020112, "grad_norm": 0.12429932504892349, "learning_rate": 0.0005, "loss": 2.099, "step": 239480 }, { "epoch": 0.9115580490701339, "grad_norm": 0.12521515786647797, "learning_rate": 0.0005, "loss": 2.1085, "step": 239490 }, { "epoch": 0.9115961115382566, "grad_norm": 0.12820985913276672, "learning_rate": 0.0005, "loss": 2.113, "step": 239500 }, { "epoch": 0.9116341740063792, "grad_norm": 0.12908533215522766, "learning_rate": 0.0005, "loss": 2.0993, "step": 239510 }, { "epoch": 0.9116722364745019, "grad_norm": 0.12380233407020569, "learning_rate": 0.0005, "loss": 2.0984, "step": 239520 }, { "epoch": 0.9117102989426247, "grad_norm": 0.14130914211273193, "learning_rate": 0.0005, "loss": 2.0975, "step": 239530 }, { "epoch": 0.9117483614107473, "grad_norm": 0.132679283618927, "learning_rate": 0.0005, "loss": 2.088, "step": 239540 }, { "epoch": 0.91178642387887, "grad_norm": 0.13694171607494354, "learning_rate": 0.0005, "loss": 2.0992, "step": 239550 }, { "epoch": 0.9118244863469926, "grad_norm": 0.12591597437858582, "learning_rate": 0.0005, "loss": 2.0814, "step": 239560 }, { "epoch": 0.9118625488151154, "grad_norm": 0.11692557483911514, "learning_rate": 0.0005, "loss": 2.0929, "step": 239570 }, { "epoch": 0.9119006112832381, "grad_norm": 0.13992343842983246, "learning_rate": 0.0005, "loss": 2.1053, "step": 239580 }, { "epoch": 0.9119386737513607, "grad_norm": 0.1163182258605957, "learning_rate": 0.0005, "loss": 2.1039, "step": 239590 }, { "epoch": 0.9119767362194834, "grad_norm": 0.11891079694032669, "learning_rate": 0.0005, "loss": 2.096, "step": 239600 }, { "epoch": 0.9120147986876062, "grad_norm": 0.12675254046916962, "learning_rate": 0.0005, "loss": 2.1206, "step": 239610 }, { "epoch": 0.9120528611557288, "grad_norm": 0.12357204407453537, "learning_rate": 0.0005, "loss": 2.1081, "step": 239620 }, { "epoch": 0.9120909236238515, "grad_norm": 0.11951776593923569, "learning_rate": 0.0005, "loss": 2.102, "step": 239630 }, { "epoch": 0.9121289860919741, "grad_norm": 0.12621699273586273, "learning_rate": 0.0005, "loss": 2.1108, "step": 239640 }, { "epoch": 0.9121670485600968, "grad_norm": 0.12371443957090378, "learning_rate": 0.0005, "loss": 2.1006, "step": 239650 }, { "epoch": 0.9122051110282196, "grad_norm": 0.13789597153663635, "learning_rate": 0.0005, "loss": 2.0982, "step": 239660 }, { "epoch": 0.9122431734963422, "grad_norm": 0.12520888447761536, "learning_rate": 0.0005, "loss": 2.1109, "step": 239670 }, { "epoch": 0.9122812359644649, "grad_norm": 0.1457749903202057, "learning_rate": 0.0005, "loss": 2.1168, "step": 239680 }, { "epoch": 0.9123192984325875, "grad_norm": 0.13121797144412994, "learning_rate": 0.0005, "loss": 2.0919, "step": 239690 }, { "epoch": 0.9123573609007103, "grad_norm": 0.14107409119606018, "learning_rate": 0.0005, "loss": 2.0929, "step": 239700 }, { "epoch": 0.912395423368833, "grad_norm": 0.12807787954807281, "learning_rate": 0.0005, "loss": 2.0948, "step": 239710 }, { "epoch": 0.9124334858369556, "grad_norm": 0.13790073990821838, "learning_rate": 0.0005, "loss": 2.1105, "step": 239720 }, { "epoch": 0.9124715483050783, "grad_norm": 0.13001273572444916, "learning_rate": 0.0005, "loss": 2.1053, "step": 239730 }, { "epoch": 0.912509610773201, "grad_norm": 0.12587277591228485, "learning_rate": 0.0005, "loss": 2.0977, "step": 239740 }, { "epoch": 0.9125476732413237, "grad_norm": 0.14173942804336548, "learning_rate": 0.0005, "loss": 2.1022, "step": 239750 }, { "epoch": 0.9125857357094463, "grad_norm": 0.12748290598392487, "learning_rate": 0.0005, "loss": 2.0953, "step": 239760 }, { "epoch": 0.912623798177569, "grad_norm": 0.12559473514556885, "learning_rate": 0.0005, "loss": 2.1039, "step": 239770 }, { "epoch": 0.9126618606456917, "grad_norm": 0.1296173632144928, "learning_rate": 0.0005, "loss": 2.1206, "step": 239780 }, { "epoch": 0.9126999231138144, "grad_norm": 0.12084129452705383, "learning_rate": 0.0005, "loss": 2.107, "step": 239790 }, { "epoch": 0.9127379855819371, "grad_norm": 0.1363648772239685, "learning_rate": 0.0005, "loss": 2.1076, "step": 239800 }, { "epoch": 0.9127760480500597, "grad_norm": 0.13141706585884094, "learning_rate": 0.0005, "loss": 2.1032, "step": 239810 }, { "epoch": 0.9128141105181824, "grad_norm": 0.1180909052491188, "learning_rate": 0.0005, "loss": 2.0905, "step": 239820 }, { "epoch": 0.9128521729863052, "grad_norm": 0.1213419958949089, "learning_rate": 0.0005, "loss": 2.1136, "step": 239830 }, { "epoch": 0.9128902354544278, "grad_norm": 0.13057322800159454, "learning_rate": 0.0005, "loss": 2.1055, "step": 239840 }, { "epoch": 0.9129282979225505, "grad_norm": 0.14257821440696716, "learning_rate": 0.0005, "loss": 2.102, "step": 239850 }, { "epoch": 0.9129663603906731, "grad_norm": 0.1380070596933365, "learning_rate": 0.0005, "loss": 2.114, "step": 239860 }, { "epoch": 0.9130044228587959, "grad_norm": 0.1317712664604187, "learning_rate": 0.0005, "loss": 2.1132, "step": 239870 }, { "epoch": 0.9130424853269186, "grad_norm": 0.14122363924980164, "learning_rate": 0.0005, "loss": 2.1024, "step": 239880 }, { "epoch": 0.9130805477950412, "grad_norm": 0.11707179248332977, "learning_rate": 0.0005, "loss": 2.0888, "step": 239890 }, { "epoch": 0.9131186102631639, "grad_norm": 0.13957612216472626, "learning_rate": 0.0005, "loss": 2.118, "step": 239900 }, { "epoch": 0.9131566727312865, "grad_norm": 0.13124236464500427, "learning_rate": 0.0005, "loss": 2.0931, "step": 239910 }, { "epoch": 0.9131947351994093, "grad_norm": 0.12704578042030334, "learning_rate": 0.0005, "loss": 2.1131, "step": 239920 }, { "epoch": 0.913232797667532, "grad_norm": 0.1228540688753128, "learning_rate": 0.0005, "loss": 2.107, "step": 239930 }, { "epoch": 0.9132708601356546, "grad_norm": 0.11537936329841614, "learning_rate": 0.0005, "loss": 2.1111, "step": 239940 }, { "epoch": 0.9133089226037773, "grad_norm": 0.12744903564453125, "learning_rate": 0.0005, "loss": 2.1048, "step": 239950 }, { "epoch": 0.9133469850719, "grad_norm": 0.1313735693693161, "learning_rate": 0.0005, "loss": 2.1068, "step": 239960 }, { "epoch": 0.9133850475400227, "grad_norm": 0.14830102026462555, "learning_rate": 0.0005, "loss": 2.104, "step": 239970 }, { "epoch": 0.9134231100081454, "grad_norm": 0.1338607221841812, "learning_rate": 0.0005, "loss": 2.0951, "step": 239980 }, { "epoch": 0.913461172476268, "grad_norm": 0.1361265778541565, "learning_rate": 0.0005, "loss": 2.0934, "step": 239990 }, { "epoch": 0.9134992349443908, "grad_norm": 0.11743402481079102, "learning_rate": 0.0005, "loss": 2.0992, "step": 240000 }, { "epoch": 0.9135372974125134, "grad_norm": 0.12837345898151398, "learning_rate": 0.0005, "loss": 2.0893, "step": 240010 }, { "epoch": 0.9135753598806361, "grad_norm": 0.13669757544994354, "learning_rate": 0.0005, "loss": 2.1127, "step": 240020 }, { "epoch": 0.9136134223487588, "grad_norm": 0.155908465385437, "learning_rate": 0.0005, "loss": 2.1137, "step": 240030 }, { "epoch": 0.9136514848168815, "grad_norm": 0.11326774209737778, "learning_rate": 0.0005, "loss": 2.1018, "step": 240040 }, { "epoch": 0.9136895472850042, "grad_norm": 0.12513118982315063, "learning_rate": 0.0005, "loss": 2.0939, "step": 240050 }, { "epoch": 0.9137276097531268, "grad_norm": 0.11165674775838852, "learning_rate": 0.0005, "loss": 2.1065, "step": 240060 }, { "epoch": 0.9137656722212495, "grad_norm": 0.1229131743311882, "learning_rate": 0.0005, "loss": 2.1056, "step": 240070 }, { "epoch": 0.9138037346893721, "grad_norm": 0.12102916091680527, "learning_rate": 0.0005, "loss": 2.1023, "step": 240080 }, { "epoch": 0.9138417971574949, "grad_norm": 0.12489450722932816, "learning_rate": 0.0005, "loss": 2.1102, "step": 240090 }, { "epoch": 0.9138798596256176, "grad_norm": 0.13946636021137238, "learning_rate": 0.0005, "loss": 2.1028, "step": 240100 }, { "epoch": 0.9139179220937402, "grad_norm": 0.13643383979797363, "learning_rate": 0.0005, "loss": 2.0916, "step": 240110 }, { "epoch": 0.9139559845618629, "grad_norm": 0.12476367503404617, "learning_rate": 0.0005, "loss": 2.0862, "step": 240120 }, { "epoch": 0.9139940470299857, "grad_norm": 0.13304784893989563, "learning_rate": 0.0005, "loss": 2.0984, "step": 240130 }, { "epoch": 0.9140321094981083, "grad_norm": 0.11753713339567184, "learning_rate": 0.0005, "loss": 2.1004, "step": 240140 }, { "epoch": 0.914070171966231, "grad_norm": 0.13841630518436432, "learning_rate": 0.0005, "loss": 2.0843, "step": 240150 }, { "epoch": 0.9141082344343536, "grad_norm": 0.1333719938993454, "learning_rate": 0.0005, "loss": 2.0964, "step": 240160 }, { "epoch": 0.9141462969024764, "grad_norm": 0.13542665541172028, "learning_rate": 0.0005, "loss": 2.1123, "step": 240170 }, { "epoch": 0.914184359370599, "grad_norm": 0.135142520070076, "learning_rate": 0.0005, "loss": 2.0995, "step": 240180 }, { "epoch": 0.9142224218387217, "grad_norm": 0.1313934624195099, "learning_rate": 0.0005, "loss": 2.1216, "step": 240190 }, { "epoch": 0.9142604843068444, "grad_norm": 0.12458037585020065, "learning_rate": 0.0005, "loss": 2.1029, "step": 240200 }, { "epoch": 0.914298546774967, "grad_norm": 0.13040408492088318, "learning_rate": 0.0005, "loss": 2.1171, "step": 240210 }, { "epoch": 0.9143366092430898, "grad_norm": 0.124188631772995, "learning_rate": 0.0005, "loss": 2.1002, "step": 240220 }, { "epoch": 0.9143746717112125, "grad_norm": 0.14607562124729156, "learning_rate": 0.0005, "loss": 2.1065, "step": 240230 }, { "epoch": 0.9144127341793351, "grad_norm": 0.13257095217704773, "learning_rate": 0.0005, "loss": 2.1069, "step": 240240 }, { "epoch": 0.9144507966474578, "grad_norm": 0.12451108545064926, "learning_rate": 0.0005, "loss": 2.104, "step": 240250 }, { "epoch": 0.9144888591155805, "grad_norm": 0.12608541548252106, "learning_rate": 0.0005, "loss": 2.1104, "step": 240260 }, { "epoch": 0.9145269215837032, "grad_norm": 0.13142694532871246, "learning_rate": 0.0005, "loss": 2.1071, "step": 240270 }, { "epoch": 0.9145649840518258, "grad_norm": 0.13280390202999115, "learning_rate": 0.0005, "loss": 2.1042, "step": 240280 }, { "epoch": 0.9146030465199485, "grad_norm": 0.13582828640937805, "learning_rate": 0.0005, "loss": 2.1084, "step": 240290 }, { "epoch": 0.9146411089880713, "grad_norm": 0.1362244039773941, "learning_rate": 0.0005, "loss": 2.1041, "step": 240300 }, { "epoch": 0.9146791714561939, "grad_norm": 0.14132159948349, "learning_rate": 0.0005, "loss": 2.086, "step": 240310 }, { "epoch": 0.9147172339243166, "grad_norm": 0.124893419444561, "learning_rate": 0.0005, "loss": 2.1098, "step": 240320 }, { "epoch": 0.9147552963924392, "grad_norm": 0.1291424036026001, "learning_rate": 0.0005, "loss": 2.1093, "step": 240330 }, { "epoch": 0.9147933588605619, "grad_norm": 0.11848355829715729, "learning_rate": 0.0005, "loss": 2.1123, "step": 240340 }, { "epoch": 0.9148314213286847, "grad_norm": 0.12111199647188187, "learning_rate": 0.0005, "loss": 2.1064, "step": 240350 }, { "epoch": 0.9148694837968073, "grad_norm": 0.11707769334316254, "learning_rate": 0.0005, "loss": 2.1063, "step": 240360 }, { "epoch": 0.91490754626493, "grad_norm": 0.12086135894060135, "learning_rate": 0.0005, "loss": 2.0953, "step": 240370 }, { "epoch": 0.9149456087330526, "grad_norm": 0.13472595810890198, "learning_rate": 0.0005, "loss": 2.0966, "step": 240380 }, { "epoch": 0.9149836712011754, "grad_norm": 0.12526558339595795, "learning_rate": 0.0005, "loss": 2.082, "step": 240390 }, { "epoch": 0.9150217336692981, "grad_norm": 0.12476403266191483, "learning_rate": 0.0005, "loss": 2.1087, "step": 240400 }, { "epoch": 0.9150597961374207, "grad_norm": 0.13596831262111664, "learning_rate": 0.0005, "loss": 2.1066, "step": 240410 }, { "epoch": 0.9150978586055434, "grad_norm": 0.12879303097724915, "learning_rate": 0.0005, "loss": 2.1187, "step": 240420 }, { "epoch": 0.9151359210736661, "grad_norm": 0.13720719516277313, "learning_rate": 0.0005, "loss": 2.0952, "step": 240430 }, { "epoch": 0.9151739835417888, "grad_norm": 0.1436295062303543, "learning_rate": 0.0005, "loss": 2.1236, "step": 240440 }, { "epoch": 0.9152120460099115, "grad_norm": 0.12874655425548553, "learning_rate": 0.0005, "loss": 2.1082, "step": 240450 }, { "epoch": 0.9152501084780341, "grad_norm": 0.1262015402317047, "learning_rate": 0.0005, "loss": 2.0894, "step": 240460 }, { "epoch": 0.9152881709461569, "grad_norm": 0.13029156625270844, "learning_rate": 0.0005, "loss": 2.1104, "step": 240470 }, { "epoch": 0.9153262334142795, "grad_norm": 0.1307915300130844, "learning_rate": 0.0005, "loss": 2.0999, "step": 240480 }, { "epoch": 0.9153642958824022, "grad_norm": 0.13881021738052368, "learning_rate": 0.0005, "loss": 2.1005, "step": 240490 }, { "epoch": 0.9154023583505249, "grad_norm": 0.12643685936927795, "learning_rate": 0.0005, "loss": 2.0945, "step": 240500 }, { "epoch": 0.9154404208186475, "grad_norm": 0.11630550026893616, "learning_rate": 0.0005, "loss": 2.1111, "step": 240510 }, { "epoch": 0.9154784832867703, "grad_norm": 0.11585814505815506, "learning_rate": 0.0005, "loss": 2.1132, "step": 240520 }, { "epoch": 0.9155165457548929, "grad_norm": 0.12080513685941696, "learning_rate": 0.0005, "loss": 2.0908, "step": 240530 }, { "epoch": 0.9155546082230156, "grad_norm": 0.13465972244739532, "learning_rate": 0.0005, "loss": 2.1056, "step": 240540 }, { "epoch": 0.9155926706911383, "grad_norm": 0.11828291416168213, "learning_rate": 0.0005, "loss": 2.1155, "step": 240550 }, { "epoch": 0.915630733159261, "grad_norm": 0.13376320898532867, "learning_rate": 0.0005, "loss": 2.1014, "step": 240560 }, { "epoch": 0.9156687956273837, "grad_norm": 0.12552687525749207, "learning_rate": 0.0005, "loss": 2.1085, "step": 240570 }, { "epoch": 0.9157068580955063, "grad_norm": 0.13472148776054382, "learning_rate": 0.0005, "loss": 2.0974, "step": 240580 }, { "epoch": 0.915744920563629, "grad_norm": 0.12127243727445602, "learning_rate": 0.0005, "loss": 2.0959, "step": 240590 }, { "epoch": 0.9157829830317518, "grad_norm": 0.13831770420074463, "learning_rate": 0.0005, "loss": 2.089, "step": 240600 }, { "epoch": 0.9158210454998744, "grad_norm": 0.12685850262641907, "learning_rate": 0.0005, "loss": 2.1001, "step": 240610 }, { "epoch": 0.9158591079679971, "grad_norm": 0.14404189586639404, "learning_rate": 0.0005, "loss": 2.1089, "step": 240620 }, { "epoch": 0.9158971704361197, "grad_norm": 0.13158273696899414, "learning_rate": 0.0005, "loss": 2.1072, "step": 240630 }, { "epoch": 0.9159352329042424, "grad_norm": 0.13201908767223358, "learning_rate": 0.0005, "loss": 2.1159, "step": 240640 }, { "epoch": 0.9159732953723652, "grad_norm": 0.1278715282678604, "learning_rate": 0.0005, "loss": 2.107, "step": 240650 }, { "epoch": 0.9160113578404878, "grad_norm": 0.12094113230705261, "learning_rate": 0.0005, "loss": 2.089, "step": 240660 }, { "epoch": 0.9160494203086105, "grad_norm": 0.11523523926734924, "learning_rate": 0.0005, "loss": 2.1057, "step": 240670 }, { "epoch": 0.9160874827767331, "grad_norm": 0.12744709849357605, "learning_rate": 0.0005, "loss": 2.1014, "step": 240680 }, { "epoch": 0.9161255452448559, "grad_norm": 0.12252113968133926, "learning_rate": 0.0005, "loss": 2.091, "step": 240690 }, { "epoch": 0.9161636077129786, "grad_norm": 0.12514427304267883, "learning_rate": 0.0005, "loss": 2.0992, "step": 240700 }, { "epoch": 0.9162016701811012, "grad_norm": 0.12158069014549255, "learning_rate": 0.0005, "loss": 2.1081, "step": 240710 }, { "epoch": 0.9162397326492239, "grad_norm": 0.13026173412799835, "learning_rate": 0.0005, "loss": 2.1023, "step": 240720 }, { "epoch": 0.9162777951173466, "grad_norm": 0.11686385422945023, "learning_rate": 0.0005, "loss": 2.1163, "step": 240730 }, { "epoch": 0.9163158575854693, "grad_norm": 0.13855034112930298, "learning_rate": 0.0005, "loss": 2.1097, "step": 240740 }, { "epoch": 0.916353920053592, "grad_norm": 0.1281408816576004, "learning_rate": 0.0005, "loss": 2.0993, "step": 240750 }, { "epoch": 0.9163919825217146, "grad_norm": 0.13539083302021027, "learning_rate": 0.0005, "loss": 2.121, "step": 240760 }, { "epoch": 0.9164300449898373, "grad_norm": 0.11456126719713211, "learning_rate": 0.0005, "loss": 2.1029, "step": 240770 }, { "epoch": 0.91646810745796, "grad_norm": 0.12172668427228928, "learning_rate": 0.0005, "loss": 2.0896, "step": 240780 }, { "epoch": 0.9165061699260827, "grad_norm": 0.12095924466848373, "learning_rate": 0.0005, "loss": 2.1077, "step": 240790 }, { "epoch": 0.9165442323942053, "grad_norm": 0.12311307340860367, "learning_rate": 0.0005, "loss": 2.1183, "step": 240800 }, { "epoch": 0.916582294862328, "grad_norm": 0.14387722313404083, "learning_rate": 0.0005, "loss": 2.1175, "step": 240810 }, { "epoch": 0.9166203573304508, "grad_norm": 0.1334153264760971, "learning_rate": 0.0005, "loss": 2.1018, "step": 240820 }, { "epoch": 0.9166584197985734, "grad_norm": 0.12912805378437042, "learning_rate": 0.0005, "loss": 2.0925, "step": 240830 }, { "epoch": 0.9166964822666961, "grad_norm": 0.11982877552509308, "learning_rate": 0.0005, "loss": 2.1015, "step": 240840 }, { "epoch": 0.9167345447348187, "grad_norm": 0.12010152637958527, "learning_rate": 0.0005, "loss": 2.0923, "step": 240850 }, { "epoch": 0.9167726072029415, "grad_norm": 0.1530761867761612, "learning_rate": 0.0005, "loss": 2.1129, "step": 240860 }, { "epoch": 0.9168106696710642, "grad_norm": 0.13544851541519165, "learning_rate": 0.0005, "loss": 2.1058, "step": 240870 }, { "epoch": 0.9168487321391868, "grad_norm": 0.13245892524719238, "learning_rate": 0.0005, "loss": 2.105, "step": 240880 }, { "epoch": 0.9168867946073095, "grad_norm": 0.13171501457691193, "learning_rate": 0.0005, "loss": 2.0951, "step": 240890 }, { "epoch": 0.9169248570754323, "grad_norm": 0.12141535431146622, "learning_rate": 0.0005, "loss": 2.0924, "step": 240900 }, { "epoch": 0.9169629195435549, "grad_norm": 0.12837472558021545, "learning_rate": 0.0005, "loss": 2.1122, "step": 240910 }, { "epoch": 0.9170009820116776, "grad_norm": 0.12579694390296936, "learning_rate": 0.0005, "loss": 2.1121, "step": 240920 }, { "epoch": 0.9170390444798002, "grad_norm": 0.12822788953781128, "learning_rate": 0.0005, "loss": 2.1123, "step": 240930 }, { "epoch": 0.9170771069479229, "grad_norm": 0.12980876863002777, "learning_rate": 0.0005, "loss": 2.111, "step": 240940 }, { "epoch": 0.9171151694160457, "grad_norm": 0.11740733683109283, "learning_rate": 0.0005, "loss": 2.0993, "step": 240950 }, { "epoch": 0.9171532318841683, "grad_norm": 0.1145625188946724, "learning_rate": 0.0005, "loss": 2.0938, "step": 240960 }, { "epoch": 0.917191294352291, "grad_norm": 0.1304570436477661, "learning_rate": 0.0005, "loss": 2.1069, "step": 240970 }, { "epoch": 0.9172293568204136, "grad_norm": 0.12784039974212646, "learning_rate": 0.0005, "loss": 2.0988, "step": 240980 }, { "epoch": 0.9172674192885364, "grad_norm": 0.12327566742897034, "learning_rate": 0.0005, "loss": 2.1002, "step": 240990 }, { "epoch": 0.917305481756659, "grad_norm": 0.1315973401069641, "learning_rate": 0.0005, "loss": 2.1071, "step": 241000 }, { "epoch": 0.9173435442247817, "grad_norm": 0.15045495331287384, "learning_rate": 0.0005, "loss": 2.1076, "step": 241010 }, { "epoch": 0.9173816066929044, "grad_norm": 0.11927295476198196, "learning_rate": 0.0005, "loss": 2.0953, "step": 241020 }, { "epoch": 0.9174196691610271, "grad_norm": 0.1331934928894043, "learning_rate": 0.0005, "loss": 2.1129, "step": 241030 }, { "epoch": 0.9174577316291498, "grad_norm": 0.12129730731248856, "learning_rate": 0.0005, "loss": 2.0902, "step": 241040 }, { "epoch": 0.9174957940972724, "grad_norm": 0.13536980748176575, "learning_rate": 0.0005, "loss": 2.1193, "step": 241050 }, { "epoch": 0.9175338565653951, "grad_norm": 0.13700516521930695, "learning_rate": 0.0005, "loss": 2.1012, "step": 241060 }, { "epoch": 0.9175719190335178, "grad_norm": 0.13450057804584503, "learning_rate": 0.0005, "loss": 2.1258, "step": 241070 }, { "epoch": 0.9176099815016405, "grad_norm": 0.13630062341690063, "learning_rate": 0.0005, "loss": 2.1023, "step": 241080 }, { "epoch": 0.9176480439697632, "grad_norm": 0.1458582878112793, "learning_rate": 0.0005, "loss": 2.0986, "step": 241090 }, { "epoch": 0.9176861064378858, "grad_norm": 0.1451157182455063, "learning_rate": 0.0005, "loss": 2.1206, "step": 241100 }, { "epoch": 0.9177241689060085, "grad_norm": 0.11896733939647675, "learning_rate": 0.0005, "loss": 2.0929, "step": 241110 }, { "epoch": 0.9177622313741313, "grad_norm": 0.1251809000968933, "learning_rate": 0.0005, "loss": 2.101, "step": 241120 }, { "epoch": 0.9178002938422539, "grad_norm": 0.11638472974300385, "learning_rate": 0.0005, "loss": 2.0895, "step": 241130 }, { "epoch": 0.9178383563103766, "grad_norm": 0.1312682032585144, "learning_rate": 0.0005, "loss": 2.1131, "step": 241140 }, { "epoch": 0.9178764187784992, "grad_norm": 0.11362048238515854, "learning_rate": 0.0005, "loss": 2.1077, "step": 241150 }, { "epoch": 0.917914481246622, "grad_norm": 0.11174650490283966, "learning_rate": 0.0005, "loss": 2.1089, "step": 241160 }, { "epoch": 0.9179525437147447, "grad_norm": 0.1195438802242279, "learning_rate": 0.0005, "loss": 2.1127, "step": 241170 }, { "epoch": 0.9179906061828673, "grad_norm": 0.12362208217382431, "learning_rate": 0.0005, "loss": 2.1029, "step": 241180 }, { "epoch": 0.91802866865099, "grad_norm": 0.1291234940290451, "learning_rate": 0.0005, "loss": 2.1161, "step": 241190 }, { "epoch": 0.9180667311191126, "grad_norm": 0.1306326985359192, "learning_rate": 0.0005, "loss": 2.1035, "step": 241200 }, { "epoch": 0.9181047935872354, "grad_norm": 0.11614658683538437, "learning_rate": 0.0005, "loss": 2.0925, "step": 241210 }, { "epoch": 0.9181428560553581, "grad_norm": 0.13114012777805328, "learning_rate": 0.0005, "loss": 2.1127, "step": 241220 }, { "epoch": 0.9181809185234807, "grad_norm": 0.12753938138484955, "learning_rate": 0.0005, "loss": 2.1019, "step": 241230 }, { "epoch": 0.9182189809916034, "grad_norm": 0.1368921548128128, "learning_rate": 0.0005, "loss": 2.1047, "step": 241240 }, { "epoch": 0.9182570434597261, "grad_norm": 0.11379440873861313, "learning_rate": 0.0005, "loss": 2.087, "step": 241250 }, { "epoch": 0.9182951059278488, "grad_norm": 0.14151537418365479, "learning_rate": 0.0005, "loss": 2.1126, "step": 241260 }, { "epoch": 0.9183331683959715, "grad_norm": 0.1276009976863861, "learning_rate": 0.0005, "loss": 2.1047, "step": 241270 }, { "epoch": 0.9183712308640941, "grad_norm": 0.14686158299446106, "learning_rate": 0.0005, "loss": 2.0872, "step": 241280 }, { "epoch": 0.9184092933322169, "grad_norm": 0.12174024432897568, "learning_rate": 0.0005, "loss": 2.1089, "step": 241290 }, { "epoch": 0.9184473558003395, "grad_norm": 0.12808623909950256, "learning_rate": 0.0005, "loss": 2.0832, "step": 241300 }, { "epoch": 0.9184854182684622, "grad_norm": 0.13373562693595886, "learning_rate": 0.0005, "loss": 2.1182, "step": 241310 }, { "epoch": 0.9185234807365849, "grad_norm": 0.13873694837093353, "learning_rate": 0.0005, "loss": 2.0989, "step": 241320 }, { "epoch": 0.9185615432047076, "grad_norm": 0.1596411168575287, "learning_rate": 0.0005, "loss": 2.1006, "step": 241330 }, { "epoch": 0.9185996056728303, "grad_norm": 0.1393207609653473, "learning_rate": 0.0005, "loss": 2.1048, "step": 241340 }, { "epoch": 0.9186376681409529, "grad_norm": 0.12455132603645325, "learning_rate": 0.0005, "loss": 2.0927, "step": 241350 }, { "epoch": 0.9186757306090756, "grad_norm": 0.13242022693157196, "learning_rate": 0.0005, "loss": 2.1036, "step": 241360 }, { "epoch": 0.9187137930771982, "grad_norm": 0.13528971374034882, "learning_rate": 0.0005, "loss": 2.1154, "step": 241370 }, { "epoch": 0.918751855545321, "grad_norm": 0.12305892258882523, "learning_rate": 0.0005, "loss": 2.0876, "step": 241380 }, { "epoch": 0.9187899180134437, "grad_norm": 0.11626999080181122, "learning_rate": 0.0005, "loss": 2.0927, "step": 241390 }, { "epoch": 0.9188279804815663, "grad_norm": 0.11935063451528549, "learning_rate": 0.0005, "loss": 2.1049, "step": 241400 }, { "epoch": 0.918866042949689, "grad_norm": 0.13918636739253998, "learning_rate": 0.0005, "loss": 2.0972, "step": 241410 }, { "epoch": 0.9189041054178118, "grad_norm": 0.117560476064682, "learning_rate": 0.0005, "loss": 2.112, "step": 241420 }, { "epoch": 0.9189421678859344, "grad_norm": 0.12481194734573364, "learning_rate": 0.0005, "loss": 2.1261, "step": 241430 }, { "epoch": 0.9189802303540571, "grad_norm": 0.12880441546440125, "learning_rate": 0.0005, "loss": 2.0838, "step": 241440 }, { "epoch": 0.9190182928221797, "grad_norm": 0.14205487072467804, "learning_rate": 0.0005, "loss": 2.0902, "step": 241450 }, { "epoch": 0.9190563552903025, "grad_norm": 0.13198022544384003, "learning_rate": 0.0005, "loss": 2.0923, "step": 241460 }, { "epoch": 0.9190944177584252, "grad_norm": 0.12845546007156372, "learning_rate": 0.0005, "loss": 2.1003, "step": 241470 }, { "epoch": 0.9191324802265478, "grad_norm": 0.22721713781356812, "learning_rate": 0.0005, "loss": 2.0987, "step": 241480 }, { "epoch": 0.9191705426946705, "grad_norm": 0.12433764338493347, "learning_rate": 0.0005, "loss": 2.1006, "step": 241490 }, { "epoch": 0.9192086051627931, "grad_norm": 0.12495458871126175, "learning_rate": 0.0005, "loss": 2.0987, "step": 241500 }, { "epoch": 0.9192466676309159, "grad_norm": 0.13803954422473907, "learning_rate": 0.0005, "loss": 2.1071, "step": 241510 }, { "epoch": 0.9192847300990385, "grad_norm": 0.11726602911949158, "learning_rate": 0.0005, "loss": 2.1157, "step": 241520 }, { "epoch": 0.9193227925671612, "grad_norm": 0.12827108800411224, "learning_rate": 0.0005, "loss": 2.0917, "step": 241530 }, { "epoch": 0.9193608550352839, "grad_norm": 0.12403757125139236, "learning_rate": 0.0005, "loss": 2.112, "step": 241540 }, { "epoch": 0.9193989175034066, "grad_norm": 0.12619732320308685, "learning_rate": 0.0005, "loss": 2.0945, "step": 241550 }, { "epoch": 0.9194369799715293, "grad_norm": 0.13321739435195923, "learning_rate": 0.0005, "loss": 2.1143, "step": 241560 }, { "epoch": 0.919475042439652, "grad_norm": 0.1475500762462616, "learning_rate": 0.0005, "loss": 2.1187, "step": 241570 }, { "epoch": 0.9195131049077746, "grad_norm": 0.11906681209802628, "learning_rate": 0.0005, "loss": 2.1118, "step": 241580 }, { "epoch": 0.9195511673758974, "grad_norm": 0.1269826889038086, "learning_rate": 0.0005, "loss": 2.0899, "step": 241590 }, { "epoch": 0.91958922984402, "grad_norm": 0.12411966174840927, "learning_rate": 0.0005, "loss": 2.1053, "step": 241600 }, { "epoch": 0.9196272923121427, "grad_norm": 0.12925204634666443, "learning_rate": 0.0005, "loss": 2.1137, "step": 241610 }, { "epoch": 0.9196653547802653, "grad_norm": 0.12196630239486694, "learning_rate": 0.0005, "loss": 2.1188, "step": 241620 }, { "epoch": 0.919703417248388, "grad_norm": 0.13332179188728333, "learning_rate": 0.0005, "loss": 2.1026, "step": 241630 }, { "epoch": 0.9197414797165108, "grad_norm": 0.11703956127166748, "learning_rate": 0.0005, "loss": 2.1036, "step": 241640 }, { "epoch": 0.9197795421846334, "grad_norm": 0.12794820964336395, "learning_rate": 0.0005, "loss": 2.1046, "step": 241650 }, { "epoch": 0.9198176046527561, "grad_norm": 0.13128633797168732, "learning_rate": 0.0005, "loss": 2.1109, "step": 241660 }, { "epoch": 0.9198556671208787, "grad_norm": 0.14611156284809113, "learning_rate": 0.0005, "loss": 2.1177, "step": 241670 }, { "epoch": 0.9198937295890015, "grad_norm": 0.11457149684429169, "learning_rate": 0.0005, "loss": 2.1111, "step": 241680 }, { "epoch": 0.9199317920571242, "grad_norm": 0.12531642615795135, "learning_rate": 0.0005, "loss": 2.1071, "step": 241690 }, { "epoch": 0.9199698545252468, "grad_norm": 0.11403706669807434, "learning_rate": 0.0005, "loss": 2.1031, "step": 241700 }, { "epoch": 0.9200079169933695, "grad_norm": 0.13887332379817963, "learning_rate": 0.0005, "loss": 2.0902, "step": 241710 }, { "epoch": 0.9200459794614922, "grad_norm": 0.1301995813846588, "learning_rate": 0.0005, "loss": 2.1003, "step": 241720 }, { "epoch": 0.9200840419296149, "grad_norm": 0.12377279996871948, "learning_rate": 0.0005, "loss": 2.1103, "step": 241730 }, { "epoch": 0.9201221043977376, "grad_norm": 0.1265926957130432, "learning_rate": 0.0005, "loss": 2.1155, "step": 241740 }, { "epoch": 0.9201601668658602, "grad_norm": 0.12687602639198303, "learning_rate": 0.0005, "loss": 2.1093, "step": 241750 }, { "epoch": 0.920198229333983, "grad_norm": 0.12322045117616653, "learning_rate": 0.0005, "loss": 2.1102, "step": 241760 }, { "epoch": 0.9202362918021056, "grad_norm": 0.12699291110038757, "learning_rate": 0.0005, "loss": 2.1069, "step": 241770 }, { "epoch": 0.9202743542702283, "grad_norm": 0.13091620802879333, "learning_rate": 0.0005, "loss": 2.1147, "step": 241780 }, { "epoch": 0.920312416738351, "grad_norm": 0.14797089993953705, "learning_rate": 0.0005, "loss": 2.1033, "step": 241790 }, { "epoch": 0.9203504792064736, "grad_norm": 0.12071997672319412, "learning_rate": 0.0005, "loss": 2.1128, "step": 241800 }, { "epoch": 0.9203885416745964, "grad_norm": 0.1170908659696579, "learning_rate": 0.0005, "loss": 2.0987, "step": 241810 }, { "epoch": 0.920426604142719, "grad_norm": 0.12971200048923492, "learning_rate": 0.0005, "loss": 2.1098, "step": 241820 }, { "epoch": 0.9204646666108417, "grad_norm": 0.13067524135112762, "learning_rate": 0.0005, "loss": 2.0944, "step": 241830 }, { "epoch": 0.9205027290789644, "grad_norm": 0.11806374788284302, "learning_rate": 0.0005, "loss": 2.0939, "step": 241840 }, { "epoch": 0.9205407915470871, "grad_norm": 0.12400566041469574, "learning_rate": 0.0005, "loss": 2.1069, "step": 241850 }, { "epoch": 0.9205788540152098, "grad_norm": 0.11953874677419662, "learning_rate": 0.0005, "loss": 2.1134, "step": 241860 }, { "epoch": 0.9206169164833324, "grad_norm": 0.1293313354253769, "learning_rate": 0.0005, "loss": 2.1219, "step": 241870 }, { "epoch": 0.9206549789514551, "grad_norm": 0.12719066441059113, "learning_rate": 0.0005, "loss": 2.0887, "step": 241880 }, { "epoch": 0.9206930414195779, "grad_norm": 0.13698673248291016, "learning_rate": 0.0005, "loss": 2.0927, "step": 241890 }, { "epoch": 0.9207311038877005, "grad_norm": 0.12375357747077942, "learning_rate": 0.0005, "loss": 2.1005, "step": 241900 }, { "epoch": 0.9207691663558232, "grad_norm": 0.12502874433994293, "learning_rate": 0.0005, "loss": 2.0936, "step": 241910 }, { "epoch": 0.9208072288239458, "grad_norm": 0.1243850439786911, "learning_rate": 0.0005, "loss": 2.1061, "step": 241920 }, { "epoch": 0.9208452912920685, "grad_norm": 0.1315883994102478, "learning_rate": 0.0005, "loss": 2.0987, "step": 241930 }, { "epoch": 0.9208833537601913, "grad_norm": 0.140249103307724, "learning_rate": 0.0005, "loss": 2.095, "step": 241940 }, { "epoch": 0.9209214162283139, "grad_norm": 0.12236278504133224, "learning_rate": 0.0005, "loss": 2.1157, "step": 241950 }, { "epoch": 0.9209594786964366, "grad_norm": 0.14159269630908966, "learning_rate": 0.0005, "loss": 2.1072, "step": 241960 }, { "epoch": 0.9209975411645592, "grad_norm": 0.12036581337451935, "learning_rate": 0.0005, "loss": 2.0907, "step": 241970 }, { "epoch": 0.921035603632682, "grad_norm": 0.13146193325519562, "learning_rate": 0.0005, "loss": 2.1083, "step": 241980 }, { "epoch": 0.9210736661008047, "grad_norm": 0.12361620366573334, "learning_rate": 0.0005, "loss": 2.1125, "step": 241990 }, { "epoch": 0.9211117285689273, "grad_norm": 0.13313716650009155, "learning_rate": 0.0005, "loss": 2.1029, "step": 242000 }, { "epoch": 0.92114979103705, "grad_norm": 0.12712553143501282, "learning_rate": 0.0005, "loss": 2.1169, "step": 242010 }, { "epoch": 0.9211878535051727, "grad_norm": 0.13036790490150452, "learning_rate": 0.0005, "loss": 2.1017, "step": 242020 }, { "epoch": 0.9212259159732954, "grad_norm": 0.14863251149654388, "learning_rate": 0.0005, "loss": 2.0993, "step": 242030 }, { "epoch": 0.921263978441418, "grad_norm": 0.13584734499454498, "learning_rate": 0.0005, "loss": 2.1006, "step": 242040 }, { "epoch": 0.9213020409095407, "grad_norm": 0.12490679323673248, "learning_rate": 0.0005, "loss": 2.0962, "step": 242050 }, { "epoch": 0.9213401033776634, "grad_norm": 0.1250891089439392, "learning_rate": 0.0005, "loss": 2.1001, "step": 242060 }, { "epoch": 0.9213781658457861, "grad_norm": 0.12336582690477371, "learning_rate": 0.0005, "loss": 2.0968, "step": 242070 }, { "epoch": 0.9214162283139088, "grad_norm": 0.12958140671253204, "learning_rate": 0.0005, "loss": 2.1139, "step": 242080 }, { "epoch": 0.9214542907820314, "grad_norm": 0.1208835244178772, "learning_rate": 0.0005, "loss": 2.0879, "step": 242090 }, { "epoch": 0.9214923532501541, "grad_norm": 0.12668830156326294, "learning_rate": 0.0005, "loss": 2.1078, "step": 242100 }, { "epoch": 0.9215304157182769, "grad_norm": 0.13278093934059143, "learning_rate": 0.0005, "loss": 2.0991, "step": 242110 }, { "epoch": 0.9215684781863995, "grad_norm": 0.13082022964954376, "learning_rate": 0.0005, "loss": 2.1146, "step": 242120 }, { "epoch": 0.9216065406545222, "grad_norm": 0.12971073389053345, "learning_rate": 0.0005, "loss": 2.1122, "step": 242130 }, { "epoch": 0.9216446031226448, "grad_norm": 0.13306653499603271, "learning_rate": 0.0005, "loss": 2.1047, "step": 242140 }, { "epoch": 0.9216826655907676, "grad_norm": 0.123257115483284, "learning_rate": 0.0005, "loss": 2.0999, "step": 242150 }, { "epoch": 0.9217207280588903, "grad_norm": 0.1386224329471588, "learning_rate": 0.0005, "loss": 2.0916, "step": 242160 }, { "epoch": 0.9217587905270129, "grad_norm": 0.12598341703414917, "learning_rate": 0.0005, "loss": 2.0993, "step": 242170 }, { "epoch": 0.9217968529951356, "grad_norm": 0.14487870037555695, "learning_rate": 0.0005, "loss": 2.0904, "step": 242180 }, { "epoch": 0.9218349154632584, "grad_norm": 0.12394547462463379, "learning_rate": 0.0005, "loss": 2.0905, "step": 242190 }, { "epoch": 0.921872977931381, "grad_norm": 0.12625393271446228, "learning_rate": 0.0005, "loss": 2.0925, "step": 242200 }, { "epoch": 0.9219110403995037, "grad_norm": 0.12205489724874496, "learning_rate": 0.0005, "loss": 2.0926, "step": 242210 }, { "epoch": 0.9219491028676263, "grad_norm": 0.12885427474975586, "learning_rate": 0.0005, "loss": 2.1117, "step": 242220 }, { "epoch": 0.921987165335749, "grad_norm": 0.14043216407299042, "learning_rate": 0.0005, "loss": 2.1178, "step": 242230 }, { "epoch": 0.9220252278038717, "grad_norm": 0.12551164627075195, "learning_rate": 0.0005, "loss": 2.0929, "step": 242240 }, { "epoch": 0.9220632902719944, "grad_norm": 0.13114112615585327, "learning_rate": 0.0005, "loss": 2.0974, "step": 242250 }, { "epoch": 0.9221013527401171, "grad_norm": 0.11854348331689835, "learning_rate": 0.0005, "loss": 2.1101, "step": 242260 }, { "epoch": 0.9221394152082397, "grad_norm": 0.12824852764606476, "learning_rate": 0.0005, "loss": 2.1085, "step": 242270 }, { "epoch": 0.9221774776763625, "grad_norm": 0.12649212777614594, "learning_rate": 0.0005, "loss": 2.101, "step": 242280 }, { "epoch": 0.9222155401444851, "grad_norm": 0.14085513353347778, "learning_rate": 0.0005, "loss": 2.0988, "step": 242290 }, { "epoch": 0.9222536026126078, "grad_norm": 0.13265717029571533, "learning_rate": 0.0005, "loss": 2.0977, "step": 242300 }, { "epoch": 0.9222916650807305, "grad_norm": 0.12744875252246857, "learning_rate": 0.0005, "loss": 2.0952, "step": 242310 }, { "epoch": 0.9223297275488532, "grad_norm": 0.12851621210575104, "learning_rate": 0.0005, "loss": 2.1055, "step": 242320 }, { "epoch": 0.9223677900169759, "grad_norm": 0.1234138086438179, "learning_rate": 0.0005, "loss": 2.0816, "step": 242330 }, { "epoch": 0.9224058524850985, "grad_norm": 0.12517589330673218, "learning_rate": 0.0005, "loss": 2.1143, "step": 242340 }, { "epoch": 0.9224439149532212, "grad_norm": 0.12155363708734512, "learning_rate": 0.0005, "loss": 2.1034, "step": 242350 }, { "epoch": 0.9224819774213439, "grad_norm": 0.12556451559066772, "learning_rate": 0.0005, "loss": 2.103, "step": 242360 }, { "epoch": 0.9225200398894666, "grad_norm": 0.13065999746322632, "learning_rate": 0.0005, "loss": 2.0889, "step": 242370 }, { "epoch": 0.9225581023575893, "grad_norm": 0.12490629404783249, "learning_rate": 0.0005, "loss": 2.0955, "step": 242380 }, { "epoch": 0.9225961648257119, "grad_norm": 0.12175456434488297, "learning_rate": 0.0005, "loss": 2.1084, "step": 242390 }, { "epoch": 0.9226342272938346, "grad_norm": 0.13581399619579315, "learning_rate": 0.0005, "loss": 2.1105, "step": 242400 }, { "epoch": 0.9226722897619574, "grad_norm": 0.146930992603302, "learning_rate": 0.0005, "loss": 2.1053, "step": 242410 }, { "epoch": 0.92271035223008, "grad_norm": 0.13098594546318054, "learning_rate": 0.0005, "loss": 2.1046, "step": 242420 }, { "epoch": 0.9227484146982027, "grad_norm": 0.1277133822441101, "learning_rate": 0.0005, "loss": 2.1054, "step": 242430 }, { "epoch": 0.9227864771663253, "grad_norm": 0.1202763095498085, "learning_rate": 0.0005, "loss": 2.0993, "step": 242440 }, { "epoch": 0.9228245396344481, "grad_norm": 0.11052737385034561, "learning_rate": 0.0005, "loss": 2.1136, "step": 242450 }, { "epoch": 0.9228626021025708, "grad_norm": 0.12411277741193771, "learning_rate": 0.0005, "loss": 2.0992, "step": 242460 }, { "epoch": 0.9229006645706934, "grad_norm": 0.13022834062576294, "learning_rate": 0.0005, "loss": 2.1092, "step": 242470 }, { "epoch": 0.9229387270388161, "grad_norm": 0.13648240268230438, "learning_rate": 0.0005, "loss": 2.1006, "step": 242480 }, { "epoch": 0.9229767895069387, "grad_norm": 0.126776322722435, "learning_rate": 0.0005, "loss": 2.096, "step": 242490 }, { "epoch": 0.9230148519750615, "grad_norm": 0.13091644644737244, "learning_rate": 0.0005, "loss": 2.1032, "step": 242500 }, { "epoch": 0.9230529144431842, "grad_norm": 0.14091360569000244, "learning_rate": 0.0005, "loss": 2.1006, "step": 242510 }, { "epoch": 0.9230909769113068, "grad_norm": 0.13159774243831635, "learning_rate": 0.0005, "loss": 2.1077, "step": 242520 }, { "epoch": 0.9231290393794295, "grad_norm": 0.1189807653427124, "learning_rate": 0.0005, "loss": 2.1138, "step": 242530 }, { "epoch": 0.9231671018475522, "grad_norm": 0.1277497261762619, "learning_rate": 0.0005, "loss": 2.105, "step": 242540 }, { "epoch": 0.9232051643156749, "grad_norm": 0.13974037766456604, "learning_rate": 0.0005, "loss": 2.0998, "step": 242550 }, { "epoch": 0.9232432267837976, "grad_norm": 0.11727729439735413, "learning_rate": 0.0005, "loss": 2.113, "step": 242560 }, { "epoch": 0.9232812892519202, "grad_norm": 0.1266697645187378, "learning_rate": 0.0005, "loss": 2.1154, "step": 242570 }, { "epoch": 0.923319351720043, "grad_norm": 0.1267307996749878, "learning_rate": 0.0005, "loss": 2.1, "step": 242580 }, { "epoch": 0.9233574141881656, "grad_norm": 0.1328422725200653, "learning_rate": 0.0005, "loss": 2.098, "step": 242590 }, { "epoch": 0.9233954766562883, "grad_norm": 0.12920698523521423, "learning_rate": 0.0005, "loss": 2.1101, "step": 242600 }, { "epoch": 0.923433539124411, "grad_norm": 0.135263592004776, "learning_rate": 0.0005, "loss": 2.0948, "step": 242610 }, { "epoch": 0.9234716015925337, "grad_norm": 0.13382890820503235, "learning_rate": 0.0005, "loss": 2.1009, "step": 242620 }, { "epoch": 0.9235096640606564, "grad_norm": 0.12548057734966278, "learning_rate": 0.0005, "loss": 2.0885, "step": 242630 }, { "epoch": 0.923547726528779, "grad_norm": 0.13171319663524628, "learning_rate": 0.0005, "loss": 2.1057, "step": 242640 }, { "epoch": 0.9235857889969017, "grad_norm": 0.1245994120836258, "learning_rate": 0.0005, "loss": 2.0864, "step": 242650 }, { "epoch": 0.9236238514650243, "grad_norm": 0.1319652944803238, "learning_rate": 0.0005, "loss": 2.0951, "step": 242660 }, { "epoch": 0.9236619139331471, "grad_norm": 0.12865465879440308, "learning_rate": 0.0005, "loss": 2.0985, "step": 242670 }, { "epoch": 0.9236999764012698, "grad_norm": 0.12202879786491394, "learning_rate": 0.0005, "loss": 2.1099, "step": 242680 }, { "epoch": 0.9237380388693924, "grad_norm": 0.11969945579767227, "learning_rate": 0.0005, "loss": 2.114, "step": 242690 }, { "epoch": 0.9237761013375151, "grad_norm": 0.12907131016254425, "learning_rate": 0.0005, "loss": 2.1139, "step": 242700 }, { "epoch": 0.9238141638056379, "grad_norm": 0.13925276696681976, "learning_rate": 0.0005, "loss": 2.0981, "step": 242710 }, { "epoch": 0.9238522262737605, "grad_norm": 0.12933896481990814, "learning_rate": 0.0005, "loss": 2.0962, "step": 242720 }, { "epoch": 0.9238902887418832, "grad_norm": 0.12854160368442535, "learning_rate": 0.0005, "loss": 2.1116, "step": 242730 }, { "epoch": 0.9239283512100058, "grad_norm": 0.12014547735452652, "learning_rate": 0.0005, "loss": 2.112, "step": 242740 }, { "epoch": 0.9239664136781286, "grad_norm": 0.11493667215108871, "learning_rate": 0.0005, "loss": 2.0963, "step": 242750 }, { "epoch": 0.9240044761462513, "grad_norm": 0.12854570150375366, "learning_rate": 0.0005, "loss": 2.0963, "step": 242760 }, { "epoch": 0.9240425386143739, "grad_norm": 0.1280025988817215, "learning_rate": 0.0005, "loss": 2.1083, "step": 242770 }, { "epoch": 0.9240806010824966, "grad_norm": 0.12395115196704865, "learning_rate": 0.0005, "loss": 2.1011, "step": 242780 }, { "epoch": 0.9241186635506192, "grad_norm": 0.1267663985490799, "learning_rate": 0.0005, "loss": 2.094, "step": 242790 }, { "epoch": 0.924156726018742, "grad_norm": 0.13480299711227417, "learning_rate": 0.0005, "loss": 2.1086, "step": 242800 }, { "epoch": 0.9241947884868646, "grad_norm": 0.1318933069705963, "learning_rate": 0.0005, "loss": 2.1, "step": 242810 }, { "epoch": 0.9242328509549873, "grad_norm": 0.13908858597278595, "learning_rate": 0.0005, "loss": 2.1091, "step": 242820 }, { "epoch": 0.92427091342311, "grad_norm": 0.11871346086263657, "learning_rate": 0.0005, "loss": 2.0974, "step": 242830 }, { "epoch": 0.9243089758912327, "grad_norm": 0.12937457859516144, "learning_rate": 0.0005, "loss": 2.1127, "step": 242840 }, { "epoch": 0.9243470383593554, "grad_norm": 0.14433366060256958, "learning_rate": 0.0005, "loss": 2.0954, "step": 242850 }, { "epoch": 0.924385100827478, "grad_norm": 0.13290633261203766, "learning_rate": 0.0005, "loss": 2.121, "step": 242860 }, { "epoch": 0.9244231632956007, "grad_norm": 0.12644854187965393, "learning_rate": 0.0005, "loss": 2.0954, "step": 242870 }, { "epoch": 0.9244612257637235, "grad_norm": 0.13102544844150543, "learning_rate": 0.0005, "loss": 2.1125, "step": 242880 }, { "epoch": 0.9244992882318461, "grad_norm": 0.17648755013942719, "learning_rate": 0.0005, "loss": 2.1107, "step": 242890 }, { "epoch": 0.9245373506999688, "grad_norm": 0.12615133821964264, "learning_rate": 0.0005, "loss": 2.1042, "step": 242900 }, { "epoch": 0.9245754131680914, "grad_norm": 0.12318253517150879, "learning_rate": 0.0005, "loss": 2.1124, "step": 242910 }, { "epoch": 0.9246134756362141, "grad_norm": 0.1262514740228653, "learning_rate": 0.0005, "loss": 2.1145, "step": 242920 }, { "epoch": 0.9246515381043369, "grad_norm": 0.12124498188495636, "learning_rate": 0.0005, "loss": 2.1162, "step": 242930 }, { "epoch": 0.9246896005724595, "grad_norm": 0.13532347977161407, "learning_rate": 0.0005, "loss": 2.1005, "step": 242940 }, { "epoch": 0.9247276630405822, "grad_norm": 0.12648305296897888, "learning_rate": 0.0005, "loss": 2.0988, "step": 242950 }, { "epoch": 0.9247657255087048, "grad_norm": 0.12291644513607025, "learning_rate": 0.0005, "loss": 2.1006, "step": 242960 }, { "epoch": 0.9248037879768276, "grad_norm": 0.1294446587562561, "learning_rate": 0.0005, "loss": 2.0831, "step": 242970 }, { "epoch": 0.9248418504449503, "grad_norm": 0.13656698167324066, "learning_rate": 0.0005, "loss": 2.0856, "step": 242980 }, { "epoch": 0.9248799129130729, "grad_norm": 0.1236143708229065, "learning_rate": 0.0005, "loss": 2.1097, "step": 242990 }, { "epoch": 0.9249179753811956, "grad_norm": 0.13238398730754852, "learning_rate": 0.0005, "loss": 2.1009, "step": 243000 }, { "epoch": 0.9249560378493183, "grad_norm": 0.13070373237133026, "learning_rate": 0.0005, "loss": 2.0925, "step": 243010 }, { "epoch": 0.924994100317441, "grad_norm": 0.13280506432056427, "learning_rate": 0.0005, "loss": 2.1173, "step": 243020 }, { "epoch": 0.9250321627855637, "grad_norm": 0.12996715307235718, "learning_rate": 0.0005, "loss": 2.1126, "step": 243030 }, { "epoch": 0.9250702252536863, "grad_norm": 0.12887771427631378, "learning_rate": 0.0005, "loss": 2.0892, "step": 243040 }, { "epoch": 0.9251082877218091, "grad_norm": 0.13306356966495514, "learning_rate": 0.0005, "loss": 2.1034, "step": 243050 }, { "epoch": 0.9251463501899317, "grad_norm": 0.14614702761173248, "learning_rate": 0.0005, "loss": 2.11, "step": 243060 }, { "epoch": 0.9251844126580544, "grad_norm": 0.12717987596988678, "learning_rate": 0.0005, "loss": 2.0998, "step": 243070 }, { "epoch": 0.9252224751261771, "grad_norm": 0.12082240730524063, "learning_rate": 0.0005, "loss": 2.1016, "step": 243080 }, { "epoch": 0.9252605375942997, "grad_norm": 0.12202537059783936, "learning_rate": 0.0005, "loss": 2.0892, "step": 243090 }, { "epoch": 0.9252986000624225, "grad_norm": 0.13810782134532928, "learning_rate": 0.0005, "loss": 2.1021, "step": 243100 }, { "epoch": 0.9253366625305451, "grad_norm": 0.1495743691921234, "learning_rate": 0.0005, "loss": 2.0927, "step": 243110 }, { "epoch": 0.9253747249986678, "grad_norm": 0.19354461133480072, "learning_rate": 0.0005, "loss": 2.1216, "step": 243120 }, { "epoch": 0.9254127874667905, "grad_norm": 0.12816660106182098, "learning_rate": 0.0005, "loss": 2.1151, "step": 243130 }, { "epoch": 0.9254508499349132, "grad_norm": 0.1510034054517746, "learning_rate": 0.0005, "loss": 2.1082, "step": 243140 }, { "epoch": 0.9254889124030359, "grad_norm": 0.12415878474712372, "learning_rate": 0.0005, "loss": 2.1095, "step": 243150 }, { "epoch": 0.9255269748711585, "grad_norm": 0.11569990962743759, "learning_rate": 0.0005, "loss": 2.1035, "step": 243160 }, { "epoch": 0.9255650373392812, "grad_norm": 0.1276220977306366, "learning_rate": 0.0005, "loss": 2.1052, "step": 243170 }, { "epoch": 0.925603099807404, "grad_norm": 0.12693388760089874, "learning_rate": 0.0005, "loss": 2.0899, "step": 243180 }, { "epoch": 0.9256411622755266, "grad_norm": 0.12227284908294678, "learning_rate": 0.0005, "loss": 2.1086, "step": 243190 }, { "epoch": 0.9256792247436493, "grad_norm": 0.14652849733829498, "learning_rate": 0.0005, "loss": 2.1011, "step": 243200 }, { "epoch": 0.9257172872117719, "grad_norm": 0.12039449065923691, "learning_rate": 0.0005, "loss": 2.1063, "step": 243210 }, { "epoch": 0.9257553496798946, "grad_norm": 0.12962830066680908, "learning_rate": 0.0005, "loss": 2.0976, "step": 243220 }, { "epoch": 0.9257934121480174, "grad_norm": 0.12831825017929077, "learning_rate": 0.0005, "loss": 2.1031, "step": 243230 }, { "epoch": 0.92583147461614, "grad_norm": 0.12297516316175461, "learning_rate": 0.0005, "loss": 2.1025, "step": 243240 }, { "epoch": 0.9258695370842627, "grad_norm": 0.11735675483942032, "learning_rate": 0.0005, "loss": 2.1105, "step": 243250 }, { "epoch": 0.9259075995523853, "grad_norm": 0.1269374042749405, "learning_rate": 0.0005, "loss": 2.0746, "step": 243260 }, { "epoch": 0.9259456620205081, "grad_norm": 0.11785628646612167, "learning_rate": 0.0005, "loss": 2.0923, "step": 243270 }, { "epoch": 0.9259837244886308, "grad_norm": 0.12496186792850494, "learning_rate": 0.0005, "loss": 2.1184, "step": 243280 }, { "epoch": 0.9260217869567534, "grad_norm": 0.13376538455486298, "learning_rate": 0.0005, "loss": 2.091, "step": 243290 }, { "epoch": 0.9260598494248761, "grad_norm": 0.1268036812543869, "learning_rate": 0.0005, "loss": 2.1087, "step": 243300 }, { "epoch": 0.9260979118929988, "grad_norm": 0.12560303509235382, "learning_rate": 0.0005, "loss": 2.092, "step": 243310 }, { "epoch": 0.9261359743611215, "grad_norm": 0.12618377804756165, "learning_rate": 0.0005, "loss": 2.1031, "step": 243320 }, { "epoch": 0.9261740368292442, "grad_norm": 0.12502415478229523, "learning_rate": 0.0005, "loss": 2.1047, "step": 243330 }, { "epoch": 0.9262120992973668, "grad_norm": 0.1230950802564621, "learning_rate": 0.0005, "loss": 2.0981, "step": 243340 }, { "epoch": 0.9262501617654895, "grad_norm": 0.12102053314447403, "learning_rate": 0.0005, "loss": 2.1043, "step": 243350 }, { "epoch": 0.9262882242336122, "grad_norm": 0.1210583746433258, "learning_rate": 0.0005, "loss": 2.0992, "step": 243360 }, { "epoch": 0.9263262867017349, "grad_norm": 0.13412946462631226, "learning_rate": 0.0005, "loss": 2.0986, "step": 243370 }, { "epoch": 0.9263643491698575, "grad_norm": 0.15333864092826843, "learning_rate": 0.0005, "loss": 2.1024, "step": 243380 }, { "epoch": 0.9264024116379802, "grad_norm": 0.13135433197021484, "learning_rate": 0.0005, "loss": 2.1168, "step": 243390 }, { "epoch": 0.926440474106103, "grad_norm": 0.1341996043920517, "learning_rate": 0.0005, "loss": 2.0946, "step": 243400 }, { "epoch": 0.9264785365742256, "grad_norm": 0.13238976895809174, "learning_rate": 0.0005, "loss": 2.1024, "step": 243410 }, { "epoch": 0.9265165990423483, "grad_norm": 0.1404605507850647, "learning_rate": 0.0005, "loss": 2.0975, "step": 243420 }, { "epoch": 0.926554661510471, "grad_norm": 0.12437047809362411, "learning_rate": 0.0005, "loss": 2.1123, "step": 243430 }, { "epoch": 0.9265927239785937, "grad_norm": 0.12488163262605667, "learning_rate": 0.0005, "loss": 2.0929, "step": 243440 }, { "epoch": 0.9266307864467164, "grad_norm": 0.12038839608430862, "learning_rate": 0.0005, "loss": 2.1039, "step": 243450 }, { "epoch": 0.926668848914839, "grad_norm": 0.12095153331756592, "learning_rate": 0.0005, "loss": 2.0962, "step": 243460 }, { "epoch": 0.9267069113829617, "grad_norm": 0.1299186497926712, "learning_rate": 0.0005, "loss": 2.1126, "step": 243470 }, { "epoch": 0.9267449738510845, "grad_norm": 0.13669754564762115, "learning_rate": 0.0005, "loss": 2.1018, "step": 243480 }, { "epoch": 0.9267830363192071, "grad_norm": 0.12292340397834778, "learning_rate": 0.0005, "loss": 2.1028, "step": 243490 }, { "epoch": 0.9268210987873298, "grad_norm": 0.11784177273511887, "learning_rate": 0.0005, "loss": 2.0979, "step": 243500 }, { "epoch": 0.9268591612554524, "grad_norm": 0.1215096265077591, "learning_rate": 0.0005, "loss": 2.1177, "step": 243510 }, { "epoch": 0.9268972237235751, "grad_norm": 0.13446903228759766, "learning_rate": 0.0005, "loss": 2.0987, "step": 243520 }, { "epoch": 0.9269352861916978, "grad_norm": 0.1293669492006302, "learning_rate": 0.0005, "loss": 2.0969, "step": 243530 }, { "epoch": 0.9269733486598205, "grad_norm": 0.21987639367580414, "learning_rate": 0.0005, "loss": 2.1054, "step": 243540 }, { "epoch": 0.9270114111279432, "grad_norm": 0.13799740374088287, "learning_rate": 0.0005, "loss": 2.091, "step": 243550 }, { "epoch": 0.9270494735960658, "grad_norm": 0.14946503937244415, "learning_rate": 0.0005, "loss": 2.1141, "step": 243560 }, { "epoch": 0.9270875360641886, "grad_norm": 0.13505719602108002, "learning_rate": 0.0005, "loss": 2.1225, "step": 243570 }, { "epoch": 0.9271255985323112, "grad_norm": 0.13582663238048553, "learning_rate": 0.0005, "loss": 2.0999, "step": 243580 }, { "epoch": 0.9271636610004339, "grad_norm": 0.15584570169448853, "learning_rate": 0.0005, "loss": 2.114, "step": 243590 }, { "epoch": 0.9272017234685566, "grad_norm": 0.12088710069656372, "learning_rate": 0.0005, "loss": 2.1095, "step": 243600 }, { "epoch": 0.9272397859366793, "grad_norm": 0.12905220687389374, "learning_rate": 0.0005, "loss": 2.0933, "step": 243610 }, { "epoch": 0.927277848404802, "grad_norm": 0.12397059798240662, "learning_rate": 0.0005, "loss": 2.1122, "step": 243620 }, { "epoch": 0.9273159108729246, "grad_norm": 0.11348934471607208, "learning_rate": 0.0005, "loss": 2.1039, "step": 243630 }, { "epoch": 0.9273539733410473, "grad_norm": 0.12789826095104218, "learning_rate": 0.0005, "loss": 2.104, "step": 243640 }, { "epoch": 0.92739203580917, "grad_norm": 0.13802658021450043, "learning_rate": 0.0005, "loss": 2.1076, "step": 243650 }, { "epoch": 0.9274300982772927, "grad_norm": 0.13512162864208221, "learning_rate": 0.0005, "loss": 2.0855, "step": 243660 }, { "epoch": 0.9274681607454154, "grad_norm": 0.12698446214199066, "learning_rate": 0.0005, "loss": 2.0998, "step": 243670 }, { "epoch": 0.927506223213538, "grad_norm": 0.13155756890773773, "learning_rate": 0.0005, "loss": 2.0946, "step": 243680 }, { "epoch": 0.9275442856816607, "grad_norm": 0.1301504522562027, "learning_rate": 0.0005, "loss": 2.104, "step": 243690 }, { "epoch": 0.9275823481497835, "grad_norm": 0.12535198032855988, "learning_rate": 0.0005, "loss": 2.0925, "step": 243700 }, { "epoch": 0.9276204106179061, "grad_norm": 0.12066397815942764, "learning_rate": 0.0005, "loss": 2.1049, "step": 243710 }, { "epoch": 0.9276584730860288, "grad_norm": 0.13454478979110718, "learning_rate": 0.0005, "loss": 2.1228, "step": 243720 }, { "epoch": 0.9276965355541514, "grad_norm": 0.12323904037475586, "learning_rate": 0.0005, "loss": 2.1074, "step": 243730 }, { "epoch": 0.9277345980222742, "grad_norm": 0.14403332769870758, "learning_rate": 0.0005, "loss": 2.1077, "step": 243740 }, { "epoch": 0.9277726604903969, "grad_norm": 0.1309439241886139, "learning_rate": 0.0005, "loss": 2.0982, "step": 243750 }, { "epoch": 0.9278107229585195, "grad_norm": 0.1328670233488083, "learning_rate": 0.0005, "loss": 2.0964, "step": 243760 }, { "epoch": 0.9278487854266422, "grad_norm": 0.13527549803256989, "learning_rate": 0.0005, "loss": 2.1052, "step": 243770 }, { "epoch": 0.9278868478947649, "grad_norm": 0.13164310157299042, "learning_rate": 0.0005, "loss": 2.1114, "step": 243780 }, { "epoch": 0.9279249103628876, "grad_norm": 0.1374911218881607, "learning_rate": 0.0005, "loss": 2.1119, "step": 243790 }, { "epoch": 0.9279629728310103, "grad_norm": 0.12938323616981506, "learning_rate": 0.0005, "loss": 2.1098, "step": 243800 }, { "epoch": 0.9280010352991329, "grad_norm": 0.13699007034301758, "learning_rate": 0.0005, "loss": 2.1157, "step": 243810 }, { "epoch": 0.9280390977672556, "grad_norm": 0.12728573381900787, "learning_rate": 0.0005, "loss": 2.1116, "step": 243820 }, { "epoch": 0.9280771602353783, "grad_norm": 0.13931600749492645, "learning_rate": 0.0005, "loss": 2.1046, "step": 243830 }, { "epoch": 0.928115222703501, "grad_norm": 0.12266018986701965, "learning_rate": 0.0005, "loss": 2.0865, "step": 243840 }, { "epoch": 0.9281532851716237, "grad_norm": 0.13104864954948425, "learning_rate": 0.0005, "loss": 2.1034, "step": 243850 }, { "epoch": 0.9281913476397463, "grad_norm": 0.1316935271024704, "learning_rate": 0.0005, "loss": 2.1074, "step": 243860 }, { "epoch": 0.9282294101078691, "grad_norm": 0.1339971125125885, "learning_rate": 0.0005, "loss": 2.1117, "step": 243870 }, { "epoch": 0.9282674725759917, "grad_norm": 0.12672404944896698, "learning_rate": 0.0005, "loss": 2.1066, "step": 243880 }, { "epoch": 0.9283055350441144, "grad_norm": 0.11675414443016052, "learning_rate": 0.0005, "loss": 2.1155, "step": 243890 }, { "epoch": 0.928343597512237, "grad_norm": 0.12460727989673615, "learning_rate": 0.0005, "loss": 2.1094, "step": 243900 }, { "epoch": 0.9283816599803598, "grad_norm": 0.12264706939458847, "learning_rate": 0.0005, "loss": 2.102, "step": 243910 }, { "epoch": 0.9284197224484825, "grad_norm": 0.13503718376159668, "learning_rate": 0.0005, "loss": 2.1141, "step": 243920 }, { "epoch": 0.9284577849166051, "grad_norm": 0.128460094332695, "learning_rate": 0.0005, "loss": 2.1121, "step": 243930 }, { "epoch": 0.9284958473847278, "grad_norm": 0.13557086884975433, "learning_rate": 0.0005, "loss": 2.1095, "step": 243940 }, { "epoch": 0.9285339098528504, "grad_norm": 0.1326914131641388, "learning_rate": 0.0005, "loss": 2.0946, "step": 243950 }, { "epoch": 0.9285719723209732, "grad_norm": 0.12578825652599335, "learning_rate": 0.0005, "loss": 2.1056, "step": 243960 }, { "epoch": 0.9286100347890959, "grad_norm": 0.12709611654281616, "learning_rate": 0.0005, "loss": 2.1056, "step": 243970 }, { "epoch": 0.9286480972572185, "grad_norm": 0.12961846590042114, "learning_rate": 0.0005, "loss": 2.107, "step": 243980 }, { "epoch": 0.9286861597253412, "grad_norm": 0.13466140627861023, "learning_rate": 0.0005, "loss": 2.099, "step": 243990 }, { "epoch": 0.928724222193464, "grad_norm": 0.12910573184490204, "learning_rate": 0.0005, "loss": 2.0979, "step": 244000 }, { "epoch": 0.9287622846615866, "grad_norm": 0.12387394905090332, "learning_rate": 0.0005, "loss": 2.096, "step": 244010 }, { "epoch": 0.9288003471297093, "grad_norm": 0.1370897889137268, "learning_rate": 0.0005, "loss": 2.1069, "step": 244020 }, { "epoch": 0.9288384095978319, "grad_norm": 0.1256018429994583, "learning_rate": 0.0005, "loss": 2.1079, "step": 244030 }, { "epoch": 0.9288764720659547, "grad_norm": 0.13081581890583038, "learning_rate": 0.0005, "loss": 2.0939, "step": 244040 }, { "epoch": 0.9289145345340774, "grad_norm": 0.14325900375843048, "learning_rate": 0.0005, "loss": 2.1133, "step": 244050 }, { "epoch": 0.9289525970022, "grad_norm": 0.15604746341705322, "learning_rate": 0.0005, "loss": 2.107, "step": 244060 }, { "epoch": 0.9289906594703227, "grad_norm": 0.1184174194931984, "learning_rate": 0.0005, "loss": 2.0926, "step": 244070 }, { "epoch": 0.9290287219384453, "grad_norm": 0.14096128940582275, "learning_rate": 0.0005, "loss": 2.1166, "step": 244080 }, { "epoch": 0.9290667844065681, "grad_norm": 0.12300019711256027, "learning_rate": 0.0005, "loss": 2.0989, "step": 244090 }, { "epoch": 0.9291048468746907, "grad_norm": 0.13574087619781494, "learning_rate": 0.0005, "loss": 2.0913, "step": 244100 }, { "epoch": 0.9291429093428134, "grad_norm": 0.12608695030212402, "learning_rate": 0.0005, "loss": 2.1131, "step": 244110 }, { "epoch": 0.9291809718109361, "grad_norm": 0.12948545813560486, "learning_rate": 0.0005, "loss": 2.1007, "step": 244120 }, { "epoch": 0.9292190342790588, "grad_norm": 0.1463763415813446, "learning_rate": 0.0005, "loss": 2.1012, "step": 244130 }, { "epoch": 0.9292570967471815, "grad_norm": 0.14562129974365234, "learning_rate": 0.0005, "loss": 2.108, "step": 244140 }, { "epoch": 0.9292951592153041, "grad_norm": 0.1424500048160553, "learning_rate": 0.0005, "loss": 2.0962, "step": 244150 }, { "epoch": 0.9293332216834268, "grad_norm": 0.1314752846956253, "learning_rate": 0.0005, "loss": 2.1009, "step": 244160 }, { "epoch": 0.9293712841515496, "grad_norm": 0.12602221965789795, "learning_rate": 0.0005, "loss": 2.1078, "step": 244170 }, { "epoch": 0.9294093466196722, "grad_norm": 0.12631013989448547, "learning_rate": 0.0005, "loss": 2.1011, "step": 244180 }, { "epoch": 0.9294474090877949, "grad_norm": 0.11970032751560211, "learning_rate": 0.0005, "loss": 2.1126, "step": 244190 }, { "epoch": 0.9294854715559175, "grad_norm": 0.13537070155143738, "learning_rate": 0.0005, "loss": 2.1029, "step": 244200 }, { "epoch": 0.9295235340240403, "grad_norm": 0.1548534780740738, "learning_rate": 0.0005, "loss": 2.0944, "step": 244210 }, { "epoch": 0.929561596492163, "grad_norm": 0.13076388835906982, "learning_rate": 0.0005, "loss": 2.1054, "step": 244220 }, { "epoch": 0.9295996589602856, "grad_norm": 0.13737253844738007, "learning_rate": 0.0005, "loss": 2.0973, "step": 244230 }, { "epoch": 0.9296377214284083, "grad_norm": 0.13844993710517883, "learning_rate": 0.0005, "loss": 2.1035, "step": 244240 }, { "epoch": 0.9296757838965309, "grad_norm": 0.13641414046287537, "learning_rate": 0.0005, "loss": 2.0983, "step": 244250 }, { "epoch": 0.9297138463646537, "grad_norm": 0.13651876151561737, "learning_rate": 0.0005, "loss": 2.0961, "step": 244260 }, { "epoch": 0.9297519088327764, "grad_norm": 0.12432459741830826, "learning_rate": 0.0005, "loss": 2.091, "step": 244270 }, { "epoch": 0.929789971300899, "grad_norm": 0.1276438683271408, "learning_rate": 0.0005, "loss": 2.1047, "step": 244280 }, { "epoch": 0.9298280337690217, "grad_norm": 0.1557062566280365, "learning_rate": 0.0005, "loss": 2.0996, "step": 244290 }, { "epoch": 0.9298660962371444, "grad_norm": 0.12883219122886658, "learning_rate": 0.0005, "loss": 2.0962, "step": 244300 }, { "epoch": 0.9299041587052671, "grad_norm": 0.12083089351654053, "learning_rate": 0.0005, "loss": 2.0889, "step": 244310 }, { "epoch": 0.9299422211733898, "grad_norm": 0.12065315246582031, "learning_rate": 0.0005, "loss": 2.108, "step": 244320 }, { "epoch": 0.9299802836415124, "grad_norm": 0.13488535583019257, "learning_rate": 0.0005, "loss": 2.1015, "step": 244330 }, { "epoch": 0.9300183461096352, "grad_norm": 0.13125616312026978, "learning_rate": 0.0005, "loss": 2.098, "step": 244340 }, { "epoch": 0.9300564085777578, "grad_norm": 0.12402810901403427, "learning_rate": 0.0005, "loss": 2.1119, "step": 244350 }, { "epoch": 0.9300944710458805, "grad_norm": 0.12106141448020935, "learning_rate": 0.0005, "loss": 2.1086, "step": 244360 }, { "epoch": 0.9301325335140032, "grad_norm": 0.1306813508272171, "learning_rate": 0.0005, "loss": 2.107, "step": 244370 }, { "epoch": 0.9301705959821258, "grad_norm": 0.13408935070037842, "learning_rate": 0.0005, "loss": 2.1056, "step": 244380 }, { "epoch": 0.9302086584502486, "grad_norm": 0.1249052956700325, "learning_rate": 0.0005, "loss": 2.095, "step": 244390 }, { "epoch": 0.9302467209183712, "grad_norm": 0.12542724609375, "learning_rate": 0.0005, "loss": 2.0916, "step": 244400 }, { "epoch": 0.9302847833864939, "grad_norm": 0.144064798951149, "learning_rate": 0.0005, "loss": 2.0844, "step": 244410 }, { "epoch": 0.9303228458546166, "grad_norm": 0.13257959485054016, "learning_rate": 0.0005, "loss": 2.1098, "step": 244420 }, { "epoch": 0.9303609083227393, "grad_norm": 0.14355061948299408, "learning_rate": 0.0005, "loss": 2.0928, "step": 244430 }, { "epoch": 0.930398970790862, "grad_norm": 0.1293869912624359, "learning_rate": 0.0005, "loss": 2.0938, "step": 244440 }, { "epoch": 0.9304370332589846, "grad_norm": 0.11531233042478561, "learning_rate": 0.0005, "loss": 2.1111, "step": 244450 }, { "epoch": 0.9304750957271073, "grad_norm": 0.12876319885253906, "learning_rate": 0.0005, "loss": 2.1219, "step": 244460 }, { "epoch": 0.9305131581952301, "grad_norm": 0.1262495368719101, "learning_rate": 0.0005, "loss": 2.1091, "step": 244470 }, { "epoch": 0.9305512206633527, "grad_norm": 0.12156631052494049, "learning_rate": 0.0005, "loss": 2.0983, "step": 244480 }, { "epoch": 0.9305892831314754, "grad_norm": 0.13556714355945587, "learning_rate": 0.0005, "loss": 2.1047, "step": 244490 }, { "epoch": 0.930627345599598, "grad_norm": 0.12696297466754913, "learning_rate": 0.0005, "loss": 2.1032, "step": 244500 }, { "epoch": 0.9306654080677207, "grad_norm": 0.12120062112808228, "learning_rate": 0.0005, "loss": 2.094, "step": 244510 }, { "epoch": 0.9307034705358435, "grad_norm": 0.1332768201828003, "learning_rate": 0.0005, "loss": 2.1108, "step": 244520 }, { "epoch": 0.9307415330039661, "grad_norm": 0.1333252191543579, "learning_rate": 0.0005, "loss": 2.1, "step": 244530 }, { "epoch": 0.9307795954720888, "grad_norm": 0.14268051087856293, "learning_rate": 0.0005, "loss": 2.1108, "step": 244540 }, { "epoch": 0.9308176579402114, "grad_norm": 0.15190616250038147, "learning_rate": 0.0005, "loss": 2.1186, "step": 244550 }, { "epoch": 0.9308557204083342, "grad_norm": 0.11583707481622696, "learning_rate": 0.0005, "loss": 2.1115, "step": 244560 }, { "epoch": 0.9308937828764569, "grad_norm": 0.12261312454938889, "learning_rate": 0.0005, "loss": 2.1071, "step": 244570 }, { "epoch": 0.9309318453445795, "grad_norm": 0.13171446323394775, "learning_rate": 0.0005, "loss": 2.0932, "step": 244580 }, { "epoch": 0.9309699078127022, "grad_norm": 0.12392304092645645, "learning_rate": 0.0005, "loss": 2.1017, "step": 244590 }, { "epoch": 0.9310079702808249, "grad_norm": 0.13120070099830627, "learning_rate": 0.0005, "loss": 2.1144, "step": 244600 }, { "epoch": 0.9310460327489476, "grad_norm": 0.12526655197143555, "learning_rate": 0.0005, "loss": 2.1104, "step": 244610 }, { "epoch": 0.9310840952170703, "grad_norm": 0.1256546527147293, "learning_rate": 0.0005, "loss": 2.0988, "step": 244620 }, { "epoch": 0.9311221576851929, "grad_norm": 0.11751958727836609, "learning_rate": 0.0005, "loss": 2.1037, "step": 244630 }, { "epoch": 0.9311602201533157, "grad_norm": 0.14713047444820404, "learning_rate": 0.0005, "loss": 2.1124, "step": 244640 }, { "epoch": 0.9311982826214383, "grad_norm": 0.1289437860250473, "learning_rate": 0.0005, "loss": 2.1004, "step": 244650 }, { "epoch": 0.931236345089561, "grad_norm": 0.12775853276252747, "learning_rate": 0.0005, "loss": 2.1122, "step": 244660 }, { "epoch": 0.9312744075576836, "grad_norm": 0.128163143992424, "learning_rate": 0.0005, "loss": 2.1024, "step": 244670 }, { "epoch": 0.9313124700258063, "grad_norm": 0.12197849154472351, "learning_rate": 0.0005, "loss": 2.1106, "step": 244680 }, { "epoch": 0.9313505324939291, "grad_norm": 0.11555957794189453, "learning_rate": 0.0005, "loss": 2.1188, "step": 244690 }, { "epoch": 0.9313885949620517, "grad_norm": 0.11189104616641998, "learning_rate": 0.0005, "loss": 2.077, "step": 244700 }, { "epoch": 0.9314266574301744, "grad_norm": 0.126925989985466, "learning_rate": 0.0005, "loss": 2.1049, "step": 244710 }, { "epoch": 0.931464719898297, "grad_norm": 0.14960403740406036, "learning_rate": 0.0005, "loss": 2.1098, "step": 244720 }, { "epoch": 0.9315027823664198, "grad_norm": 0.12165334820747375, "learning_rate": 0.0005, "loss": 2.0917, "step": 244730 }, { "epoch": 0.9315408448345425, "grad_norm": 0.1221255511045456, "learning_rate": 0.0005, "loss": 2.1044, "step": 244740 }, { "epoch": 0.9315789073026651, "grad_norm": 0.12555082142353058, "learning_rate": 0.0005, "loss": 2.1121, "step": 244750 }, { "epoch": 0.9316169697707878, "grad_norm": 0.1253214180469513, "learning_rate": 0.0005, "loss": 2.1164, "step": 244760 }, { "epoch": 0.9316550322389106, "grad_norm": 0.12106679379940033, "learning_rate": 0.0005, "loss": 2.1047, "step": 244770 }, { "epoch": 0.9316930947070332, "grad_norm": 0.1325124055147171, "learning_rate": 0.0005, "loss": 2.0897, "step": 244780 }, { "epoch": 0.9317311571751559, "grad_norm": 0.1329493522644043, "learning_rate": 0.0005, "loss": 2.1118, "step": 244790 }, { "epoch": 0.9317692196432785, "grad_norm": 0.12659507989883423, "learning_rate": 0.0005, "loss": 2.108, "step": 244800 }, { "epoch": 0.9318072821114012, "grad_norm": 0.12768709659576416, "learning_rate": 0.0005, "loss": 2.1073, "step": 244810 }, { "epoch": 0.931845344579524, "grad_norm": 0.13004130125045776, "learning_rate": 0.0005, "loss": 2.088, "step": 244820 }, { "epoch": 0.9318834070476466, "grad_norm": 0.13215667009353638, "learning_rate": 0.0005, "loss": 2.0925, "step": 244830 }, { "epoch": 0.9319214695157693, "grad_norm": 0.13396494090557098, "learning_rate": 0.0005, "loss": 2.0988, "step": 244840 }, { "epoch": 0.9319595319838919, "grad_norm": 0.12323446571826935, "learning_rate": 0.0005, "loss": 2.1032, "step": 244850 }, { "epoch": 0.9319975944520147, "grad_norm": 0.1380215287208557, "learning_rate": 0.0005, "loss": 2.1043, "step": 244860 }, { "epoch": 0.9320356569201373, "grad_norm": 0.13250230252742767, "learning_rate": 0.0005, "loss": 2.0982, "step": 244870 }, { "epoch": 0.93207371938826, "grad_norm": 0.13689535856246948, "learning_rate": 0.0005, "loss": 2.1051, "step": 244880 }, { "epoch": 0.9321117818563827, "grad_norm": 0.13904209434986115, "learning_rate": 0.0005, "loss": 2.1057, "step": 244890 }, { "epoch": 0.9321498443245054, "grad_norm": 0.12622742354869843, "learning_rate": 0.0005, "loss": 2.1044, "step": 244900 }, { "epoch": 0.9321879067926281, "grad_norm": 0.13061605393886566, "learning_rate": 0.0005, "loss": 2.1041, "step": 244910 }, { "epoch": 0.9322259692607507, "grad_norm": 0.12408585846424103, "learning_rate": 0.0005, "loss": 2.0996, "step": 244920 }, { "epoch": 0.9322640317288734, "grad_norm": 0.12326142191886902, "learning_rate": 0.0005, "loss": 2.1187, "step": 244930 }, { "epoch": 0.9323020941969961, "grad_norm": 0.1347014605998993, "learning_rate": 0.0005, "loss": 2.1149, "step": 244940 }, { "epoch": 0.9323401566651188, "grad_norm": 0.14028072357177734, "learning_rate": 0.0005, "loss": 2.1045, "step": 244950 }, { "epoch": 0.9323782191332415, "grad_norm": 0.12555497884750366, "learning_rate": 0.0005, "loss": 2.0973, "step": 244960 }, { "epoch": 0.9324162816013641, "grad_norm": 0.12269239872694016, "learning_rate": 0.0005, "loss": 2.0762, "step": 244970 }, { "epoch": 0.9324543440694868, "grad_norm": 0.1264629364013672, "learning_rate": 0.0005, "loss": 2.12, "step": 244980 }, { "epoch": 0.9324924065376096, "grad_norm": 0.11486383527517319, "learning_rate": 0.0005, "loss": 2.0965, "step": 244990 }, { "epoch": 0.9325304690057322, "grad_norm": 0.1295613944530487, "learning_rate": 0.0005, "loss": 2.0915, "step": 245000 }, { "epoch": 0.9325685314738549, "grad_norm": 0.13714879751205444, "learning_rate": 0.0005, "loss": 2.0936, "step": 245010 }, { "epoch": 0.9326065939419775, "grad_norm": 0.13334430754184723, "learning_rate": 0.0005, "loss": 2.0837, "step": 245020 }, { "epoch": 0.9326446564101003, "grad_norm": 0.12343837320804596, "learning_rate": 0.0005, "loss": 2.0892, "step": 245030 }, { "epoch": 0.932682718878223, "grad_norm": 0.12585949897766113, "learning_rate": 0.0005, "loss": 2.0925, "step": 245040 }, { "epoch": 0.9327207813463456, "grad_norm": 0.13595852255821228, "learning_rate": 0.0005, "loss": 2.1031, "step": 245050 }, { "epoch": 0.9327588438144683, "grad_norm": 0.1471097618341446, "learning_rate": 0.0005, "loss": 2.1043, "step": 245060 }, { "epoch": 0.932796906282591, "grad_norm": 0.13127946853637695, "learning_rate": 0.0005, "loss": 2.094, "step": 245070 }, { "epoch": 0.9328349687507137, "grad_norm": 0.12792746722698212, "learning_rate": 0.0005, "loss": 2.1132, "step": 245080 }, { "epoch": 0.9328730312188364, "grad_norm": 0.1280100792646408, "learning_rate": 0.0005, "loss": 2.1103, "step": 245090 }, { "epoch": 0.932911093686959, "grad_norm": 0.13393692672252655, "learning_rate": 0.0005, "loss": 2.0926, "step": 245100 }, { "epoch": 0.9329491561550817, "grad_norm": 0.13617435097694397, "learning_rate": 0.0005, "loss": 2.0963, "step": 245110 }, { "epoch": 0.9329872186232044, "grad_norm": 0.13600575923919678, "learning_rate": 0.0005, "loss": 2.1164, "step": 245120 }, { "epoch": 0.9330252810913271, "grad_norm": 0.14357805252075195, "learning_rate": 0.0005, "loss": 2.101, "step": 245130 }, { "epoch": 0.9330633435594498, "grad_norm": 0.12242817133665085, "learning_rate": 0.0005, "loss": 2.0949, "step": 245140 }, { "epoch": 0.9331014060275724, "grad_norm": 0.11658231168985367, "learning_rate": 0.0005, "loss": 2.1036, "step": 245150 }, { "epoch": 0.9331394684956952, "grad_norm": 0.11763457208871841, "learning_rate": 0.0005, "loss": 2.1114, "step": 245160 }, { "epoch": 0.9331775309638178, "grad_norm": 0.12078958749771118, "learning_rate": 0.0005, "loss": 2.0855, "step": 245170 }, { "epoch": 0.9332155934319405, "grad_norm": 0.12004182487726212, "learning_rate": 0.0005, "loss": 2.117, "step": 245180 }, { "epoch": 0.9332536559000632, "grad_norm": 0.13686971366405487, "learning_rate": 0.0005, "loss": 2.1101, "step": 245190 }, { "epoch": 0.9332917183681859, "grad_norm": 0.12164989113807678, "learning_rate": 0.0005, "loss": 2.095, "step": 245200 }, { "epoch": 0.9333297808363086, "grad_norm": 0.11873283237218857, "learning_rate": 0.0005, "loss": 2.0832, "step": 245210 }, { "epoch": 0.9333678433044312, "grad_norm": 0.13262200355529785, "learning_rate": 0.0005, "loss": 2.0946, "step": 245220 }, { "epoch": 0.9334059057725539, "grad_norm": 0.13883377611637115, "learning_rate": 0.0005, "loss": 2.1044, "step": 245230 }, { "epoch": 0.9334439682406765, "grad_norm": 0.12576153874397278, "learning_rate": 0.0005, "loss": 2.0878, "step": 245240 }, { "epoch": 0.9334820307087993, "grad_norm": 0.14034013450145721, "learning_rate": 0.0005, "loss": 2.093, "step": 245250 }, { "epoch": 0.933520093176922, "grad_norm": 0.12679800391197205, "learning_rate": 0.0005, "loss": 2.1084, "step": 245260 }, { "epoch": 0.9335581556450446, "grad_norm": 0.12937819957733154, "learning_rate": 0.0005, "loss": 2.1, "step": 245270 }, { "epoch": 0.9335962181131673, "grad_norm": 0.12601298093795776, "learning_rate": 0.0005, "loss": 2.0998, "step": 245280 }, { "epoch": 0.93363428058129, "grad_norm": 0.14028529822826385, "learning_rate": 0.0005, "loss": 2.0837, "step": 245290 }, { "epoch": 0.9336723430494127, "grad_norm": 0.12624330818653107, "learning_rate": 0.0005, "loss": 2.1175, "step": 245300 }, { "epoch": 0.9337104055175354, "grad_norm": 0.11185381561517715, "learning_rate": 0.0005, "loss": 2.0955, "step": 245310 }, { "epoch": 0.933748467985658, "grad_norm": 0.1357858031988144, "learning_rate": 0.0005, "loss": 2.1027, "step": 245320 }, { "epoch": 0.9337865304537808, "grad_norm": 0.1280422806739807, "learning_rate": 0.0005, "loss": 2.0834, "step": 245330 }, { "epoch": 0.9338245929219035, "grad_norm": 0.13474594056606293, "learning_rate": 0.0005, "loss": 2.0878, "step": 245340 }, { "epoch": 0.9338626553900261, "grad_norm": 0.12183479964733124, "learning_rate": 0.0005, "loss": 2.0983, "step": 245350 }, { "epoch": 0.9339007178581488, "grad_norm": 0.1369076818227768, "learning_rate": 0.0005, "loss": 2.0996, "step": 245360 }, { "epoch": 0.9339387803262714, "grad_norm": 0.1256004273891449, "learning_rate": 0.0005, "loss": 2.107, "step": 245370 }, { "epoch": 0.9339768427943942, "grad_norm": 0.13013307750225067, "learning_rate": 0.0005, "loss": 2.1091, "step": 245380 }, { "epoch": 0.9340149052625168, "grad_norm": 0.1307704746723175, "learning_rate": 0.0005, "loss": 2.1123, "step": 245390 }, { "epoch": 0.9340529677306395, "grad_norm": 0.1378530114889145, "learning_rate": 0.0005, "loss": 2.1067, "step": 245400 }, { "epoch": 0.9340910301987622, "grad_norm": 0.13222309947013855, "learning_rate": 0.0005, "loss": 2.1102, "step": 245410 }, { "epoch": 0.9341290926668849, "grad_norm": 0.13338710367679596, "learning_rate": 0.0005, "loss": 2.1054, "step": 245420 }, { "epoch": 0.9341671551350076, "grad_norm": 0.13438653945922852, "learning_rate": 0.0005, "loss": 2.1026, "step": 245430 }, { "epoch": 0.9342052176031302, "grad_norm": 0.11549054831266403, "learning_rate": 0.0005, "loss": 2.0903, "step": 245440 }, { "epoch": 0.9342432800712529, "grad_norm": 0.1245436742901802, "learning_rate": 0.0005, "loss": 2.115, "step": 245450 }, { "epoch": 0.9342813425393757, "grad_norm": 0.12763431668281555, "learning_rate": 0.0005, "loss": 2.0855, "step": 245460 }, { "epoch": 0.9343194050074983, "grad_norm": 0.1399812549352646, "learning_rate": 0.0005, "loss": 2.1121, "step": 245470 }, { "epoch": 0.934357467475621, "grad_norm": 0.12727563083171844, "learning_rate": 0.0005, "loss": 2.098, "step": 245480 }, { "epoch": 0.9343955299437436, "grad_norm": 0.1361941248178482, "learning_rate": 0.0005, "loss": 2.0954, "step": 245490 }, { "epoch": 0.9344335924118664, "grad_norm": 0.13142044842243195, "learning_rate": 0.0005, "loss": 2.1259, "step": 245500 }, { "epoch": 0.9344716548799891, "grad_norm": 0.11843173205852509, "learning_rate": 0.0005, "loss": 2.0804, "step": 245510 }, { "epoch": 0.9345097173481117, "grad_norm": 0.13190241158008575, "learning_rate": 0.0005, "loss": 2.101, "step": 245520 }, { "epoch": 0.9345477798162344, "grad_norm": 0.12379549443721771, "learning_rate": 0.0005, "loss": 2.1083, "step": 245530 }, { "epoch": 0.934585842284357, "grad_norm": 0.12975290417671204, "learning_rate": 0.0005, "loss": 2.0866, "step": 245540 }, { "epoch": 0.9346239047524798, "grad_norm": 0.11416078358888626, "learning_rate": 0.0005, "loss": 2.0987, "step": 245550 }, { "epoch": 0.9346619672206025, "grad_norm": 0.1281895637512207, "learning_rate": 0.0005, "loss": 2.0973, "step": 245560 }, { "epoch": 0.9347000296887251, "grad_norm": 0.11358912289142609, "learning_rate": 0.0005, "loss": 2.1156, "step": 245570 }, { "epoch": 0.9347380921568478, "grad_norm": 0.1254263073205948, "learning_rate": 0.0005, "loss": 2.1081, "step": 245580 }, { "epoch": 0.9347761546249705, "grad_norm": 0.1187024861574173, "learning_rate": 0.0005, "loss": 2.1055, "step": 245590 }, { "epoch": 0.9348142170930932, "grad_norm": 0.13790404796600342, "learning_rate": 0.0005, "loss": 2.1289, "step": 245600 }, { "epoch": 0.9348522795612159, "grad_norm": 0.12254596501588821, "learning_rate": 0.0005, "loss": 2.1106, "step": 245610 }, { "epoch": 0.9348903420293385, "grad_norm": 0.12026899307966232, "learning_rate": 0.0005, "loss": 2.0981, "step": 245620 }, { "epoch": 0.9349284044974613, "grad_norm": 0.1398571878671646, "learning_rate": 0.0005, "loss": 2.1177, "step": 245630 }, { "epoch": 0.9349664669655839, "grad_norm": 0.13355661928653717, "learning_rate": 0.0005, "loss": 2.1127, "step": 245640 }, { "epoch": 0.9350045294337066, "grad_norm": 0.12633633613586426, "learning_rate": 0.0005, "loss": 2.0953, "step": 245650 }, { "epoch": 0.9350425919018293, "grad_norm": 0.13872991502285004, "learning_rate": 0.0005, "loss": 2.1046, "step": 245660 }, { "epoch": 0.9350806543699519, "grad_norm": 0.13830257952213287, "learning_rate": 0.0005, "loss": 2.0985, "step": 245670 }, { "epoch": 0.9351187168380747, "grad_norm": 0.12482500821352005, "learning_rate": 0.0005, "loss": 2.0914, "step": 245680 }, { "epoch": 0.9351567793061973, "grad_norm": 0.13927072286605835, "learning_rate": 0.0005, "loss": 2.0915, "step": 245690 }, { "epoch": 0.93519484177432, "grad_norm": 0.1152929961681366, "learning_rate": 0.0005, "loss": 2.1077, "step": 245700 }, { "epoch": 0.9352329042424427, "grad_norm": 0.14349882304668427, "learning_rate": 0.0005, "loss": 2.0947, "step": 245710 }, { "epoch": 0.9352709667105654, "grad_norm": 0.2207951843738556, "learning_rate": 0.0005, "loss": 2.1001, "step": 245720 }, { "epoch": 0.9353090291786881, "grad_norm": 0.1341167688369751, "learning_rate": 0.0005, "loss": 2.0991, "step": 245730 }, { "epoch": 0.9353470916468107, "grad_norm": 0.12583181262016296, "learning_rate": 0.0005, "loss": 2.1039, "step": 245740 }, { "epoch": 0.9353851541149334, "grad_norm": 0.1297006458044052, "learning_rate": 0.0005, "loss": 2.1, "step": 245750 }, { "epoch": 0.9354232165830562, "grad_norm": 0.13608314096927643, "learning_rate": 0.0005, "loss": 2.111, "step": 245760 }, { "epoch": 0.9354612790511788, "grad_norm": 0.15426835417747498, "learning_rate": 0.0005, "loss": 2.1017, "step": 245770 }, { "epoch": 0.9354993415193015, "grad_norm": 0.133841872215271, "learning_rate": 0.0005, "loss": 2.1031, "step": 245780 }, { "epoch": 0.9355374039874241, "grad_norm": 0.1368071734905243, "learning_rate": 0.0005, "loss": 2.1125, "step": 245790 }, { "epoch": 0.9355754664555468, "grad_norm": 0.12981870770454407, "learning_rate": 0.0005, "loss": 2.1074, "step": 245800 }, { "epoch": 0.9356135289236696, "grad_norm": 0.12528027594089508, "learning_rate": 0.0005, "loss": 2.0901, "step": 245810 }, { "epoch": 0.9356515913917922, "grad_norm": 0.13445281982421875, "learning_rate": 0.0005, "loss": 2.1164, "step": 245820 }, { "epoch": 0.9356896538599149, "grad_norm": 0.1206308901309967, "learning_rate": 0.0005, "loss": 2.0953, "step": 245830 }, { "epoch": 0.9357277163280375, "grad_norm": 0.11260633170604706, "learning_rate": 0.0005, "loss": 2.1091, "step": 245840 }, { "epoch": 0.9357657787961603, "grad_norm": 0.12294673174619675, "learning_rate": 0.0005, "loss": 2.0873, "step": 245850 }, { "epoch": 0.935803841264283, "grad_norm": 0.11559704691171646, "learning_rate": 0.0005, "loss": 2.1096, "step": 245860 }, { "epoch": 0.9358419037324056, "grad_norm": 0.12751439213752747, "learning_rate": 0.0005, "loss": 2.0912, "step": 245870 }, { "epoch": 0.9358799662005283, "grad_norm": 0.12568078935146332, "learning_rate": 0.0005, "loss": 2.1088, "step": 245880 }, { "epoch": 0.935918028668651, "grad_norm": 0.12613657116889954, "learning_rate": 0.0005, "loss": 2.0834, "step": 245890 }, { "epoch": 0.9359560911367737, "grad_norm": 0.131540447473526, "learning_rate": 0.0005, "loss": 2.1036, "step": 245900 }, { "epoch": 0.9359941536048964, "grad_norm": 0.13464267551898956, "learning_rate": 0.0005, "loss": 2.1125, "step": 245910 }, { "epoch": 0.936032216073019, "grad_norm": 0.1384028196334839, "learning_rate": 0.0005, "loss": 2.101, "step": 245920 }, { "epoch": 0.9360702785411418, "grad_norm": 0.13131101429462433, "learning_rate": 0.0005, "loss": 2.0984, "step": 245930 }, { "epoch": 0.9361083410092644, "grad_norm": 0.13290739059448242, "learning_rate": 0.0005, "loss": 2.1036, "step": 245940 }, { "epoch": 0.9361464034773871, "grad_norm": 0.12560899555683136, "learning_rate": 0.0005, "loss": 2.1127, "step": 245950 }, { "epoch": 0.9361844659455097, "grad_norm": 0.12775875627994537, "learning_rate": 0.0005, "loss": 2.1124, "step": 245960 }, { "epoch": 0.9362225284136324, "grad_norm": 0.12522175908088684, "learning_rate": 0.0005, "loss": 2.1201, "step": 245970 }, { "epoch": 0.9362605908817552, "grad_norm": 0.13588744401931763, "learning_rate": 0.0005, "loss": 2.0895, "step": 245980 }, { "epoch": 0.9362986533498778, "grad_norm": 0.11728566884994507, "learning_rate": 0.0005, "loss": 2.1052, "step": 245990 }, { "epoch": 0.9363367158180005, "grad_norm": 0.14255517721176147, "learning_rate": 0.0005, "loss": 2.0999, "step": 246000 }, { "epoch": 0.9363747782861231, "grad_norm": 0.1172654777765274, "learning_rate": 0.0005, "loss": 2.0814, "step": 246010 }, { "epoch": 0.9364128407542459, "grad_norm": 0.12044258415699005, "learning_rate": 0.0005, "loss": 2.0894, "step": 246020 }, { "epoch": 0.9364509032223686, "grad_norm": 0.13443933427333832, "learning_rate": 0.0005, "loss": 2.1162, "step": 246030 }, { "epoch": 0.9364889656904912, "grad_norm": 0.3673914968967438, "learning_rate": 0.0005, "loss": 2.1048, "step": 246040 }, { "epoch": 0.9365270281586139, "grad_norm": 0.136433944106102, "learning_rate": 0.0005, "loss": 2.1084, "step": 246050 }, { "epoch": 0.9365650906267367, "grad_norm": 0.12014338374137878, "learning_rate": 0.0005, "loss": 2.11, "step": 246060 }, { "epoch": 0.9366031530948593, "grad_norm": 0.12036501616239548, "learning_rate": 0.0005, "loss": 2.0948, "step": 246070 }, { "epoch": 0.936641215562982, "grad_norm": 0.1268536001443863, "learning_rate": 0.0005, "loss": 2.1086, "step": 246080 }, { "epoch": 0.9366792780311046, "grad_norm": 0.13351179659366608, "learning_rate": 0.0005, "loss": 2.1002, "step": 246090 }, { "epoch": 0.9367173404992273, "grad_norm": 0.12870946526527405, "learning_rate": 0.0005, "loss": 2.1041, "step": 246100 }, { "epoch": 0.93675540296735, "grad_norm": 0.13091915845870972, "learning_rate": 0.0005, "loss": 2.1116, "step": 246110 }, { "epoch": 0.9367934654354727, "grad_norm": 0.12250076234340668, "learning_rate": 0.0005, "loss": 2.1068, "step": 246120 }, { "epoch": 0.9368315279035954, "grad_norm": 0.1321459263563156, "learning_rate": 0.0005, "loss": 2.0904, "step": 246130 }, { "epoch": 0.936869590371718, "grad_norm": 0.13747379183769226, "learning_rate": 0.0005, "loss": 2.1052, "step": 246140 }, { "epoch": 0.9369076528398408, "grad_norm": 0.12448690831661224, "learning_rate": 0.0005, "loss": 2.0996, "step": 246150 }, { "epoch": 0.9369457153079634, "grad_norm": 0.12248706817626953, "learning_rate": 0.0005, "loss": 2.0959, "step": 246160 }, { "epoch": 0.9369837777760861, "grad_norm": 0.12774351239204407, "learning_rate": 0.0005, "loss": 2.0891, "step": 246170 }, { "epoch": 0.9370218402442088, "grad_norm": 0.1347806453704834, "learning_rate": 0.0005, "loss": 2.1158, "step": 246180 }, { "epoch": 0.9370599027123315, "grad_norm": 0.12406744062900543, "learning_rate": 0.0005, "loss": 2.104, "step": 246190 }, { "epoch": 0.9370979651804542, "grad_norm": 0.12361624836921692, "learning_rate": 0.0005, "loss": 2.0981, "step": 246200 }, { "epoch": 0.9371360276485768, "grad_norm": 0.12326037883758545, "learning_rate": 0.0005, "loss": 2.1063, "step": 246210 }, { "epoch": 0.9371740901166995, "grad_norm": 0.13250653445720673, "learning_rate": 0.0005, "loss": 2.0898, "step": 246220 }, { "epoch": 0.9372121525848222, "grad_norm": 0.12099245190620422, "learning_rate": 0.0005, "loss": 2.1046, "step": 246230 }, { "epoch": 0.9372502150529449, "grad_norm": 0.12234620004892349, "learning_rate": 0.0005, "loss": 2.1028, "step": 246240 }, { "epoch": 0.9372882775210676, "grad_norm": 0.12898355722427368, "learning_rate": 0.0005, "loss": 2.1015, "step": 246250 }, { "epoch": 0.9373263399891902, "grad_norm": 0.1290358155965805, "learning_rate": 0.0005, "loss": 2.0973, "step": 246260 }, { "epoch": 0.9373644024573129, "grad_norm": 0.11929871886968613, "learning_rate": 0.0005, "loss": 2.0819, "step": 246270 }, { "epoch": 0.9374024649254357, "grad_norm": 0.15042388439178467, "learning_rate": 0.0005, "loss": 2.1043, "step": 246280 }, { "epoch": 0.9374405273935583, "grad_norm": 0.12484857439994812, "learning_rate": 0.0005, "loss": 2.1036, "step": 246290 }, { "epoch": 0.937478589861681, "grad_norm": 0.14481140673160553, "learning_rate": 0.0005, "loss": 2.0918, "step": 246300 }, { "epoch": 0.9375166523298036, "grad_norm": 0.13843390345573425, "learning_rate": 0.0005, "loss": 2.0957, "step": 246310 }, { "epoch": 0.9375547147979264, "grad_norm": 0.12472894787788391, "learning_rate": 0.0005, "loss": 2.1021, "step": 246320 }, { "epoch": 0.9375927772660491, "grad_norm": 0.12258918583393097, "learning_rate": 0.0005, "loss": 2.1111, "step": 246330 }, { "epoch": 0.9376308397341717, "grad_norm": 0.12393265962600708, "learning_rate": 0.0005, "loss": 2.1038, "step": 246340 }, { "epoch": 0.9376689022022944, "grad_norm": 0.13368739187717438, "learning_rate": 0.0005, "loss": 2.1036, "step": 246350 }, { "epoch": 0.9377069646704171, "grad_norm": 0.1267513930797577, "learning_rate": 0.0005, "loss": 2.1226, "step": 246360 }, { "epoch": 0.9377450271385398, "grad_norm": 0.13105252385139465, "learning_rate": 0.0005, "loss": 2.1052, "step": 246370 }, { "epoch": 0.9377830896066625, "grad_norm": 0.1518404483795166, "learning_rate": 0.0005, "loss": 2.1042, "step": 246380 }, { "epoch": 0.9378211520747851, "grad_norm": 0.14227744936943054, "learning_rate": 0.0005, "loss": 2.1014, "step": 246390 }, { "epoch": 0.9378592145429078, "grad_norm": 0.12951421737670898, "learning_rate": 0.0005, "loss": 2.1019, "step": 246400 }, { "epoch": 0.9378972770110305, "grad_norm": 0.12221680581569672, "learning_rate": 0.0005, "loss": 2.1066, "step": 246410 }, { "epoch": 0.9379353394791532, "grad_norm": 0.1388002187013626, "learning_rate": 0.0005, "loss": 2.1032, "step": 246420 }, { "epoch": 0.9379734019472759, "grad_norm": 0.11565013229846954, "learning_rate": 0.0005, "loss": 2.104, "step": 246430 }, { "epoch": 0.9380114644153985, "grad_norm": 0.12359726428985596, "learning_rate": 0.0005, "loss": 2.104, "step": 246440 }, { "epoch": 0.9380495268835213, "grad_norm": 0.12950290739536285, "learning_rate": 0.0005, "loss": 2.0792, "step": 246450 }, { "epoch": 0.9380875893516439, "grad_norm": 0.12705442309379578, "learning_rate": 0.0005, "loss": 2.1095, "step": 246460 }, { "epoch": 0.9381256518197666, "grad_norm": 0.12719959020614624, "learning_rate": 0.0005, "loss": 2.1049, "step": 246470 }, { "epoch": 0.9381637142878892, "grad_norm": 0.11859887838363647, "learning_rate": 0.0005, "loss": 2.1008, "step": 246480 }, { "epoch": 0.938201776756012, "grad_norm": 0.1301327794790268, "learning_rate": 0.0005, "loss": 2.1028, "step": 246490 }, { "epoch": 0.9382398392241347, "grad_norm": 0.13133437931537628, "learning_rate": 0.0005, "loss": 2.0973, "step": 246500 }, { "epoch": 0.9382779016922573, "grad_norm": 0.14098897576332092, "learning_rate": 0.0005, "loss": 2.1155, "step": 246510 }, { "epoch": 0.93831596416038, "grad_norm": 0.12575797736644745, "learning_rate": 0.0005, "loss": 2.1181, "step": 246520 }, { "epoch": 0.9383540266285026, "grad_norm": 0.1329401731491089, "learning_rate": 0.0005, "loss": 2.1053, "step": 246530 }, { "epoch": 0.9383920890966254, "grad_norm": 0.11845256388187408, "learning_rate": 0.0005, "loss": 2.1138, "step": 246540 }, { "epoch": 0.9384301515647481, "grad_norm": 0.12183622270822525, "learning_rate": 0.0005, "loss": 2.1077, "step": 246550 }, { "epoch": 0.9384682140328707, "grad_norm": 0.13629251718521118, "learning_rate": 0.0005, "loss": 2.1016, "step": 246560 }, { "epoch": 0.9385062765009934, "grad_norm": 0.13547387719154358, "learning_rate": 0.0005, "loss": 2.1043, "step": 246570 }, { "epoch": 0.9385443389691162, "grad_norm": 0.12108206003904343, "learning_rate": 0.0005, "loss": 2.0825, "step": 246580 }, { "epoch": 0.9385824014372388, "grad_norm": 0.12693358957767487, "learning_rate": 0.0005, "loss": 2.1106, "step": 246590 }, { "epoch": 0.9386204639053615, "grad_norm": 0.12511876225471497, "learning_rate": 0.0005, "loss": 2.0898, "step": 246600 }, { "epoch": 0.9386585263734841, "grad_norm": 0.12276551872491837, "learning_rate": 0.0005, "loss": 2.1128, "step": 246610 }, { "epoch": 0.9386965888416069, "grad_norm": 0.12356232106685638, "learning_rate": 0.0005, "loss": 2.0989, "step": 246620 }, { "epoch": 0.9387346513097296, "grad_norm": 0.1273866444826126, "learning_rate": 0.0005, "loss": 2.1128, "step": 246630 }, { "epoch": 0.9387727137778522, "grad_norm": 0.1403125822544098, "learning_rate": 0.0005, "loss": 2.0997, "step": 246640 }, { "epoch": 0.9388107762459749, "grad_norm": 0.11948072165250778, "learning_rate": 0.0005, "loss": 2.0924, "step": 246650 }, { "epoch": 0.9388488387140975, "grad_norm": 0.1343478113412857, "learning_rate": 0.0005, "loss": 2.1074, "step": 246660 }, { "epoch": 0.9388869011822203, "grad_norm": 0.1337791532278061, "learning_rate": 0.0005, "loss": 2.1009, "step": 246670 }, { "epoch": 0.938924963650343, "grad_norm": 0.12476864457130432, "learning_rate": 0.0005, "loss": 2.1123, "step": 246680 }, { "epoch": 0.9389630261184656, "grad_norm": 0.11873772740364075, "learning_rate": 0.0005, "loss": 2.1102, "step": 246690 }, { "epoch": 0.9390010885865883, "grad_norm": 0.1170477494597435, "learning_rate": 0.0005, "loss": 2.09, "step": 246700 }, { "epoch": 0.939039151054711, "grad_norm": 0.1320660561323166, "learning_rate": 0.0005, "loss": 2.1019, "step": 246710 }, { "epoch": 0.9390772135228337, "grad_norm": 0.11541859060525894, "learning_rate": 0.0005, "loss": 2.1069, "step": 246720 }, { "epoch": 0.9391152759909563, "grad_norm": 0.1338876336812973, "learning_rate": 0.0005, "loss": 2.1025, "step": 246730 }, { "epoch": 0.939153338459079, "grad_norm": 0.12336771190166473, "learning_rate": 0.0005, "loss": 2.0931, "step": 246740 }, { "epoch": 0.9391914009272018, "grad_norm": 0.12288336455821991, "learning_rate": 0.0005, "loss": 2.1079, "step": 246750 }, { "epoch": 0.9392294633953244, "grad_norm": 0.12184653431177139, "learning_rate": 0.0005, "loss": 2.1069, "step": 246760 }, { "epoch": 0.9392675258634471, "grad_norm": 0.11762639880180359, "learning_rate": 0.0005, "loss": 2.1051, "step": 246770 }, { "epoch": 0.9393055883315697, "grad_norm": 0.12614305317401886, "learning_rate": 0.0005, "loss": 2.1057, "step": 246780 }, { "epoch": 0.9393436507996925, "grad_norm": 0.14764799177646637, "learning_rate": 0.0005, "loss": 2.0982, "step": 246790 }, { "epoch": 0.9393817132678152, "grad_norm": 0.13260680437088013, "learning_rate": 0.0005, "loss": 2.1081, "step": 246800 }, { "epoch": 0.9394197757359378, "grad_norm": 0.14004816114902496, "learning_rate": 0.0005, "loss": 2.0995, "step": 246810 }, { "epoch": 0.9394578382040605, "grad_norm": 0.12658268213272095, "learning_rate": 0.0005, "loss": 2.0992, "step": 246820 }, { "epoch": 0.9394959006721831, "grad_norm": 0.12620680034160614, "learning_rate": 0.0005, "loss": 2.1137, "step": 246830 }, { "epoch": 0.9395339631403059, "grad_norm": 0.13516870141029358, "learning_rate": 0.0005, "loss": 2.0992, "step": 246840 }, { "epoch": 0.9395720256084286, "grad_norm": 0.13516582548618317, "learning_rate": 0.0005, "loss": 2.1021, "step": 246850 }, { "epoch": 0.9396100880765512, "grad_norm": 0.13392318785190582, "learning_rate": 0.0005, "loss": 2.0997, "step": 246860 }, { "epoch": 0.9396481505446739, "grad_norm": 0.13238194584846497, "learning_rate": 0.0005, "loss": 2.0991, "step": 246870 }, { "epoch": 0.9396862130127966, "grad_norm": 0.7064668536186218, "learning_rate": 0.0005, "loss": 2.1056, "step": 246880 }, { "epoch": 0.9397242754809193, "grad_norm": 0.12507928907871246, "learning_rate": 0.0005, "loss": 2.0891, "step": 246890 }, { "epoch": 0.939762337949042, "grad_norm": 0.13007816672325134, "learning_rate": 0.0005, "loss": 2.1077, "step": 246900 }, { "epoch": 0.9398004004171646, "grad_norm": 0.15461665391921997, "learning_rate": 0.0005, "loss": 2.087, "step": 246910 }, { "epoch": 0.9398384628852874, "grad_norm": 0.12466249614953995, "learning_rate": 0.0005, "loss": 2.11, "step": 246920 }, { "epoch": 0.93987652535341, "grad_norm": 0.11939510703086853, "learning_rate": 0.0005, "loss": 2.1034, "step": 246930 }, { "epoch": 0.9399145878215327, "grad_norm": 0.13170850276947021, "learning_rate": 0.0005, "loss": 2.1046, "step": 246940 }, { "epoch": 0.9399526502896554, "grad_norm": 0.13314881920814514, "learning_rate": 0.0005, "loss": 2.1053, "step": 246950 }, { "epoch": 0.939990712757778, "grad_norm": 0.1443936675786972, "learning_rate": 0.0005, "loss": 2.1114, "step": 246960 }, { "epoch": 0.9400287752259008, "grad_norm": 0.12503737211227417, "learning_rate": 0.0005, "loss": 2.1041, "step": 246970 }, { "epoch": 0.9400668376940234, "grad_norm": 0.12784408032894135, "learning_rate": 0.0005, "loss": 2.1006, "step": 246980 }, { "epoch": 0.9401049001621461, "grad_norm": 0.12824499607086182, "learning_rate": 0.0005, "loss": 2.0988, "step": 246990 }, { "epoch": 0.9401429626302688, "grad_norm": 0.1239519789814949, "learning_rate": 0.0005, "loss": 2.1176, "step": 247000 }, { "epoch": 0.9401810250983915, "grad_norm": 0.137156143784523, "learning_rate": 0.0005, "loss": 2.099, "step": 247010 }, { "epoch": 0.9402190875665142, "grad_norm": 0.751557469367981, "learning_rate": 0.0005, "loss": 2.1167, "step": 247020 }, { "epoch": 0.9402571500346368, "grad_norm": 0.15649625658988953, "learning_rate": 0.0005, "loss": 2.1155, "step": 247030 }, { "epoch": 0.9402952125027595, "grad_norm": 0.14045241475105286, "learning_rate": 0.0005, "loss": 2.1013, "step": 247040 }, { "epoch": 0.9403332749708823, "grad_norm": 0.1222781240940094, "learning_rate": 0.0005, "loss": 2.0918, "step": 247050 }, { "epoch": 0.9403713374390049, "grad_norm": 0.1385979801416397, "learning_rate": 0.0005, "loss": 2.0976, "step": 247060 }, { "epoch": 0.9404093999071276, "grad_norm": 0.12437307089567184, "learning_rate": 0.0005, "loss": 2.1006, "step": 247070 }, { "epoch": 0.9404474623752502, "grad_norm": 0.1382979303598404, "learning_rate": 0.0005, "loss": 2.1078, "step": 247080 }, { "epoch": 0.9404855248433729, "grad_norm": 0.12030627578496933, "learning_rate": 0.0005, "loss": 2.1009, "step": 247090 }, { "epoch": 0.9405235873114957, "grad_norm": 0.12918499112129211, "learning_rate": 0.0005, "loss": 2.1046, "step": 247100 }, { "epoch": 0.9405616497796183, "grad_norm": 0.13981960713863373, "learning_rate": 0.0005, "loss": 2.1071, "step": 247110 }, { "epoch": 0.940599712247741, "grad_norm": 0.12755261361598969, "learning_rate": 0.0005, "loss": 2.0877, "step": 247120 }, { "epoch": 0.9406377747158636, "grad_norm": 0.12737154960632324, "learning_rate": 0.0005, "loss": 2.0937, "step": 247130 }, { "epoch": 0.9406758371839864, "grad_norm": 0.11642778664827347, "learning_rate": 0.0005, "loss": 2.1076, "step": 247140 }, { "epoch": 0.940713899652109, "grad_norm": 0.1331937313079834, "learning_rate": 0.0005, "loss": 2.1023, "step": 247150 }, { "epoch": 0.9407519621202317, "grad_norm": 0.1338392198085785, "learning_rate": 0.0005, "loss": 2.1047, "step": 247160 }, { "epoch": 0.9407900245883544, "grad_norm": 0.12696552276611328, "learning_rate": 0.0005, "loss": 2.1057, "step": 247170 }, { "epoch": 0.9408280870564771, "grad_norm": 0.13532081246376038, "learning_rate": 0.0005, "loss": 2.0985, "step": 247180 }, { "epoch": 0.9408661495245998, "grad_norm": 0.14050805568695068, "learning_rate": 0.0005, "loss": 2.1226, "step": 247190 }, { "epoch": 0.9409042119927225, "grad_norm": 0.11687342077493668, "learning_rate": 0.0005, "loss": 2.0951, "step": 247200 }, { "epoch": 0.9409422744608451, "grad_norm": 0.1321887969970703, "learning_rate": 0.0005, "loss": 2.1039, "step": 247210 }, { "epoch": 0.9409803369289679, "grad_norm": 0.13208705186843872, "learning_rate": 0.0005, "loss": 2.1058, "step": 247220 }, { "epoch": 0.9410183993970905, "grad_norm": 0.1374688446521759, "learning_rate": 0.0005, "loss": 2.1033, "step": 247230 }, { "epoch": 0.9410564618652132, "grad_norm": 0.12851817905902863, "learning_rate": 0.0005, "loss": 2.0954, "step": 247240 }, { "epoch": 0.9410945243333358, "grad_norm": 0.1238558441400528, "learning_rate": 0.0005, "loss": 2.1036, "step": 247250 }, { "epoch": 0.9411325868014585, "grad_norm": 0.1303836703300476, "learning_rate": 0.0005, "loss": 2.1036, "step": 247260 }, { "epoch": 0.9411706492695813, "grad_norm": 0.1266433149576187, "learning_rate": 0.0005, "loss": 2.1014, "step": 247270 }, { "epoch": 0.9412087117377039, "grad_norm": 0.12886379659175873, "learning_rate": 0.0005, "loss": 2.1136, "step": 247280 }, { "epoch": 0.9412467742058266, "grad_norm": 0.1281256079673767, "learning_rate": 0.0005, "loss": 2.1112, "step": 247290 }, { "epoch": 0.9412848366739492, "grad_norm": 0.11581743508577347, "learning_rate": 0.0005, "loss": 2.0954, "step": 247300 }, { "epoch": 0.941322899142072, "grad_norm": 0.12750853598117828, "learning_rate": 0.0005, "loss": 2.1053, "step": 247310 }, { "epoch": 0.9413609616101947, "grad_norm": 0.1209491565823555, "learning_rate": 0.0005, "loss": 2.1049, "step": 247320 }, { "epoch": 0.9413990240783173, "grad_norm": 0.13058196008205414, "learning_rate": 0.0005, "loss": 2.1105, "step": 247330 }, { "epoch": 0.94143708654644, "grad_norm": 0.1260518729686737, "learning_rate": 0.0005, "loss": 2.1128, "step": 247340 }, { "epoch": 0.9414751490145628, "grad_norm": 0.13261444866657257, "learning_rate": 0.0005, "loss": 2.0957, "step": 247350 }, { "epoch": 0.9415132114826854, "grad_norm": 0.11897791922092438, "learning_rate": 0.0005, "loss": 2.0984, "step": 247360 }, { "epoch": 0.9415512739508081, "grad_norm": 0.13250933587551117, "learning_rate": 0.0005, "loss": 2.1055, "step": 247370 }, { "epoch": 0.9415893364189307, "grad_norm": 0.12644755840301514, "learning_rate": 0.0005, "loss": 2.0994, "step": 247380 }, { "epoch": 0.9416273988870534, "grad_norm": 0.12750305235385895, "learning_rate": 0.0005, "loss": 2.0909, "step": 247390 }, { "epoch": 0.9416654613551761, "grad_norm": 0.1279606968164444, "learning_rate": 0.0005, "loss": 2.1115, "step": 247400 }, { "epoch": 0.9417035238232988, "grad_norm": 0.12689395248889923, "learning_rate": 0.0005, "loss": 2.1138, "step": 247410 }, { "epoch": 0.9417415862914215, "grad_norm": 0.13141341507434845, "learning_rate": 0.0005, "loss": 2.1012, "step": 247420 }, { "epoch": 0.9417796487595441, "grad_norm": 0.12511730194091797, "learning_rate": 0.0005, "loss": 2.0976, "step": 247430 }, { "epoch": 0.9418177112276669, "grad_norm": 0.12846639752388, "learning_rate": 0.0005, "loss": 2.0834, "step": 247440 }, { "epoch": 0.9418557736957895, "grad_norm": 0.14211717247962952, "learning_rate": 0.0005, "loss": 2.1066, "step": 247450 }, { "epoch": 0.9418938361639122, "grad_norm": 0.5756912231445312, "learning_rate": 0.0005, "loss": 2.1088, "step": 247460 }, { "epoch": 0.9419318986320349, "grad_norm": 0.12941479682922363, "learning_rate": 0.0005, "loss": 2.0928, "step": 247470 }, { "epoch": 0.9419699611001576, "grad_norm": 0.12610690295696259, "learning_rate": 0.0005, "loss": 2.0897, "step": 247480 }, { "epoch": 0.9420080235682803, "grad_norm": 0.13161595165729523, "learning_rate": 0.0005, "loss": 2.109, "step": 247490 }, { "epoch": 0.9420460860364029, "grad_norm": 0.13354597985744476, "learning_rate": 0.0005, "loss": 2.089, "step": 247500 }, { "epoch": 0.9420841485045256, "grad_norm": 0.13382618129253387, "learning_rate": 0.0005, "loss": 2.1034, "step": 247510 }, { "epoch": 0.9421222109726483, "grad_norm": 0.18865372240543365, "learning_rate": 0.0005, "loss": 2.1201, "step": 247520 }, { "epoch": 0.942160273440771, "grad_norm": 0.1534956693649292, "learning_rate": 0.0005, "loss": 2.0872, "step": 247530 }, { "epoch": 0.9421983359088937, "grad_norm": 0.11790409684181213, "learning_rate": 0.0005, "loss": 2.0983, "step": 247540 }, { "epoch": 0.9422363983770163, "grad_norm": 0.1351659893989563, "learning_rate": 0.0005, "loss": 2.088, "step": 247550 }, { "epoch": 0.942274460845139, "grad_norm": 0.1758767068386078, "learning_rate": 0.0005, "loss": 2.1036, "step": 247560 }, { "epoch": 0.9423125233132618, "grad_norm": 0.13484224677085876, "learning_rate": 0.0005, "loss": 2.1167, "step": 247570 }, { "epoch": 0.9423505857813844, "grad_norm": 0.12165403366088867, "learning_rate": 0.0005, "loss": 2.1024, "step": 247580 }, { "epoch": 0.9423886482495071, "grad_norm": 0.11824820935726166, "learning_rate": 0.0005, "loss": 2.1098, "step": 247590 }, { "epoch": 0.9424267107176297, "grad_norm": 1.1216968297958374, "learning_rate": 0.0005, "loss": 2.0955, "step": 247600 }, { "epoch": 0.9424647731857525, "grad_norm": 0.12063819169998169, "learning_rate": 0.0005, "loss": 2.1108, "step": 247610 }, { "epoch": 0.9425028356538752, "grad_norm": 0.1480524092912674, "learning_rate": 0.0005, "loss": 2.1013, "step": 247620 }, { "epoch": 0.9425408981219978, "grad_norm": 0.13503408432006836, "learning_rate": 0.0005, "loss": 2.1038, "step": 247630 }, { "epoch": 0.9425789605901205, "grad_norm": 0.13576354086399078, "learning_rate": 0.0005, "loss": 2.101, "step": 247640 }, { "epoch": 0.9426170230582432, "grad_norm": 0.1289045363664627, "learning_rate": 0.0005, "loss": 2.0925, "step": 247650 }, { "epoch": 0.9426550855263659, "grad_norm": 0.11602700501680374, "learning_rate": 0.0005, "loss": 2.0865, "step": 247660 }, { "epoch": 0.9426931479944886, "grad_norm": 0.12578940391540527, "learning_rate": 0.0005, "loss": 2.0986, "step": 247670 }, { "epoch": 0.9427312104626112, "grad_norm": 0.13036979734897614, "learning_rate": 0.0005, "loss": 2.0812, "step": 247680 }, { "epoch": 0.9427692729307339, "grad_norm": 0.13717511296272278, "learning_rate": 0.0005, "loss": 2.1052, "step": 247690 }, { "epoch": 0.9428073353988566, "grad_norm": 0.12567205727100372, "learning_rate": 0.0005, "loss": 2.0959, "step": 247700 }, { "epoch": 0.9428453978669793, "grad_norm": 0.12764301896095276, "learning_rate": 0.0005, "loss": 2.1024, "step": 247710 }, { "epoch": 0.942883460335102, "grad_norm": 0.11658774316310883, "learning_rate": 0.0005, "loss": 2.1039, "step": 247720 }, { "epoch": 0.9429215228032246, "grad_norm": 0.12862013280391693, "learning_rate": 0.0005, "loss": 2.0962, "step": 247730 }, { "epoch": 0.9429595852713474, "grad_norm": 0.13183024525642395, "learning_rate": 0.0005, "loss": 2.1047, "step": 247740 }, { "epoch": 0.94299764773947, "grad_norm": 0.12923283874988556, "learning_rate": 0.0005, "loss": 2.1027, "step": 247750 }, { "epoch": 0.9430357102075927, "grad_norm": 0.1428668349981308, "learning_rate": 0.0005, "loss": 2.0905, "step": 247760 }, { "epoch": 0.9430737726757153, "grad_norm": 0.1358453780412674, "learning_rate": 0.0005, "loss": 2.0955, "step": 247770 }, { "epoch": 0.9431118351438381, "grad_norm": 0.12302374839782715, "learning_rate": 0.0005, "loss": 2.0954, "step": 247780 }, { "epoch": 0.9431498976119608, "grad_norm": 0.1263979822397232, "learning_rate": 0.0005, "loss": 2.0955, "step": 247790 }, { "epoch": 0.9431879600800834, "grad_norm": 0.13652725517749786, "learning_rate": 0.0005, "loss": 2.1084, "step": 247800 }, { "epoch": 0.9432260225482061, "grad_norm": 0.12320644408464432, "learning_rate": 0.0005, "loss": 2.1066, "step": 247810 }, { "epoch": 0.9432640850163287, "grad_norm": 0.12279729545116425, "learning_rate": 0.0005, "loss": 2.1042, "step": 247820 }, { "epoch": 0.9433021474844515, "grad_norm": 0.14760860800743103, "learning_rate": 0.0005, "loss": 2.0997, "step": 247830 }, { "epoch": 0.9433402099525742, "grad_norm": 0.13388857245445251, "learning_rate": 0.0005, "loss": 2.1191, "step": 247840 }, { "epoch": 0.9433782724206968, "grad_norm": 0.129547119140625, "learning_rate": 0.0005, "loss": 2.1103, "step": 247850 }, { "epoch": 0.9434163348888195, "grad_norm": 0.129354789853096, "learning_rate": 0.0005, "loss": 2.0993, "step": 247860 }, { "epoch": 0.9434543973569423, "grad_norm": 0.12934815883636475, "learning_rate": 0.0005, "loss": 2.1133, "step": 247870 }, { "epoch": 0.9434924598250649, "grad_norm": 0.1424483209848404, "learning_rate": 0.0005, "loss": 2.0971, "step": 247880 }, { "epoch": 0.9435305222931876, "grad_norm": 0.12889623641967773, "learning_rate": 0.0005, "loss": 2.0925, "step": 247890 }, { "epoch": 0.9435685847613102, "grad_norm": 0.13118860125541687, "learning_rate": 0.0005, "loss": 2.1085, "step": 247900 }, { "epoch": 0.943606647229433, "grad_norm": 0.13081367313861847, "learning_rate": 0.0005, "loss": 2.0936, "step": 247910 }, { "epoch": 0.9436447096975557, "grad_norm": 0.12048673629760742, "learning_rate": 0.0005, "loss": 2.117, "step": 247920 }, { "epoch": 0.9436827721656783, "grad_norm": 0.12423050403594971, "learning_rate": 0.0005, "loss": 2.0875, "step": 247930 }, { "epoch": 0.943720834633801, "grad_norm": 0.13558757305145264, "learning_rate": 0.0005, "loss": 2.0964, "step": 247940 }, { "epoch": 0.9437588971019236, "grad_norm": 0.12062255293130875, "learning_rate": 0.0005, "loss": 2.1027, "step": 247950 }, { "epoch": 0.9437969595700464, "grad_norm": 0.1254424899816513, "learning_rate": 0.0005, "loss": 2.1003, "step": 247960 }, { "epoch": 0.943835022038169, "grad_norm": 0.1260339617729187, "learning_rate": 0.0005, "loss": 2.0948, "step": 247970 }, { "epoch": 0.9438730845062917, "grad_norm": 0.1362634301185608, "learning_rate": 0.0005, "loss": 2.0929, "step": 247980 }, { "epoch": 0.9439111469744144, "grad_norm": 0.12181158363819122, "learning_rate": 0.0005, "loss": 2.1001, "step": 247990 }, { "epoch": 0.9439492094425371, "grad_norm": 0.12834899127483368, "learning_rate": 0.0005, "loss": 2.1178, "step": 248000 }, { "epoch": 0.9439872719106598, "grad_norm": 0.13318775594234467, "learning_rate": 0.0005, "loss": 2.0983, "step": 248010 }, { "epoch": 0.9440253343787824, "grad_norm": 0.11598256975412369, "learning_rate": 0.0005, "loss": 2.1021, "step": 248020 }, { "epoch": 0.9440633968469051, "grad_norm": 0.12331517040729523, "learning_rate": 0.0005, "loss": 2.0933, "step": 248030 }, { "epoch": 0.9441014593150279, "grad_norm": 0.13327628374099731, "learning_rate": 0.0005, "loss": 2.0928, "step": 248040 }, { "epoch": 0.9441395217831505, "grad_norm": 0.127610981464386, "learning_rate": 0.0005, "loss": 2.0876, "step": 248050 }, { "epoch": 0.9441775842512732, "grad_norm": 0.13593189418315887, "learning_rate": 0.0005, "loss": 2.0969, "step": 248060 }, { "epoch": 0.9442156467193958, "grad_norm": 0.12497703731060028, "learning_rate": 0.0005, "loss": 2.0892, "step": 248070 }, { "epoch": 0.9442537091875186, "grad_norm": 0.1429484635591507, "learning_rate": 0.0005, "loss": 2.0871, "step": 248080 }, { "epoch": 0.9442917716556413, "grad_norm": 0.13125035166740417, "learning_rate": 0.0005, "loss": 2.0917, "step": 248090 }, { "epoch": 0.9443298341237639, "grad_norm": 0.13275553286075592, "learning_rate": 0.0005, "loss": 2.0919, "step": 248100 }, { "epoch": 0.9443678965918866, "grad_norm": 0.12504088878631592, "learning_rate": 0.0005, "loss": 2.1143, "step": 248110 }, { "epoch": 0.9444059590600092, "grad_norm": 0.1175403743982315, "learning_rate": 0.0005, "loss": 2.1118, "step": 248120 }, { "epoch": 0.944444021528132, "grad_norm": 0.1290964037179947, "learning_rate": 0.0005, "loss": 2.0974, "step": 248130 }, { "epoch": 0.9444820839962547, "grad_norm": 0.13197334110736847, "learning_rate": 0.0005, "loss": 2.0911, "step": 248140 }, { "epoch": 0.9445201464643773, "grad_norm": 0.12857557833194733, "learning_rate": 0.0005, "loss": 2.1063, "step": 248150 }, { "epoch": 0.9445582089325, "grad_norm": 0.12041082978248596, "learning_rate": 0.0005, "loss": 2.1015, "step": 248160 }, { "epoch": 0.9445962714006227, "grad_norm": 0.12345477193593979, "learning_rate": 0.0005, "loss": 2.1055, "step": 248170 }, { "epoch": 0.9446343338687454, "grad_norm": 0.12356548011302948, "learning_rate": 0.0005, "loss": 2.1122, "step": 248180 }, { "epoch": 0.9446723963368681, "grad_norm": 0.14413070678710938, "learning_rate": 0.0005, "loss": 2.0933, "step": 248190 }, { "epoch": 0.9447104588049907, "grad_norm": 0.13794247806072235, "learning_rate": 0.0005, "loss": 2.0998, "step": 248200 }, { "epoch": 0.9447485212731135, "grad_norm": 0.12572318315505981, "learning_rate": 0.0005, "loss": 2.1056, "step": 248210 }, { "epoch": 0.9447865837412361, "grad_norm": 0.12373986095190048, "learning_rate": 0.0005, "loss": 2.1031, "step": 248220 }, { "epoch": 0.9448246462093588, "grad_norm": 0.1262924075126648, "learning_rate": 0.0005, "loss": 2.108, "step": 248230 }, { "epoch": 0.9448627086774815, "grad_norm": 0.13063013553619385, "learning_rate": 0.0005, "loss": 2.1124, "step": 248240 }, { "epoch": 0.9449007711456041, "grad_norm": 0.12661738693714142, "learning_rate": 0.0005, "loss": 2.1015, "step": 248250 }, { "epoch": 0.9449388336137269, "grad_norm": 0.12877686321735382, "learning_rate": 0.0005, "loss": 2.1158, "step": 248260 }, { "epoch": 0.9449768960818495, "grad_norm": 0.1472814381122589, "learning_rate": 0.0005, "loss": 2.1109, "step": 248270 }, { "epoch": 0.9450149585499722, "grad_norm": 0.13327832520008087, "learning_rate": 0.0005, "loss": 2.1098, "step": 248280 }, { "epoch": 0.9450530210180949, "grad_norm": 0.14059068262577057, "learning_rate": 0.0005, "loss": 2.0912, "step": 248290 }, { "epoch": 0.9450910834862176, "grad_norm": 0.13199837505817413, "learning_rate": 0.0005, "loss": 2.1185, "step": 248300 }, { "epoch": 0.9451291459543403, "grad_norm": 0.15628120303153992, "learning_rate": 0.0005, "loss": 2.0906, "step": 248310 }, { "epoch": 0.9451672084224629, "grad_norm": 0.12141703814268112, "learning_rate": 0.0005, "loss": 2.0871, "step": 248320 }, { "epoch": 0.9452052708905856, "grad_norm": 0.13831599056720734, "learning_rate": 0.0005, "loss": 2.1066, "step": 248330 }, { "epoch": 0.9452433333587084, "grad_norm": 0.1294797658920288, "learning_rate": 0.0005, "loss": 2.1104, "step": 248340 }, { "epoch": 0.945281395826831, "grad_norm": 0.1410902440547943, "learning_rate": 0.0005, "loss": 2.1107, "step": 248350 }, { "epoch": 0.9453194582949537, "grad_norm": 0.1276555210351944, "learning_rate": 0.0005, "loss": 2.114, "step": 248360 }, { "epoch": 0.9453575207630763, "grad_norm": 0.12058349698781967, "learning_rate": 0.0005, "loss": 2.0932, "step": 248370 }, { "epoch": 0.9453955832311991, "grad_norm": 0.12352532893419266, "learning_rate": 0.0005, "loss": 2.1064, "step": 248380 }, { "epoch": 0.9454336456993218, "grad_norm": 0.13646119832992554, "learning_rate": 0.0005, "loss": 2.1133, "step": 248390 }, { "epoch": 0.9454717081674444, "grad_norm": 0.13688063621520996, "learning_rate": 0.0005, "loss": 2.1084, "step": 248400 }, { "epoch": 0.9455097706355671, "grad_norm": 0.12911947071552277, "learning_rate": 0.0005, "loss": 2.1124, "step": 248410 }, { "epoch": 0.9455478331036897, "grad_norm": 0.12154773622751236, "learning_rate": 0.0005, "loss": 2.1031, "step": 248420 }, { "epoch": 0.9455858955718125, "grad_norm": 0.12716318666934967, "learning_rate": 0.0005, "loss": 2.0912, "step": 248430 }, { "epoch": 0.9456239580399352, "grad_norm": 0.13813437521457672, "learning_rate": 0.0005, "loss": 2.0894, "step": 248440 }, { "epoch": 0.9456620205080578, "grad_norm": 0.14493019878864288, "learning_rate": 0.0005, "loss": 2.1078, "step": 248450 }, { "epoch": 0.9457000829761805, "grad_norm": 0.1254408359527588, "learning_rate": 0.0005, "loss": 2.1012, "step": 248460 }, { "epoch": 0.9457381454443032, "grad_norm": 0.11893883347511292, "learning_rate": 0.0005, "loss": 2.1041, "step": 248470 }, { "epoch": 0.9457762079124259, "grad_norm": 0.12803934514522552, "learning_rate": 0.0005, "loss": 2.1083, "step": 248480 }, { "epoch": 0.9458142703805485, "grad_norm": 0.1335584819316864, "learning_rate": 0.0005, "loss": 2.1051, "step": 248490 }, { "epoch": 0.9458523328486712, "grad_norm": 0.12377738207578659, "learning_rate": 0.0005, "loss": 2.1059, "step": 248500 }, { "epoch": 0.945890395316794, "grad_norm": 0.12790720164775848, "learning_rate": 0.0005, "loss": 2.1145, "step": 248510 }, { "epoch": 0.9459284577849166, "grad_norm": 0.12881170213222504, "learning_rate": 0.0005, "loss": 2.1081, "step": 248520 }, { "epoch": 0.9459665202530393, "grad_norm": 0.12704525887966156, "learning_rate": 0.0005, "loss": 2.1022, "step": 248530 }, { "epoch": 0.946004582721162, "grad_norm": 0.1225334033370018, "learning_rate": 0.0005, "loss": 2.1058, "step": 248540 }, { "epoch": 0.9460426451892846, "grad_norm": 0.1326608657836914, "learning_rate": 0.0005, "loss": 2.1084, "step": 248550 }, { "epoch": 0.9460807076574074, "grad_norm": 0.13305629789829254, "learning_rate": 0.0005, "loss": 2.1014, "step": 248560 }, { "epoch": 0.94611877012553, "grad_norm": 0.1200888454914093, "learning_rate": 0.0005, "loss": 2.1172, "step": 248570 }, { "epoch": 0.9461568325936527, "grad_norm": 0.1219412088394165, "learning_rate": 0.0005, "loss": 2.0992, "step": 248580 }, { "epoch": 0.9461948950617753, "grad_norm": 0.12386654317378998, "learning_rate": 0.0005, "loss": 2.0999, "step": 248590 }, { "epoch": 0.9462329575298981, "grad_norm": 0.13242636620998383, "learning_rate": 0.0005, "loss": 2.1036, "step": 248600 }, { "epoch": 0.9462710199980208, "grad_norm": 0.13440661132335663, "learning_rate": 0.0005, "loss": 2.1085, "step": 248610 }, { "epoch": 0.9463090824661434, "grad_norm": 0.12407206743955612, "learning_rate": 0.0005, "loss": 2.0971, "step": 248620 }, { "epoch": 0.9463471449342661, "grad_norm": 0.1191297248005867, "learning_rate": 0.0005, "loss": 2.1073, "step": 248630 }, { "epoch": 0.9463852074023889, "grad_norm": 0.13571171462535858, "learning_rate": 0.0005, "loss": 2.0961, "step": 248640 }, { "epoch": 0.9464232698705115, "grad_norm": 0.11383175104856491, "learning_rate": 0.0005, "loss": 2.0871, "step": 248650 }, { "epoch": 0.9464613323386342, "grad_norm": 0.11983030289411545, "learning_rate": 0.0005, "loss": 2.0944, "step": 248660 }, { "epoch": 0.9464993948067568, "grad_norm": 0.1278083771467209, "learning_rate": 0.0005, "loss": 2.1011, "step": 248670 }, { "epoch": 0.9465374572748795, "grad_norm": 0.13592961430549622, "learning_rate": 0.0005, "loss": 2.101, "step": 248680 }, { "epoch": 0.9465755197430022, "grad_norm": 0.12375950068235397, "learning_rate": 0.0005, "loss": 2.1153, "step": 248690 }, { "epoch": 0.9466135822111249, "grad_norm": 0.12705379724502563, "learning_rate": 0.0005, "loss": 2.1013, "step": 248700 }, { "epoch": 0.9466516446792476, "grad_norm": 0.1328490674495697, "learning_rate": 0.0005, "loss": 2.1078, "step": 248710 }, { "epoch": 0.9466897071473702, "grad_norm": 0.12585778534412384, "learning_rate": 0.0005, "loss": 2.117, "step": 248720 }, { "epoch": 0.946727769615493, "grad_norm": 0.12267450988292694, "learning_rate": 0.0005, "loss": 2.0945, "step": 248730 }, { "epoch": 0.9467658320836156, "grad_norm": 0.12765011191368103, "learning_rate": 0.0005, "loss": 2.1021, "step": 248740 }, { "epoch": 0.9468038945517383, "grad_norm": 0.1310986429452896, "learning_rate": 0.0005, "loss": 2.1065, "step": 248750 }, { "epoch": 0.946841957019861, "grad_norm": 0.11241777241230011, "learning_rate": 0.0005, "loss": 2.0968, "step": 248760 }, { "epoch": 0.9468800194879837, "grad_norm": 0.12467561662197113, "learning_rate": 0.0005, "loss": 2.0972, "step": 248770 }, { "epoch": 0.9469180819561064, "grad_norm": 0.13519595563411713, "learning_rate": 0.0005, "loss": 2.1063, "step": 248780 }, { "epoch": 0.946956144424229, "grad_norm": 0.12287317216396332, "learning_rate": 0.0005, "loss": 2.1096, "step": 248790 }, { "epoch": 0.9469942068923517, "grad_norm": 0.324131041765213, "learning_rate": 0.0005, "loss": 2.0982, "step": 248800 }, { "epoch": 0.9470322693604745, "grad_norm": 0.15612849593162537, "learning_rate": 0.0005, "loss": 2.1101, "step": 248810 }, { "epoch": 0.9470703318285971, "grad_norm": 0.12411175668239594, "learning_rate": 0.0005, "loss": 2.1011, "step": 248820 }, { "epoch": 0.9471083942967198, "grad_norm": 0.11235556751489639, "learning_rate": 0.0005, "loss": 2.1089, "step": 248830 }, { "epoch": 0.9471464567648424, "grad_norm": 0.12063275277614594, "learning_rate": 0.0005, "loss": 2.1187, "step": 248840 }, { "epoch": 0.9471845192329651, "grad_norm": 0.12278567999601364, "learning_rate": 0.0005, "loss": 2.1022, "step": 248850 }, { "epoch": 0.9472225817010879, "grad_norm": 0.13061478734016418, "learning_rate": 0.0005, "loss": 2.094, "step": 248860 }, { "epoch": 0.9472606441692105, "grad_norm": 0.1256982833147049, "learning_rate": 0.0005, "loss": 2.1104, "step": 248870 }, { "epoch": 0.9472987066373332, "grad_norm": 0.11956235766410828, "learning_rate": 0.0005, "loss": 2.1075, "step": 248880 }, { "epoch": 0.9473367691054558, "grad_norm": 0.11729145050048828, "learning_rate": 0.0005, "loss": 2.1044, "step": 248890 }, { "epoch": 0.9473748315735786, "grad_norm": 0.15000468492507935, "learning_rate": 0.0005, "loss": 2.104, "step": 248900 }, { "epoch": 0.9474128940417013, "grad_norm": 0.12541407346725464, "learning_rate": 0.0005, "loss": 2.0987, "step": 248910 }, { "epoch": 0.9474509565098239, "grad_norm": 0.13246984779834747, "learning_rate": 0.0005, "loss": 2.1019, "step": 248920 }, { "epoch": 0.9474890189779466, "grad_norm": 0.12549960613250732, "learning_rate": 0.0005, "loss": 2.0964, "step": 248930 }, { "epoch": 0.9475270814460693, "grad_norm": 0.1670214980840683, "learning_rate": 0.0005, "loss": 2.1118, "step": 248940 }, { "epoch": 0.947565143914192, "grad_norm": 0.1267949342727661, "learning_rate": 0.0005, "loss": 2.098, "step": 248950 }, { "epoch": 0.9476032063823147, "grad_norm": 0.13572907447814941, "learning_rate": 0.0005, "loss": 2.097, "step": 248960 }, { "epoch": 0.9476412688504373, "grad_norm": 0.1221279725432396, "learning_rate": 0.0005, "loss": 2.1115, "step": 248970 }, { "epoch": 0.94767933131856, "grad_norm": 0.13833478093147278, "learning_rate": 0.0005, "loss": 2.1056, "step": 248980 }, { "epoch": 0.9477173937866827, "grad_norm": 0.1366894692182541, "learning_rate": 0.0005, "loss": 2.1062, "step": 248990 }, { "epoch": 0.9477554562548054, "grad_norm": 0.12488869577646255, "learning_rate": 0.0005, "loss": 2.1104, "step": 249000 }, { "epoch": 0.947793518722928, "grad_norm": 0.12555605173110962, "learning_rate": 0.0005, "loss": 2.116, "step": 249010 }, { "epoch": 0.9478315811910507, "grad_norm": 0.12620453536510468, "learning_rate": 0.0005, "loss": 2.1055, "step": 249020 }, { "epoch": 0.9478696436591735, "grad_norm": 0.11639034003019333, "learning_rate": 0.0005, "loss": 2.0893, "step": 249030 }, { "epoch": 0.9479077061272961, "grad_norm": 0.1186845600605011, "learning_rate": 0.0005, "loss": 2.0902, "step": 249040 }, { "epoch": 0.9479457685954188, "grad_norm": 0.1193389892578125, "learning_rate": 0.0005, "loss": 2.1028, "step": 249050 }, { "epoch": 0.9479838310635414, "grad_norm": 0.12538054585456848, "learning_rate": 0.0005, "loss": 2.1147, "step": 249060 }, { "epoch": 0.9480218935316642, "grad_norm": 0.14422617852687836, "learning_rate": 0.0005, "loss": 2.1313, "step": 249070 }, { "epoch": 0.9480599559997869, "grad_norm": 0.12844279408454895, "learning_rate": 0.0005, "loss": 2.0998, "step": 249080 }, { "epoch": 0.9480980184679095, "grad_norm": 0.14020352065563202, "learning_rate": 0.0005, "loss": 2.1053, "step": 249090 }, { "epoch": 0.9481360809360322, "grad_norm": 0.134184330701828, "learning_rate": 0.0005, "loss": 2.1042, "step": 249100 }, { "epoch": 0.9481741434041548, "grad_norm": 0.1266484558582306, "learning_rate": 0.0005, "loss": 2.1079, "step": 249110 }, { "epoch": 0.9482122058722776, "grad_norm": 0.1303141862154007, "learning_rate": 0.0005, "loss": 2.106, "step": 249120 }, { "epoch": 0.9482502683404003, "grad_norm": 0.135605126619339, "learning_rate": 0.0005, "loss": 2.0991, "step": 249130 }, { "epoch": 0.9482883308085229, "grad_norm": 0.1270579844713211, "learning_rate": 0.0005, "loss": 2.0937, "step": 249140 }, { "epoch": 0.9483263932766456, "grad_norm": 0.13215014338493347, "learning_rate": 0.0005, "loss": 2.1145, "step": 249150 }, { "epoch": 0.9483644557447684, "grad_norm": 0.1274470090866089, "learning_rate": 0.0005, "loss": 2.1004, "step": 249160 }, { "epoch": 0.948402518212891, "grad_norm": 0.12177138775587082, "learning_rate": 0.0005, "loss": 2.1, "step": 249170 }, { "epoch": 0.9484405806810137, "grad_norm": 0.13077914714813232, "learning_rate": 0.0005, "loss": 2.1041, "step": 249180 }, { "epoch": 0.9484786431491363, "grad_norm": 0.12208457291126251, "learning_rate": 0.0005, "loss": 2.1037, "step": 249190 }, { "epoch": 0.9485167056172591, "grad_norm": 0.11963622272014618, "learning_rate": 0.0005, "loss": 2.1094, "step": 249200 }, { "epoch": 0.9485547680853817, "grad_norm": 0.12783189117908478, "learning_rate": 0.0005, "loss": 2.1061, "step": 249210 }, { "epoch": 0.9485928305535044, "grad_norm": 0.12605759501457214, "learning_rate": 0.0005, "loss": 2.1072, "step": 249220 }, { "epoch": 0.9486308930216271, "grad_norm": 0.12036082148551941, "learning_rate": 0.0005, "loss": 2.1177, "step": 249230 }, { "epoch": 0.9486689554897498, "grad_norm": 0.1330842226743698, "learning_rate": 0.0005, "loss": 2.1113, "step": 249240 }, { "epoch": 0.9487070179578725, "grad_norm": 0.12136498838663101, "learning_rate": 0.0005, "loss": 2.1144, "step": 249250 }, { "epoch": 0.9487450804259951, "grad_norm": 0.12630410492420197, "learning_rate": 0.0005, "loss": 2.1001, "step": 249260 }, { "epoch": 0.9487831428941178, "grad_norm": 0.11599517613649368, "learning_rate": 0.0005, "loss": 2.1049, "step": 249270 }, { "epoch": 0.9488212053622405, "grad_norm": 0.12922899425029755, "learning_rate": 0.0005, "loss": 2.101, "step": 249280 }, { "epoch": 0.9488592678303632, "grad_norm": 0.1108672022819519, "learning_rate": 0.0005, "loss": 2.0947, "step": 249290 }, { "epoch": 0.9488973302984859, "grad_norm": 0.12226884067058563, "learning_rate": 0.0005, "loss": 2.0969, "step": 249300 }, { "epoch": 0.9489353927666085, "grad_norm": 0.13119786977767944, "learning_rate": 0.0005, "loss": 2.0973, "step": 249310 }, { "epoch": 0.9489734552347312, "grad_norm": 0.13422635197639465, "learning_rate": 0.0005, "loss": 2.1018, "step": 249320 }, { "epoch": 0.949011517702854, "grad_norm": 0.13358595967292786, "learning_rate": 0.0005, "loss": 2.0927, "step": 249330 }, { "epoch": 0.9490495801709766, "grad_norm": 0.1265745759010315, "learning_rate": 0.0005, "loss": 2.1053, "step": 249340 }, { "epoch": 0.9490876426390993, "grad_norm": 0.12609122693538666, "learning_rate": 0.0005, "loss": 2.1144, "step": 249350 }, { "epoch": 0.9491257051072219, "grad_norm": 0.13447849452495575, "learning_rate": 0.0005, "loss": 2.1172, "step": 249360 }, { "epoch": 0.9491637675753447, "grad_norm": 0.12742824852466583, "learning_rate": 0.0005, "loss": 2.1091, "step": 249370 }, { "epoch": 0.9492018300434674, "grad_norm": 0.13406914472579956, "learning_rate": 0.0005, "loss": 2.1143, "step": 249380 }, { "epoch": 0.94923989251159, "grad_norm": 0.11564747989177704, "learning_rate": 0.0005, "loss": 2.0953, "step": 249390 }, { "epoch": 0.9492779549797127, "grad_norm": 0.1280737817287445, "learning_rate": 0.0005, "loss": 2.1177, "step": 249400 }, { "epoch": 0.9493160174478353, "grad_norm": 0.12426292151212692, "learning_rate": 0.0005, "loss": 2.0874, "step": 249410 }, { "epoch": 0.9493540799159581, "grad_norm": 0.16510789096355438, "learning_rate": 0.0005, "loss": 2.1088, "step": 249420 }, { "epoch": 0.9493921423840808, "grad_norm": 0.12697450816631317, "learning_rate": 0.0005, "loss": 2.1111, "step": 249430 }, { "epoch": 0.9494302048522034, "grad_norm": 0.12335682660341263, "learning_rate": 0.0005, "loss": 2.0911, "step": 249440 }, { "epoch": 0.9494682673203261, "grad_norm": 0.1332320123910904, "learning_rate": 0.0005, "loss": 2.0922, "step": 249450 }, { "epoch": 0.9495063297884488, "grad_norm": 0.1257762312889099, "learning_rate": 0.0005, "loss": 2.1131, "step": 249460 }, { "epoch": 0.9495443922565715, "grad_norm": 0.1309550553560257, "learning_rate": 0.0005, "loss": 2.1085, "step": 249470 }, { "epoch": 0.9495824547246942, "grad_norm": 0.12939979135990143, "learning_rate": 0.0005, "loss": 2.1081, "step": 249480 }, { "epoch": 0.9496205171928168, "grad_norm": 0.14679569005966187, "learning_rate": 0.0005, "loss": 2.0969, "step": 249490 }, { "epoch": 0.9496585796609396, "grad_norm": 0.12528207898139954, "learning_rate": 0.0005, "loss": 2.0869, "step": 249500 }, { "epoch": 0.9496966421290622, "grad_norm": 0.12247055023908615, "learning_rate": 0.0005, "loss": 2.1172, "step": 249510 }, { "epoch": 0.9497347045971849, "grad_norm": 0.12854456901550293, "learning_rate": 0.0005, "loss": 2.0919, "step": 249520 }, { "epoch": 0.9497727670653076, "grad_norm": 0.12855258584022522, "learning_rate": 0.0005, "loss": 2.1202, "step": 249530 }, { "epoch": 0.9498108295334302, "grad_norm": 0.12317433953285217, "learning_rate": 0.0005, "loss": 2.1034, "step": 249540 }, { "epoch": 0.949848892001553, "grad_norm": 0.12889666855335236, "learning_rate": 0.0005, "loss": 2.1098, "step": 249550 }, { "epoch": 0.9498869544696756, "grad_norm": 0.14366105198860168, "learning_rate": 0.0005, "loss": 2.1181, "step": 249560 }, { "epoch": 0.9499250169377983, "grad_norm": 0.12399886548519135, "learning_rate": 0.0005, "loss": 2.0979, "step": 249570 }, { "epoch": 0.949963079405921, "grad_norm": 0.135155588388443, "learning_rate": 0.0005, "loss": 2.1094, "step": 249580 }, { "epoch": 0.9500011418740437, "grad_norm": 0.1358819603919983, "learning_rate": 0.0005, "loss": 2.1038, "step": 249590 }, { "epoch": 0.9500392043421664, "grad_norm": 0.14831727743148804, "learning_rate": 0.0005, "loss": 2.1037, "step": 249600 }, { "epoch": 0.950077266810289, "grad_norm": 0.12076780200004578, "learning_rate": 0.0005, "loss": 2.1058, "step": 249610 }, { "epoch": 0.9501153292784117, "grad_norm": 0.13320599496364594, "learning_rate": 0.0005, "loss": 2.0904, "step": 249620 }, { "epoch": 0.9501533917465345, "grad_norm": 0.1212395578622818, "learning_rate": 0.0005, "loss": 2.0867, "step": 249630 }, { "epoch": 0.9501914542146571, "grad_norm": 0.1279253512620926, "learning_rate": 0.0005, "loss": 2.0957, "step": 249640 }, { "epoch": 0.9502295166827798, "grad_norm": 0.1288677155971527, "learning_rate": 0.0005, "loss": 2.1055, "step": 249650 }, { "epoch": 0.9502675791509024, "grad_norm": 0.13300663232803345, "learning_rate": 0.0005, "loss": 2.1129, "step": 249660 }, { "epoch": 0.9503056416190252, "grad_norm": 0.1199730783700943, "learning_rate": 0.0005, "loss": 2.1016, "step": 249670 }, { "epoch": 0.9503437040871479, "grad_norm": 0.1486423760652542, "learning_rate": 0.0005, "loss": 2.0971, "step": 249680 }, { "epoch": 0.9503817665552705, "grad_norm": 0.12857380509376526, "learning_rate": 0.0005, "loss": 2.1181, "step": 249690 }, { "epoch": 0.9504198290233932, "grad_norm": 0.1329220086336136, "learning_rate": 0.0005, "loss": 2.1051, "step": 249700 }, { "epoch": 0.9504578914915158, "grad_norm": 0.1408364474773407, "learning_rate": 0.0005, "loss": 2.1045, "step": 249710 }, { "epoch": 0.9504959539596386, "grad_norm": 0.12741191685199738, "learning_rate": 0.0005, "loss": 2.0829, "step": 249720 }, { "epoch": 0.9505340164277613, "grad_norm": 0.13314951956272125, "learning_rate": 0.0005, "loss": 2.1102, "step": 249730 }, { "epoch": 0.9505720788958839, "grad_norm": 0.38605743646621704, "learning_rate": 0.0005, "loss": 2.1011, "step": 249740 }, { "epoch": 0.9506101413640066, "grad_norm": 0.13181251287460327, "learning_rate": 0.0005, "loss": 2.1101, "step": 249750 }, { "epoch": 0.9506482038321293, "grad_norm": 0.13556815683841705, "learning_rate": 0.0005, "loss": 2.099, "step": 249760 }, { "epoch": 0.950686266300252, "grad_norm": 0.12179868668317795, "learning_rate": 0.0005, "loss": 2.0837, "step": 249770 }, { "epoch": 0.9507243287683746, "grad_norm": 0.14233249425888062, "learning_rate": 0.0005, "loss": 2.1113, "step": 249780 }, { "epoch": 0.9507623912364973, "grad_norm": 0.13981717824935913, "learning_rate": 0.0005, "loss": 2.1053, "step": 249790 }, { "epoch": 0.9508004537046201, "grad_norm": 0.14350582659244537, "learning_rate": 0.0005, "loss": 2.0909, "step": 249800 }, { "epoch": 0.9508385161727427, "grad_norm": 0.14137966930866241, "learning_rate": 0.0005, "loss": 2.1323, "step": 249810 }, { "epoch": 0.9508765786408654, "grad_norm": 0.1286575198173523, "learning_rate": 0.0005, "loss": 2.1118, "step": 249820 }, { "epoch": 0.950914641108988, "grad_norm": 0.12449745088815689, "learning_rate": 0.0005, "loss": 2.0988, "step": 249830 }, { "epoch": 0.9509527035771107, "grad_norm": 0.12966904044151306, "learning_rate": 0.0005, "loss": 2.0999, "step": 249840 }, { "epoch": 0.9509907660452335, "grad_norm": 0.13560040295124054, "learning_rate": 0.0005, "loss": 2.1054, "step": 249850 }, { "epoch": 0.9510288285133561, "grad_norm": 0.12174062430858612, "learning_rate": 0.0005, "loss": 2.1084, "step": 249860 }, { "epoch": 0.9510668909814788, "grad_norm": 0.12325585633516312, "learning_rate": 0.0005, "loss": 2.1071, "step": 249870 }, { "epoch": 0.9511049534496014, "grad_norm": 0.13168565928936005, "learning_rate": 0.0005, "loss": 2.0978, "step": 249880 }, { "epoch": 0.9511430159177242, "grad_norm": 0.11944977194070816, "learning_rate": 0.0005, "loss": 2.113, "step": 249890 }, { "epoch": 0.9511810783858469, "grad_norm": 0.12736931443214417, "learning_rate": 0.0005, "loss": 2.1085, "step": 249900 }, { "epoch": 0.9512191408539695, "grad_norm": 0.11464428901672363, "learning_rate": 0.0005, "loss": 2.109, "step": 249910 }, { "epoch": 0.9512572033220922, "grad_norm": 0.1317955106496811, "learning_rate": 0.0005, "loss": 2.097, "step": 249920 }, { "epoch": 0.951295265790215, "grad_norm": 0.1277564913034439, "learning_rate": 0.0005, "loss": 2.1075, "step": 249930 }, { "epoch": 0.9513333282583376, "grad_norm": 0.13540315628051758, "learning_rate": 0.0005, "loss": 2.1096, "step": 249940 }, { "epoch": 0.9513713907264603, "grad_norm": 0.13464035093784332, "learning_rate": 0.0005, "loss": 2.0946, "step": 249950 }, { "epoch": 0.9514094531945829, "grad_norm": 0.1361413598060608, "learning_rate": 0.0005, "loss": 2.1157, "step": 249960 }, { "epoch": 0.9514475156627056, "grad_norm": 0.1233205646276474, "learning_rate": 0.0005, "loss": 2.11, "step": 249970 }, { "epoch": 0.9514855781308283, "grad_norm": 0.13336284458637238, "learning_rate": 0.0005, "loss": 2.0963, "step": 249980 }, { "epoch": 0.951523640598951, "grad_norm": 0.12346182018518448, "learning_rate": 0.0005, "loss": 2.1006, "step": 249990 }, { "epoch": 0.9515617030670737, "grad_norm": 0.12522675096988678, "learning_rate": 0.0005, "loss": 2.12, "step": 250000 }, { "epoch": 0.9515997655351963, "grad_norm": 0.12237617373466492, "learning_rate": 0.0005, "loss": 2.0996, "step": 250010 }, { "epoch": 0.9516378280033191, "grad_norm": 0.13073542714118958, "learning_rate": 0.0005, "loss": 2.085, "step": 250020 }, { "epoch": 0.9516758904714417, "grad_norm": 0.13486787676811218, "learning_rate": 0.0005, "loss": 2.1008, "step": 250030 }, { "epoch": 0.9517139529395644, "grad_norm": 0.11351048201322556, "learning_rate": 0.0005, "loss": 2.1068, "step": 250040 }, { "epoch": 0.9517520154076871, "grad_norm": 0.13018272817134857, "learning_rate": 0.0005, "loss": 2.0918, "step": 250050 }, { "epoch": 0.9517900778758098, "grad_norm": 0.12174280732870102, "learning_rate": 0.0005, "loss": 2.1071, "step": 250060 }, { "epoch": 0.9518281403439325, "grad_norm": 0.12217634916305542, "learning_rate": 0.0005, "loss": 2.0947, "step": 250070 }, { "epoch": 0.9518662028120551, "grad_norm": 0.11637187749147415, "learning_rate": 0.0005, "loss": 2.0821, "step": 250080 }, { "epoch": 0.9519042652801778, "grad_norm": 0.13275794684886932, "learning_rate": 0.0005, "loss": 2.095, "step": 250090 }, { "epoch": 0.9519423277483006, "grad_norm": 0.12678100168704987, "learning_rate": 0.0005, "loss": 2.1104, "step": 250100 }, { "epoch": 0.9519803902164232, "grad_norm": 0.12198396027088165, "learning_rate": 0.0005, "loss": 2.1168, "step": 250110 }, { "epoch": 0.9520184526845459, "grad_norm": 0.13225769996643066, "learning_rate": 0.0005, "loss": 2.0756, "step": 250120 }, { "epoch": 0.9520565151526685, "grad_norm": 0.12288650125265121, "learning_rate": 0.0005, "loss": 2.104, "step": 250130 }, { "epoch": 0.9520945776207912, "grad_norm": 0.1239933893084526, "learning_rate": 0.0005, "loss": 2.1034, "step": 250140 }, { "epoch": 0.952132640088914, "grad_norm": 0.12927429378032684, "learning_rate": 0.0005, "loss": 2.1074, "step": 250150 }, { "epoch": 0.9521707025570366, "grad_norm": 0.12449338287115097, "learning_rate": 0.0005, "loss": 2.109, "step": 250160 }, { "epoch": 0.9522087650251593, "grad_norm": 0.11477695405483246, "learning_rate": 0.0005, "loss": 2.102, "step": 250170 }, { "epoch": 0.9522468274932819, "grad_norm": 0.12830223143100739, "learning_rate": 0.0005, "loss": 2.0995, "step": 250180 }, { "epoch": 0.9522848899614047, "grad_norm": 0.12702523171901703, "learning_rate": 0.0005, "loss": 2.0972, "step": 250190 }, { "epoch": 0.9523229524295274, "grad_norm": 0.12919588387012482, "learning_rate": 0.0005, "loss": 2.108, "step": 250200 }, { "epoch": 0.95236101489765, "grad_norm": 0.127348393201828, "learning_rate": 0.0005, "loss": 2.0885, "step": 250210 }, { "epoch": 0.9523990773657727, "grad_norm": 0.11610375344753265, "learning_rate": 0.0005, "loss": 2.0882, "step": 250220 }, { "epoch": 0.9524371398338954, "grad_norm": 0.12946386635303497, "learning_rate": 0.0005, "loss": 2.1101, "step": 250230 }, { "epoch": 0.9524752023020181, "grad_norm": 0.1378440260887146, "learning_rate": 0.0005, "loss": 2.1029, "step": 250240 }, { "epoch": 0.9525132647701408, "grad_norm": 0.13060183823108673, "learning_rate": 0.0005, "loss": 2.1015, "step": 250250 }, { "epoch": 0.9525513272382634, "grad_norm": 0.12115290760993958, "learning_rate": 0.0005, "loss": 2.1038, "step": 250260 }, { "epoch": 0.9525893897063861, "grad_norm": 0.15255451202392578, "learning_rate": 0.0005, "loss": 2.1064, "step": 250270 }, { "epoch": 0.9526274521745088, "grad_norm": 0.13809344172477722, "learning_rate": 0.0005, "loss": 2.1076, "step": 250280 }, { "epoch": 0.9526655146426315, "grad_norm": 0.12919829785823822, "learning_rate": 0.0005, "loss": 2.0889, "step": 250290 }, { "epoch": 0.9527035771107542, "grad_norm": 0.12677544355392456, "learning_rate": 0.0005, "loss": 2.1066, "step": 250300 }, { "epoch": 0.9527416395788768, "grad_norm": 0.13873299956321716, "learning_rate": 0.0005, "loss": 2.0926, "step": 250310 }, { "epoch": 0.9527797020469996, "grad_norm": 0.1411902904510498, "learning_rate": 0.0005, "loss": 2.1008, "step": 250320 }, { "epoch": 0.9528177645151222, "grad_norm": 0.11741477996110916, "learning_rate": 0.0005, "loss": 2.0815, "step": 250330 }, { "epoch": 0.9528558269832449, "grad_norm": 0.13320787250995636, "learning_rate": 0.0005, "loss": 2.0868, "step": 250340 }, { "epoch": 0.9528938894513675, "grad_norm": 0.11807240545749664, "learning_rate": 0.0005, "loss": 2.1052, "step": 250350 }, { "epoch": 0.9529319519194903, "grad_norm": 0.13958625495433807, "learning_rate": 0.0005, "loss": 2.0908, "step": 250360 }, { "epoch": 0.952970014387613, "grad_norm": 0.12476134300231934, "learning_rate": 0.0005, "loss": 2.1189, "step": 250370 }, { "epoch": 0.9530080768557356, "grad_norm": 0.13018640875816345, "learning_rate": 0.0005, "loss": 2.1106, "step": 250380 }, { "epoch": 0.9530461393238583, "grad_norm": 0.12088967114686966, "learning_rate": 0.0005, "loss": 2.1134, "step": 250390 }, { "epoch": 0.953084201791981, "grad_norm": 0.11502324044704437, "learning_rate": 0.0005, "loss": 2.1032, "step": 250400 }, { "epoch": 0.9531222642601037, "grad_norm": 0.12260492146015167, "learning_rate": 0.0005, "loss": 2.1069, "step": 250410 }, { "epoch": 0.9531603267282264, "grad_norm": 0.12876258790493011, "learning_rate": 0.0005, "loss": 2.0924, "step": 250420 }, { "epoch": 0.953198389196349, "grad_norm": 0.13146209716796875, "learning_rate": 0.0005, "loss": 2.1143, "step": 250430 }, { "epoch": 0.9532364516644717, "grad_norm": 0.1314568817615509, "learning_rate": 0.0005, "loss": 2.0992, "step": 250440 }, { "epoch": 0.9532745141325945, "grad_norm": 0.12796203792095184, "learning_rate": 0.0005, "loss": 2.1, "step": 250450 }, { "epoch": 0.9533125766007171, "grad_norm": 0.14050385355949402, "learning_rate": 0.0005, "loss": 2.1035, "step": 250460 }, { "epoch": 0.9533506390688398, "grad_norm": 0.12566983699798584, "learning_rate": 0.0005, "loss": 2.1013, "step": 250470 }, { "epoch": 0.9533887015369624, "grad_norm": 0.13052697479724884, "learning_rate": 0.0005, "loss": 2.1125, "step": 250480 }, { "epoch": 0.9534267640050852, "grad_norm": 0.1295281946659088, "learning_rate": 0.0005, "loss": 2.0874, "step": 250490 }, { "epoch": 0.9534648264732078, "grad_norm": 0.12345805764198303, "learning_rate": 0.0005, "loss": 2.1025, "step": 250500 }, { "epoch": 0.9535028889413305, "grad_norm": 0.14121460914611816, "learning_rate": 0.0005, "loss": 2.114, "step": 250510 }, { "epoch": 0.9535409514094532, "grad_norm": 0.12259361147880554, "learning_rate": 0.0005, "loss": 2.0837, "step": 250520 }, { "epoch": 0.9535790138775759, "grad_norm": 0.1267411708831787, "learning_rate": 0.0005, "loss": 2.1178, "step": 250530 }, { "epoch": 0.9536170763456986, "grad_norm": 0.12196764349937439, "learning_rate": 0.0005, "loss": 2.1178, "step": 250540 }, { "epoch": 0.9536551388138212, "grad_norm": 0.1457517147064209, "learning_rate": 0.0005, "loss": 2.1152, "step": 250550 }, { "epoch": 0.9536932012819439, "grad_norm": 0.1303631216287613, "learning_rate": 0.0005, "loss": 2.0985, "step": 250560 }, { "epoch": 0.9537312637500666, "grad_norm": 0.12143102288246155, "learning_rate": 0.0005, "loss": 2.1066, "step": 250570 }, { "epoch": 0.9537693262181893, "grad_norm": 0.14176031947135925, "learning_rate": 0.0005, "loss": 2.1064, "step": 250580 }, { "epoch": 0.953807388686312, "grad_norm": 0.13000178337097168, "learning_rate": 0.0005, "loss": 2.1093, "step": 250590 }, { "epoch": 0.9538454511544346, "grad_norm": 0.1253688484430313, "learning_rate": 0.0005, "loss": 2.11, "step": 250600 }, { "epoch": 0.9538835136225573, "grad_norm": 0.12691253423690796, "learning_rate": 0.0005, "loss": 2.0735, "step": 250610 }, { "epoch": 0.9539215760906801, "grad_norm": 0.12398819625377655, "learning_rate": 0.0005, "loss": 2.1024, "step": 250620 }, { "epoch": 0.9539596385588027, "grad_norm": 0.12237461656332016, "learning_rate": 0.0005, "loss": 2.1147, "step": 250630 }, { "epoch": 0.9539977010269254, "grad_norm": 0.12592361867427826, "learning_rate": 0.0005, "loss": 2.1065, "step": 250640 }, { "epoch": 0.954035763495048, "grad_norm": 0.1414032280445099, "learning_rate": 0.0005, "loss": 2.1024, "step": 250650 }, { "epoch": 0.9540738259631708, "grad_norm": 0.13277390599250793, "learning_rate": 0.0005, "loss": 2.1083, "step": 250660 }, { "epoch": 0.9541118884312935, "grad_norm": 0.12286174297332764, "learning_rate": 0.0005, "loss": 2.0943, "step": 250670 }, { "epoch": 0.9541499508994161, "grad_norm": 0.11582817882299423, "learning_rate": 0.0005, "loss": 2.0956, "step": 250680 }, { "epoch": 0.9541880133675388, "grad_norm": 0.13498404622077942, "learning_rate": 0.0005, "loss": 2.1024, "step": 250690 }, { "epoch": 0.9542260758356614, "grad_norm": 0.14900074899196625, "learning_rate": 0.0005, "loss": 2.1048, "step": 250700 }, { "epoch": 0.9542641383037842, "grad_norm": 0.12041672319173813, "learning_rate": 0.0005, "loss": 2.1047, "step": 250710 }, { "epoch": 0.9543022007719069, "grad_norm": 0.13749726116657257, "learning_rate": 0.0005, "loss": 2.0916, "step": 250720 }, { "epoch": 0.9543402632400295, "grad_norm": 0.12858223915100098, "learning_rate": 0.0005, "loss": 2.1169, "step": 250730 }, { "epoch": 0.9543783257081522, "grad_norm": 0.14657242596149445, "learning_rate": 0.0005, "loss": 2.0959, "step": 250740 }, { "epoch": 0.9544163881762749, "grad_norm": 0.12545934319496155, "learning_rate": 0.0005, "loss": 2.1024, "step": 250750 }, { "epoch": 0.9544544506443976, "grad_norm": 0.1151088997721672, "learning_rate": 0.0005, "loss": 2.0965, "step": 250760 }, { "epoch": 0.9544925131125203, "grad_norm": 0.13286983966827393, "learning_rate": 0.0005, "loss": 2.1202, "step": 250770 }, { "epoch": 0.9545305755806429, "grad_norm": 0.1329016238451004, "learning_rate": 0.0005, "loss": 2.0976, "step": 250780 }, { "epoch": 0.9545686380487657, "grad_norm": 0.1329929530620575, "learning_rate": 0.0005, "loss": 2.1052, "step": 250790 }, { "epoch": 0.9546067005168883, "grad_norm": 0.1350843906402588, "learning_rate": 0.0005, "loss": 2.0979, "step": 250800 }, { "epoch": 0.954644762985011, "grad_norm": 0.12170596420764923, "learning_rate": 0.0005, "loss": 2.1036, "step": 250810 }, { "epoch": 0.9546828254531337, "grad_norm": 0.12434626370668411, "learning_rate": 0.0005, "loss": 2.1074, "step": 250820 }, { "epoch": 0.9547208879212563, "grad_norm": 0.14334000647068024, "learning_rate": 0.0005, "loss": 2.1035, "step": 250830 }, { "epoch": 0.9547589503893791, "grad_norm": 0.14487650990486145, "learning_rate": 0.0005, "loss": 2.1188, "step": 250840 }, { "epoch": 0.9547970128575017, "grad_norm": 0.12593317031860352, "learning_rate": 0.0005, "loss": 2.1118, "step": 250850 }, { "epoch": 0.9548350753256244, "grad_norm": 0.14340145885944366, "learning_rate": 0.0005, "loss": 2.1, "step": 250860 }, { "epoch": 0.954873137793747, "grad_norm": 0.13837972283363342, "learning_rate": 0.0005, "loss": 2.1082, "step": 250870 }, { "epoch": 0.9549112002618698, "grad_norm": 0.12254208326339722, "learning_rate": 0.0005, "loss": 2.1015, "step": 250880 }, { "epoch": 0.9549492627299925, "grad_norm": 0.12815473973751068, "learning_rate": 0.0005, "loss": 2.0997, "step": 250890 }, { "epoch": 0.9549873251981151, "grad_norm": 0.12106409668922424, "learning_rate": 0.0005, "loss": 2.1084, "step": 250900 }, { "epoch": 0.9550253876662378, "grad_norm": 0.12017683684825897, "learning_rate": 0.0005, "loss": 2.1038, "step": 250910 }, { "epoch": 0.9550634501343606, "grad_norm": 0.12627241015434265, "learning_rate": 0.0005, "loss": 2.1004, "step": 250920 }, { "epoch": 0.9551015126024832, "grad_norm": 0.1460886001586914, "learning_rate": 0.0005, "loss": 2.1058, "step": 250930 }, { "epoch": 0.9551395750706059, "grad_norm": 0.11303062736988068, "learning_rate": 0.0005, "loss": 2.096, "step": 250940 }, { "epoch": 0.9551776375387285, "grad_norm": 0.1309528350830078, "learning_rate": 0.0005, "loss": 2.1002, "step": 250950 }, { "epoch": 0.9552157000068513, "grad_norm": 0.12723702192306519, "learning_rate": 0.0005, "loss": 2.0887, "step": 250960 }, { "epoch": 0.955253762474974, "grad_norm": 0.12411960959434509, "learning_rate": 0.0005, "loss": 2.1121, "step": 250970 }, { "epoch": 0.9552918249430966, "grad_norm": 0.1408742070198059, "learning_rate": 0.0005, "loss": 2.1134, "step": 250980 }, { "epoch": 0.9553298874112193, "grad_norm": 0.12967275083065033, "learning_rate": 0.0005, "loss": 2.1009, "step": 250990 }, { "epoch": 0.9553679498793419, "grad_norm": 0.1268027126789093, "learning_rate": 0.0005, "loss": 2.1118, "step": 251000 }, { "epoch": 0.9554060123474647, "grad_norm": 0.12056492269039154, "learning_rate": 0.0005, "loss": 2.1047, "step": 251010 }, { "epoch": 0.9554440748155874, "grad_norm": 0.11946653574705124, "learning_rate": 0.0005, "loss": 2.1063, "step": 251020 }, { "epoch": 0.95548213728371, "grad_norm": 0.12903720140457153, "learning_rate": 0.0005, "loss": 2.1178, "step": 251030 }, { "epoch": 0.9555201997518327, "grad_norm": 0.1267389953136444, "learning_rate": 0.0005, "loss": 2.1034, "step": 251040 }, { "epoch": 0.9555582622199554, "grad_norm": 0.13117201626300812, "learning_rate": 0.0005, "loss": 2.0986, "step": 251050 }, { "epoch": 0.9555963246880781, "grad_norm": 0.1332862228155136, "learning_rate": 0.0005, "loss": 2.1046, "step": 251060 }, { "epoch": 0.9556343871562007, "grad_norm": 0.12589438259601593, "learning_rate": 0.0005, "loss": 2.0995, "step": 251070 }, { "epoch": 0.9556724496243234, "grad_norm": 0.1279170662164688, "learning_rate": 0.0005, "loss": 2.1022, "step": 251080 }, { "epoch": 0.9557105120924462, "grad_norm": 0.14559973776340485, "learning_rate": 0.0005, "loss": 2.1035, "step": 251090 }, { "epoch": 0.9557485745605688, "grad_norm": 0.13227277994155884, "learning_rate": 0.0005, "loss": 2.103, "step": 251100 }, { "epoch": 0.9557866370286915, "grad_norm": 0.11522812396287918, "learning_rate": 0.0005, "loss": 2.1013, "step": 251110 }, { "epoch": 0.9558246994968141, "grad_norm": 0.12452112138271332, "learning_rate": 0.0005, "loss": 2.1061, "step": 251120 }, { "epoch": 0.9558627619649368, "grad_norm": 0.12298731505870819, "learning_rate": 0.0005, "loss": 2.09, "step": 251130 }, { "epoch": 0.9559008244330596, "grad_norm": 0.1283821314573288, "learning_rate": 0.0005, "loss": 2.0968, "step": 251140 }, { "epoch": 0.9559388869011822, "grad_norm": 0.13891930878162384, "learning_rate": 0.0005, "loss": 2.1116, "step": 251150 }, { "epoch": 0.9559769493693049, "grad_norm": 0.13013017177581787, "learning_rate": 0.0005, "loss": 2.1005, "step": 251160 }, { "epoch": 0.9560150118374275, "grad_norm": 0.1186857670545578, "learning_rate": 0.0005, "loss": 2.1043, "step": 251170 }, { "epoch": 0.9560530743055503, "grad_norm": 0.12192530930042267, "learning_rate": 0.0005, "loss": 2.109, "step": 251180 }, { "epoch": 0.956091136773673, "grad_norm": 0.12259349972009659, "learning_rate": 0.0005, "loss": 2.1232, "step": 251190 }, { "epoch": 0.9561291992417956, "grad_norm": 0.12459017336368561, "learning_rate": 0.0005, "loss": 2.0999, "step": 251200 }, { "epoch": 0.9561672617099183, "grad_norm": 0.11912458389997482, "learning_rate": 0.0005, "loss": 2.0879, "step": 251210 }, { "epoch": 0.956205324178041, "grad_norm": 0.12418442964553833, "learning_rate": 0.0005, "loss": 2.1174, "step": 251220 }, { "epoch": 0.9562433866461637, "grad_norm": 0.13457264006137848, "learning_rate": 0.0005, "loss": 2.103, "step": 251230 }, { "epoch": 0.9562814491142864, "grad_norm": 0.1312561333179474, "learning_rate": 0.0005, "loss": 2.0901, "step": 251240 }, { "epoch": 0.956319511582409, "grad_norm": 0.13382896780967712, "learning_rate": 0.0005, "loss": 2.0959, "step": 251250 }, { "epoch": 0.9563575740505317, "grad_norm": 0.14410291612148285, "learning_rate": 0.0005, "loss": 2.1038, "step": 251260 }, { "epoch": 0.9563956365186544, "grad_norm": 0.13317954540252686, "learning_rate": 0.0005, "loss": 2.1252, "step": 251270 }, { "epoch": 0.9564336989867771, "grad_norm": 0.1316411942243576, "learning_rate": 0.0005, "loss": 2.0885, "step": 251280 }, { "epoch": 0.9564717614548998, "grad_norm": 0.1131930723786354, "learning_rate": 0.0005, "loss": 2.0913, "step": 251290 }, { "epoch": 0.9565098239230224, "grad_norm": 0.11551398038864136, "learning_rate": 0.0005, "loss": 2.1056, "step": 251300 }, { "epoch": 0.9565478863911452, "grad_norm": 0.12596704065799713, "learning_rate": 0.0005, "loss": 2.1022, "step": 251310 }, { "epoch": 0.9565859488592678, "grad_norm": 0.13516521453857422, "learning_rate": 0.0005, "loss": 2.1058, "step": 251320 }, { "epoch": 0.9566240113273905, "grad_norm": 0.12555758655071259, "learning_rate": 0.0005, "loss": 2.1043, "step": 251330 }, { "epoch": 0.9566620737955132, "grad_norm": 0.1379236876964569, "learning_rate": 0.0005, "loss": 2.1108, "step": 251340 }, { "epoch": 0.9567001362636359, "grad_norm": 0.12100891023874283, "learning_rate": 0.0005, "loss": 2.1139, "step": 251350 }, { "epoch": 0.9567381987317586, "grad_norm": 0.12747138738632202, "learning_rate": 0.0005, "loss": 2.0993, "step": 251360 }, { "epoch": 0.9567762611998812, "grad_norm": 0.12468067556619644, "learning_rate": 0.0005, "loss": 2.1049, "step": 251370 }, { "epoch": 0.9568143236680039, "grad_norm": 0.1281834840774536, "learning_rate": 0.0005, "loss": 2.1007, "step": 251380 }, { "epoch": 0.9568523861361267, "grad_norm": 0.14125867187976837, "learning_rate": 0.0005, "loss": 2.1012, "step": 251390 }, { "epoch": 0.9568904486042493, "grad_norm": 0.17697103321552277, "learning_rate": 0.0005, "loss": 2.1086, "step": 251400 }, { "epoch": 0.956928511072372, "grad_norm": 0.12031698226928711, "learning_rate": 0.0005, "loss": 2.09, "step": 251410 }, { "epoch": 0.9569665735404946, "grad_norm": 0.11731946468353271, "learning_rate": 0.0005, "loss": 2.091, "step": 251420 }, { "epoch": 0.9570046360086173, "grad_norm": 0.13509485125541687, "learning_rate": 0.0005, "loss": 2.1095, "step": 251430 }, { "epoch": 0.9570426984767401, "grad_norm": 0.1525716781616211, "learning_rate": 0.0005, "loss": 2.1049, "step": 251440 }, { "epoch": 0.9570807609448627, "grad_norm": 0.12425057590007782, "learning_rate": 0.0005, "loss": 2.0994, "step": 251450 }, { "epoch": 0.9571188234129854, "grad_norm": 0.13297343254089355, "learning_rate": 0.0005, "loss": 2.0951, "step": 251460 }, { "epoch": 0.957156885881108, "grad_norm": 0.13009850680828094, "learning_rate": 0.0005, "loss": 2.0987, "step": 251470 }, { "epoch": 0.9571949483492308, "grad_norm": 0.1400795876979828, "learning_rate": 0.0005, "loss": 2.1099, "step": 251480 }, { "epoch": 0.9572330108173535, "grad_norm": 0.12518252432346344, "learning_rate": 0.0005, "loss": 2.1116, "step": 251490 }, { "epoch": 0.9572710732854761, "grad_norm": 0.1322515457868576, "learning_rate": 0.0005, "loss": 2.099, "step": 251500 }, { "epoch": 0.9573091357535988, "grad_norm": 0.12189028412103653, "learning_rate": 0.0005, "loss": 2.1105, "step": 251510 }, { "epoch": 0.9573471982217215, "grad_norm": 0.12501998245716095, "learning_rate": 0.0005, "loss": 2.0942, "step": 251520 }, { "epoch": 0.9573852606898442, "grad_norm": 0.13441771268844604, "learning_rate": 0.0005, "loss": 2.0733, "step": 251530 }, { "epoch": 0.9574233231579669, "grad_norm": 0.13036592304706573, "learning_rate": 0.0005, "loss": 2.0949, "step": 251540 }, { "epoch": 0.9574613856260895, "grad_norm": 0.12414134293794632, "learning_rate": 0.0005, "loss": 2.0805, "step": 251550 }, { "epoch": 0.9574994480942122, "grad_norm": 0.12663614749908447, "learning_rate": 0.0005, "loss": 2.0944, "step": 251560 }, { "epoch": 0.9575375105623349, "grad_norm": 0.13920724391937256, "learning_rate": 0.0005, "loss": 2.1114, "step": 251570 }, { "epoch": 0.9575755730304576, "grad_norm": 0.3212215006351471, "learning_rate": 0.0005, "loss": 2.0987, "step": 251580 }, { "epoch": 0.9576136354985803, "grad_norm": 0.1401003748178482, "learning_rate": 0.0005, "loss": 2.1007, "step": 251590 }, { "epoch": 0.9576516979667029, "grad_norm": 0.1583040952682495, "learning_rate": 0.0005, "loss": 2.1093, "step": 251600 }, { "epoch": 0.9576897604348257, "grad_norm": 0.13942931592464447, "learning_rate": 0.0005, "loss": 2.0953, "step": 251610 }, { "epoch": 0.9577278229029483, "grad_norm": 0.12837275862693787, "learning_rate": 0.0005, "loss": 2.0995, "step": 251620 }, { "epoch": 0.957765885371071, "grad_norm": 0.12775376439094543, "learning_rate": 0.0005, "loss": 2.1114, "step": 251630 }, { "epoch": 0.9578039478391936, "grad_norm": 0.12998957931995392, "learning_rate": 0.0005, "loss": 2.0986, "step": 251640 }, { "epoch": 0.9578420103073164, "grad_norm": 0.12760597467422485, "learning_rate": 0.0005, "loss": 2.1028, "step": 251650 }, { "epoch": 0.9578800727754391, "grad_norm": 0.12280615419149399, "learning_rate": 0.0005, "loss": 2.099, "step": 251660 }, { "epoch": 0.9579181352435617, "grad_norm": 0.1304440051317215, "learning_rate": 0.0005, "loss": 2.0834, "step": 251670 }, { "epoch": 0.9579561977116844, "grad_norm": 0.1276025027036667, "learning_rate": 0.0005, "loss": 2.1106, "step": 251680 }, { "epoch": 0.957994260179807, "grad_norm": 0.13778327405452728, "learning_rate": 0.0005, "loss": 2.097, "step": 251690 }, { "epoch": 0.9580323226479298, "grad_norm": 0.13166651129722595, "learning_rate": 0.0005, "loss": 2.1152, "step": 251700 }, { "epoch": 0.9580703851160525, "grad_norm": 0.12186914682388306, "learning_rate": 0.0005, "loss": 2.1001, "step": 251710 }, { "epoch": 0.9581084475841751, "grad_norm": 0.13413405418395996, "learning_rate": 0.0005, "loss": 2.1076, "step": 251720 }, { "epoch": 0.9581465100522978, "grad_norm": 0.1275538057088852, "learning_rate": 0.0005, "loss": 2.1032, "step": 251730 }, { "epoch": 0.9581845725204206, "grad_norm": 0.14755627512931824, "learning_rate": 0.0005, "loss": 2.0927, "step": 251740 }, { "epoch": 0.9582226349885432, "grad_norm": 0.12701916694641113, "learning_rate": 0.0005, "loss": 2.1127, "step": 251750 }, { "epoch": 0.9582606974566659, "grad_norm": 0.12311451882123947, "learning_rate": 0.0005, "loss": 2.1054, "step": 251760 }, { "epoch": 0.9582987599247885, "grad_norm": 0.1216728538274765, "learning_rate": 0.0005, "loss": 2.1063, "step": 251770 }, { "epoch": 0.9583368223929113, "grad_norm": 0.1265268474817276, "learning_rate": 0.0005, "loss": 2.116, "step": 251780 }, { "epoch": 0.958374884861034, "grad_norm": 0.12738136947155, "learning_rate": 0.0005, "loss": 2.0876, "step": 251790 }, { "epoch": 0.9584129473291566, "grad_norm": 0.1373567283153534, "learning_rate": 0.0005, "loss": 2.112, "step": 251800 }, { "epoch": 0.9584510097972793, "grad_norm": 0.12077868729829788, "learning_rate": 0.0005, "loss": 2.0954, "step": 251810 }, { "epoch": 0.958489072265402, "grad_norm": 0.1335574835538864, "learning_rate": 0.0005, "loss": 2.1098, "step": 251820 }, { "epoch": 0.9585271347335247, "grad_norm": 0.13100647926330566, "learning_rate": 0.0005, "loss": 2.1068, "step": 251830 }, { "epoch": 0.9585651972016473, "grad_norm": 0.12595607340335846, "learning_rate": 0.0005, "loss": 2.1068, "step": 251840 }, { "epoch": 0.95860325966977, "grad_norm": 0.1261446624994278, "learning_rate": 0.0005, "loss": 2.1106, "step": 251850 }, { "epoch": 0.9586413221378927, "grad_norm": 0.12131789326667786, "learning_rate": 0.0005, "loss": 2.125, "step": 251860 }, { "epoch": 0.9586793846060154, "grad_norm": 0.12954165041446686, "learning_rate": 0.0005, "loss": 2.1045, "step": 251870 }, { "epoch": 0.9587174470741381, "grad_norm": 0.14390873908996582, "learning_rate": 0.0005, "loss": 2.1136, "step": 251880 }, { "epoch": 0.9587555095422607, "grad_norm": 0.13217221200466156, "learning_rate": 0.0005, "loss": 2.0832, "step": 251890 }, { "epoch": 0.9587935720103834, "grad_norm": 0.11833815276622772, "learning_rate": 0.0005, "loss": 2.1054, "step": 251900 }, { "epoch": 0.9588316344785062, "grad_norm": 0.11794855445623398, "learning_rate": 0.0005, "loss": 2.1003, "step": 251910 }, { "epoch": 0.9588696969466288, "grad_norm": 0.14406496286392212, "learning_rate": 0.0005, "loss": 2.095, "step": 251920 }, { "epoch": 0.9589077594147515, "grad_norm": 0.14679089188575745, "learning_rate": 0.0005, "loss": 2.1045, "step": 251930 }, { "epoch": 0.9589458218828741, "grad_norm": 0.13904447853565216, "learning_rate": 0.0005, "loss": 2.1123, "step": 251940 }, { "epoch": 0.9589838843509969, "grad_norm": 0.12721426784992218, "learning_rate": 0.0005, "loss": 2.0897, "step": 251950 }, { "epoch": 0.9590219468191196, "grad_norm": 0.12797985970973969, "learning_rate": 0.0005, "loss": 2.1177, "step": 251960 }, { "epoch": 0.9590600092872422, "grad_norm": 0.12952707707881927, "learning_rate": 0.0005, "loss": 2.1081, "step": 251970 }, { "epoch": 0.9590980717553649, "grad_norm": 0.1243777871131897, "learning_rate": 0.0005, "loss": 2.0988, "step": 251980 }, { "epoch": 0.9591361342234875, "grad_norm": 0.12574772536754608, "learning_rate": 0.0005, "loss": 2.093, "step": 251990 }, { "epoch": 0.9591741966916103, "grad_norm": 0.1342536360025406, "learning_rate": 0.0005, "loss": 2.1098, "step": 252000 }, { "epoch": 0.959212259159733, "grad_norm": 0.1257815659046173, "learning_rate": 0.0005, "loss": 2.1066, "step": 252010 }, { "epoch": 0.9592503216278556, "grad_norm": 0.13442516326904297, "learning_rate": 0.0005, "loss": 2.0896, "step": 252020 }, { "epoch": 0.9592883840959783, "grad_norm": 0.12347886711359024, "learning_rate": 0.0005, "loss": 2.0999, "step": 252030 }, { "epoch": 0.959326446564101, "grad_norm": 0.14267504215240479, "learning_rate": 0.0005, "loss": 2.0846, "step": 252040 }, { "epoch": 0.9593645090322237, "grad_norm": 0.13044388592243195, "learning_rate": 0.0005, "loss": 2.1122, "step": 252050 }, { "epoch": 0.9594025715003464, "grad_norm": 0.1280493289232254, "learning_rate": 0.0005, "loss": 2.0957, "step": 252060 }, { "epoch": 0.959440633968469, "grad_norm": 0.13191363215446472, "learning_rate": 0.0005, "loss": 2.1011, "step": 252070 }, { "epoch": 0.9594786964365918, "grad_norm": 0.13211052119731903, "learning_rate": 0.0005, "loss": 2.0857, "step": 252080 }, { "epoch": 0.9595167589047144, "grad_norm": 0.11889991164207458, "learning_rate": 0.0005, "loss": 2.0901, "step": 252090 }, { "epoch": 0.9595548213728371, "grad_norm": 0.13696099817752838, "learning_rate": 0.0005, "loss": 2.0881, "step": 252100 }, { "epoch": 0.9595928838409598, "grad_norm": 0.13332092761993408, "learning_rate": 0.0005, "loss": 2.0939, "step": 252110 }, { "epoch": 0.9596309463090824, "grad_norm": 0.1378851681947708, "learning_rate": 0.0005, "loss": 2.116, "step": 252120 }, { "epoch": 0.9596690087772052, "grad_norm": 0.1309574693441391, "learning_rate": 0.0005, "loss": 2.1047, "step": 252130 }, { "epoch": 0.9597070712453278, "grad_norm": 0.11649864166975021, "learning_rate": 0.0005, "loss": 2.1094, "step": 252140 }, { "epoch": 0.9597451337134505, "grad_norm": 0.11125954240560532, "learning_rate": 0.0005, "loss": 2.1184, "step": 252150 }, { "epoch": 0.9597831961815732, "grad_norm": 0.1262616515159607, "learning_rate": 0.0005, "loss": 2.0887, "step": 252160 }, { "epoch": 0.9598212586496959, "grad_norm": 0.13262724876403809, "learning_rate": 0.0005, "loss": 2.104, "step": 252170 }, { "epoch": 0.9598593211178186, "grad_norm": 0.1290009468793869, "learning_rate": 0.0005, "loss": 2.0881, "step": 252180 }, { "epoch": 0.9598973835859412, "grad_norm": 0.12596750259399414, "learning_rate": 0.0005, "loss": 2.1015, "step": 252190 }, { "epoch": 0.9599354460540639, "grad_norm": 0.12755310535430908, "learning_rate": 0.0005, "loss": 2.0998, "step": 252200 }, { "epoch": 0.9599735085221867, "grad_norm": 0.11970789730548859, "learning_rate": 0.0005, "loss": 2.1023, "step": 252210 }, { "epoch": 0.9600115709903093, "grad_norm": 0.1304336041212082, "learning_rate": 0.0005, "loss": 2.1106, "step": 252220 }, { "epoch": 0.960049633458432, "grad_norm": 0.12059587985277176, "learning_rate": 0.0005, "loss": 2.1111, "step": 252230 }, { "epoch": 0.9600876959265546, "grad_norm": 0.13717228174209595, "learning_rate": 0.0005, "loss": 2.0955, "step": 252240 }, { "epoch": 0.9601257583946774, "grad_norm": 0.1227838546037674, "learning_rate": 0.0005, "loss": 2.1007, "step": 252250 }, { "epoch": 0.9601638208628, "grad_norm": 0.12704740464687347, "learning_rate": 0.0005, "loss": 2.1005, "step": 252260 }, { "epoch": 0.9602018833309227, "grad_norm": 0.12922018766403198, "learning_rate": 0.0005, "loss": 2.1097, "step": 252270 }, { "epoch": 0.9602399457990454, "grad_norm": 0.12147005647420883, "learning_rate": 0.0005, "loss": 2.0903, "step": 252280 }, { "epoch": 0.960278008267168, "grad_norm": 0.14183133840560913, "learning_rate": 0.0005, "loss": 2.0952, "step": 252290 }, { "epoch": 0.9603160707352908, "grad_norm": 0.1297820508480072, "learning_rate": 0.0005, "loss": 2.1091, "step": 252300 }, { "epoch": 0.9603541332034135, "grad_norm": 0.12243583053350449, "learning_rate": 0.0005, "loss": 2.0976, "step": 252310 }, { "epoch": 0.9603921956715361, "grad_norm": 0.1582963615655899, "learning_rate": 0.0005, "loss": 2.098, "step": 252320 }, { "epoch": 0.9604302581396588, "grad_norm": 0.11770734190940857, "learning_rate": 0.0005, "loss": 2.11, "step": 252330 }, { "epoch": 0.9604683206077815, "grad_norm": 0.14207595586776733, "learning_rate": 0.0005, "loss": 2.1037, "step": 252340 }, { "epoch": 0.9605063830759042, "grad_norm": 0.13186196982860565, "learning_rate": 0.0005, "loss": 2.1103, "step": 252350 }, { "epoch": 0.9605444455440268, "grad_norm": 0.1168905720114708, "learning_rate": 0.0005, "loss": 2.096, "step": 252360 }, { "epoch": 0.9605825080121495, "grad_norm": 0.12174654006958008, "learning_rate": 0.0005, "loss": 2.0896, "step": 252370 }, { "epoch": 0.9606205704802723, "grad_norm": 0.11918246001005173, "learning_rate": 0.0005, "loss": 2.1093, "step": 252380 }, { "epoch": 0.9606586329483949, "grad_norm": 0.13458192348480225, "learning_rate": 0.0005, "loss": 2.0968, "step": 252390 }, { "epoch": 0.9606966954165176, "grad_norm": 0.14331522583961487, "learning_rate": 0.0005, "loss": 2.1062, "step": 252400 }, { "epoch": 0.9607347578846402, "grad_norm": 0.13765034079551697, "learning_rate": 0.0005, "loss": 2.103, "step": 252410 }, { "epoch": 0.9607728203527629, "grad_norm": 0.12519164383411407, "learning_rate": 0.0005, "loss": 2.1021, "step": 252420 }, { "epoch": 0.9608108828208857, "grad_norm": 0.1264149695634842, "learning_rate": 0.0005, "loss": 2.1114, "step": 252430 }, { "epoch": 0.9608489452890083, "grad_norm": 0.122474804520607, "learning_rate": 0.0005, "loss": 2.1011, "step": 252440 }, { "epoch": 0.960887007757131, "grad_norm": 0.1438497006893158, "learning_rate": 0.0005, "loss": 2.0948, "step": 252450 }, { "epoch": 0.9609250702252536, "grad_norm": 0.14879588782787323, "learning_rate": 0.0005, "loss": 2.0969, "step": 252460 }, { "epoch": 0.9609631326933764, "grad_norm": 0.12623494863510132, "learning_rate": 0.0005, "loss": 2.1011, "step": 252470 }, { "epoch": 0.9610011951614991, "grad_norm": 0.123557910323143, "learning_rate": 0.0005, "loss": 2.0947, "step": 252480 }, { "epoch": 0.9610392576296217, "grad_norm": 0.12479705363512039, "learning_rate": 0.0005, "loss": 2.1036, "step": 252490 }, { "epoch": 0.9610773200977444, "grad_norm": 0.12509945034980774, "learning_rate": 0.0005, "loss": 2.1003, "step": 252500 }, { "epoch": 0.9611153825658671, "grad_norm": 0.13864733278751373, "learning_rate": 0.0005, "loss": 2.1095, "step": 252510 }, { "epoch": 0.9611534450339898, "grad_norm": 0.13315652310848236, "learning_rate": 0.0005, "loss": 2.1071, "step": 252520 }, { "epoch": 0.9611915075021125, "grad_norm": 0.12884598970413208, "learning_rate": 0.0005, "loss": 2.105, "step": 252530 }, { "epoch": 0.9612295699702351, "grad_norm": 0.12486692517995834, "learning_rate": 0.0005, "loss": 2.1053, "step": 252540 }, { "epoch": 0.9612676324383578, "grad_norm": 0.14396265149116516, "learning_rate": 0.0005, "loss": 2.0992, "step": 252550 }, { "epoch": 0.9613056949064805, "grad_norm": 0.13405755162239075, "learning_rate": 0.0005, "loss": 2.1064, "step": 252560 }, { "epoch": 0.9613437573746032, "grad_norm": 0.12350235134363174, "learning_rate": 0.0005, "loss": 2.1074, "step": 252570 }, { "epoch": 0.9613818198427259, "grad_norm": 0.12725189328193665, "learning_rate": 0.0005, "loss": 2.1084, "step": 252580 }, { "epoch": 0.9614198823108485, "grad_norm": 0.13886277377605438, "learning_rate": 0.0005, "loss": 2.1094, "step": 252590 }, { "epoch": 0.9614579447789713, "grad_norm": 0.14337188005447388, "learning_rate": 0.0005, "loss": 2.1127, "step": 252600 }, { "epoch": 0.9614960072470939, "grad_norm": 0.14677828550338745, "learning_rate": 0.0005, "loss": 2.1115, "step": 252610 }, { "epoch": 0.9615340697152166, "grad_norm": 0.12397907674312592, "learning_rate": 0.0005, "loss": 2.0889, "step": 252620 }, { "epoch": 0.9615721321833393, "grad_norm": 0.13534249365329742, "learning_rate": 0.0005, "loss": 2.1007, "step": 252630 }, { "epoch": 0.961610194651462, "grad_norm": 0.12256189435720444, "learning_rate": 0.0005, "loss": 2.1039, "step": 252640 }, { "epoch": 0.9616482571195847, "grad_norm": 0.13429589569568634, "learning_rate": 0.0005, "loss": 2.1116, "step": 252650 }, { "epoch": 0.9616863195877073, "grad_norm": 0.13483218848705292, "learning_rate": 0.0005, "loss": 2.0942, "step": 252660 }, { "epoch": 0.96172438205583, "grad_norm": 0.13158905506134033, "learning_rate": 0.0005, "loss": 2.1111, "step": 252670 }, { "epoch": 0.9617624445239528, "grad_norm": 0.12446098774671555, "learning_rate": 0.0005, "loss": 2.0948, "step": 252680 }, { "epoch": 0.9618005069920754, "grad_norm": 0.14150193333625793, "learning_rate": 0.0005, "loss": 2.1104, "step": 252690 }, { "epoch": 0.9618385694601981, "grad_norm": 0.13725963234901428, "learning_rate": 0.0005, "loss": 2.1063, "step": 252700 }, { "epoch": 0.9618766319283207, "grad_norm": 0.12844370305538177, "learning_rate": 0.0005, "loss": 2.1062, "step": 252710 }, { "epoch": 0.9619146943964434, "grad_norm": 0.12074138969182968, "learning_rate": 0.0005, "loss": 2.1057, "step": 252720 }, { "epoch": 0.9619527568645662, "grad_norm": 0.13764441013336182, "learning_rate": 0.0005, "loss": 2.1045, "step": 252730 }, { "epoch": 0.9619908193326888, "grad_norm": 0.11331465095281601, "learning_rate": 0.0005, "loss": 2.1046, "step": 252740 }, { "epoch": 0.9620288818008115, "grad_norm": 0.1203673928976059, "learning_rate": 0.0005, "loss": 2.1041, "step": 252750 }, { "epoch": 0.9620669442689341, "grad_norm": 0.13739249110221863, "learning_rate": 0.0005, "loss": 2.1038, "step": 252760 }, { "epoch": 0.9621050067370569, "grad_norm": 0.1379999965429306, "learning_rate": 0.0005, "loss": 2.1108, "step": 252770 }, { "epoch": 0.9621430692051796, "grad_norm": 0.13139371573925018, "learning_rate": 0.0005, "loss": 2.1026, "step": 252780 }, { "epoch": 0.9621811316733022, "grad_norm": 0.12953905761241913, "learning_rate": 0.0005, "loss": 2.0802, "step": 252790 }, { "epoch": 0.9622191941414249, "grad_norm": 0.11608421057462692, "learning_rate": 0.0005, "loss": 2.1026, "step": 252800 }, { "epoch": 0.9622572566095476, "grad_norm": 0.14513848721981049, "learning_rate": 0.0005, "loss": 2.1069, "step": 252810 }, { "epoch": 0.9622953190776703, "grad_norm": 0.13420742750167847, "learning_rate": 0.0005, "loss": 2.1121, "step": 252820 }, { "epoch": 0.962333381545793, "grad_norm": 0.11324869096279144, "learning_rate": 0.0005, "loss": 2.1062, "step": 252830 }, { "epoch": 0.9623714440139156, "grad_norm": 0.12924343347549438, "learning_rate": 0.0005, "loss": 2.1018, "step": 252840 }, { "epoch": 0.9624095064820383, "grad_norm": 0.14046475291252136, "learning_rate": 0.0005, "loss": 2.1061, "step": 252850 }, { "epoch": 0.962447568950161, "grad_norm": 0.13469430804252625, "learning_rate": 0.0005, "loss": 2.0985, "step": 252860 }, { "epoch": 0.9624856314182837, "grad_norm": 0.1296015977859497, "learning_rate": 0.0005, "loss": 2.0947, "step": 252870 }, { "epoch": 0.9625236938864064, "grad_norm": 0.12833398580551147, "learning_rate": 0.0005, "loss": 2.0935, "step": 252880 }, { "epoch": 0.962561756354529, "grad_norm": 0.13369536399841309, "learning_rate": 0.0005, "loss": 2.0997, "step": 252890 }, { "epoch": 0.9625998188226518, "grad_norm": 0.14273881912231445, "learning_rate": 0.0005, "loss": 2.1012, "step": 252900 }, { "epoch": 0.9626378812907744, "grad_norm": 0.1325225681066513, "learning_rate": 0.0005, "loss": 2.0974, "step": 252910 }, { "epoch": 0.9626759437588971, "grad_norm": 0.13713519275188446, "learning_rate": 0.0005, "loss": 2.1037, "step": 252920 }, { "epoch": 0.9627140062270197, "grad_norm": 0.12612666189670563, "learning_rate": 0.0005, "loss": 2.0957, "step": 252930 }, { "epoch": 0.9627520686951425, "grad_norm": 0.11918192356824875, "learning_rate": 0.0005, "loss": 2.1125, "step": 252940 }, { "epoch": 0.9627901311632652, "grad_norm": 0.12291216105222702, "learning_rate": 0.0005, "loss": 2.1067, "step": 252950 }, { "epoch": 0.9628281936313878, "grad_norm": 0.13093940913677216, "learning_rate": 0.0005, "loss": 2.1106, "step": 252960 }, { "epoch": 0.9628662560995105, "grad_norm": 0.11661577969789505, "learning_rate": 0.0005, "loss": 2.1099, "step": 252970 }, { "epoch": 0.9629043185676333, "grad_norm": 0.13057559728622437, "learning_rate": 0.0005, "loss": 2.0996, "step": 252980 }, { "epoch": 0.9629423810357559, "grad_norm": 0.13503484427928925, "learning_rate": 0.0005, "loss": 2.1006, "step": 252990 }, { "epoch": 0.9629804435038786, "grad_norm": 0.14270184934139252, "learning_rate": 0.0005, "loss": 2.0977, "step": 253000 }, { "epoch": 0.9630185059720012, "grad_norm": 0.13299335539340973, "learning_rate": 0.0005, "loss": 2.1092, "step": 253010 }, { "epoch": 0.9630565684401239, "grad_norm": 0.12235192954540253, "learning_rate": 0.0005, "loss": 2.1008, "step": 253020 }, { "epoch": 0.9630946309082467, "grad_norm": 0.125530406832695, "learning_rate": 0.0005, "loss": 2.1036, "step": 253030 }, { "epoch": 0.9631326933763693, "grad_norm": 0.15567998588085175, "learning_rate": 0.0005, "loss": 2.1096, "step": 253040 }, { "epoch": 0.963170755844492, "grad_norm": 0.13265834748744965, "learning_rate": 0.0005, "loss": 2.1032, "step": 253050 }, { "epoch": 0.9632088183126146, "grad_norm": 0.1267433613538742, "learning_rate": 0.0005, "loss": 2.0917, "step": 253060 }, { "epoch": 0.9632468807807374, "grad_norm": 0.24809212982654572, "learning_rate": 0.0005, "loss": 2.0975, "step": 253070 }, { "epoch": 0.96328494324886, "grad_norm": 0.13036325573921204, "learning_rate": 0.0005, "loss": 2.1048, "step": 253080 }, { "epoch": 0.9633230057169827, "grad_norm": 0.12346664816141129, "learning_rate": 0.0005, "loss": 2.1207, "step": 253090 }, { "epoch": 0.9633610681851054, "grad_norm": 0.13149957358837128, "learning_rate": 0.0005, "loss": 2.111, "step": 253100 }, { "epoch": 0.9633991306532281, "grad_norm": 0.12425676733255386, "learning_rate": 0.0005, "loss": 2.0961, "step": 253110 }, { "epoch": 0.9634371931213508, "grad_norm": 0.12317558377981186, "learning_rate": 0.0005, "loss": 2.1083, "step": 253120 }, { "epoch": 0.9634752555894734, "grad_norm": 0.126312255859375, "learning_rate": 0.0005, "loss": 2.0861, "step": 253130 }, { "epoch": 0.9635133180575961, "grad_norm": 0.13709893822669983, "learning_rate": 0.0005, "loss": 2.0907, "step": 253140 }, { "epoch": 0.9635513805257188, "grad_norm": 0.13229143619537354, "learning_rate": 0.0005, "loss": 2.0939, "step": 253150 }, { "epoch": 0.9635894429938415, "grad_norm": 0.13252483308315277, "learning_rate": 0.0005, "loss": 2.0892, "step": 253160 }, { "epoch": 0.9636275054619642, "grad_norm": 0.12838731706142426, "learning_rate": 0.0005, "loss": 2.0912, "step": 253170 }, { "epoch": 0.9636655679300868, "grad_norm": 0.13725703954696655, "learning_rate": 0.0005, "loss": 2.0939, "step": 253180 }, { "epoch": 0.9637036303982095, "grad_norm": 0.12590914964675903, "learning_rate": 0.0005, "loss": 2.1061, "step": 253190 }, { "epoch": 0.9637416928663323, "grad_norm": 0.1262999176979065, "learning_rate": 0.0005, "loss": 2.108, "step": 253200 }, { "epoch": 0.9637797553344549, "grad_norm": 0.1354227513074875, "learning_rate": 0.0005, "loss": 2.0905, "step": 253210 }, { "epoch": 0.9638178178025776, "grad_norm": 0.13120831549167633, "learning_rate": 0.0005, "loss": 2.0871, "step": 253220 }, { "epoch": 0.9638558802707002, "grad_norm": 0.12905097007751465, "learning_rate": 0.0005, "loss": 2.1037, "step": 253230 }, { "epoch": 0.963893942738823, "grad_norm": 0.12586815655231476, "learning_rate": 0.0005, "loss": 2.109, "step": 253240 }, { "epoch": 0.9639320052069457, "grad_norm": 0.12380609661340714, "learning_rate": 0.0005, "loss": 2.1068, "step": 253250 }, { "epoch": 0.9639700676750683, "grad_norm": 0.13315235078334808, "learning_rate": 0.0005, "loss": 2.0977, "step": 253260 }, { "epoch": 0.964008130143191, "grad_norm": 0.12807630002498627, "learning_rate": 0.0005, "loss": 2.1072, "step": 253270 }, { "epoch": 0.9640461926113136, "grad_norm": 0.12301483005285263, "learning_rate": 0.0005, "loss": 2.107, "step": 253280 }, { "epoch": 0.9640842550794364, "grad_norm": 0.12575186789035797, "learning_rate": 0.0005, "loss": 2.1132, "step": 253290 }, { "epoch": 0.9641223175475591, "grad_norm": 0.1165456622838974, "learning_rate": 0.0005, "loss": 2.1141, "step": 253300 }, { "epoch": 0.9641603800156817, "grad_norm": 0.13741955161094666, "learning_rate": 0.0005, "loss": 2.0996, "step": 253310 }, { "epoch": 0.9641984424838044, "grad_norm": 0.12666748464107513, "learning_rate": 0.0005, "loss": 2.1145, "step": 253320 }, { "epoch": 0.9642365049519271, "grad_norm": 0.12276309728622437, "learning_rate": 0.0005, "loss": 2.1224, "step": 253330 }, { "epoch": 0.9642745674200498, "grad_norm": 0.13581430912017822, "learning_rate": 0.0005, "loss": 2.1112, "step": 253340 }, { "epoch": 0.9643126298881725, "grad_norm": 0.7123417854309082, "learning_rate": 0.0005, "loss": 2.0961, "step": 253350 }, { "epoch": 0.9643506923562951, "grad_norm": 0.1316785216331482, "learning_rate": 0.0005, "loss": 2.1045, "step": 253360 }, { "epoch": 0.9643887548244179, "grad_norm": 0.11734849959611893, "learning_rate": 0.0005, "loss": 2.1039, "step": 253370 }, { "epoch": 0.9644268172925405, "grad_norm": 0.1331537514925003, "learning_rate": 0.0005, "loss": 2.1075, "step": 253380 }, { "epoch": 0.9644648797606632, "grad_norm": 0.1352957934141159, "learning_rate": 0.0005, "loss": 2.0968, "step": 253390 }, { "epoch": 0.9645029422287859, "grad_norm": 0.12489691376686096, "learning_rate": 0.0005, "loss": 2.0979, "step": 253400 }, { "epoch": 0.9645410046969086, "grad_norm": 0.12582498788833618, "learning_rate": 0.0005, "loss": 2.1205, "step": 253410 }, { "epoch": 0.9645790671650313, "grad_norm": 0.11681034415960312, "learning_rate": 0.0005, "loss": 2.1028, "step": 253420 }, { "epoch": 0.9646171296331539, "grad_norm": 0.12178188562393188, "learning_rate": 0.0005, "loss": 2.0971, "step": 253430 }, { "epoch": 0.9646551921012766, "grad_norm": 0.13658106327056885, "learning_rate": 0.0005, "loss": 2.096, "step": 253440 }, { "epoch": 0.9646932545693993, "grad_norm": 0.13959579169750214, "learning_rate": 0.0005, "loss": 2.1042, "step": 253450 }, { "epoch": 0.964731317037522, "grad_norm": 0.1278458833694458, "learning_rate": 0.0005, "loss": 2.1072, "step": 253460 }, { "epoch": 0.9647693795056447, "grad_norm": 0.13757051527500153, "learning_rate": 0.0005, "loss": 2.1154, "step": 253470 }, { "epoch": 0.9648074419737673, "grad_norm": 0.12881222367286682, "learning_rate": 0.0005, "loss": 2.1, "step": 253480 }, { "epoch": 0.96484550444189, "grad_norm": 0.1218414306640625, "learning_rate": 0.0005, "loss": 2.0919, "step": 253490 }, { "epoch": 0.9648835669100128, "grad_norm": 0.12052089720964432, "learning_rate": 0.0005, "loss": 2.1018, "step": 253500 }, { "epoch": 0.9649216293781354, "grad_norm": 0.14013579487800598, "learning_rate": 0.0005, "loss": 2.0872, "step": 253510 }, { "epoch": 0.9649596918462581, "grad_norm": 0.1209578886628151, "learning_rate": 0.0005, "loss": 2.1122, "step": 253520 }, { "epoch": 0.9649977543143807, "grad_norm": 0.12182780355215073, "learning_rate": 0.0005, "loss": 2.114, "step": 253530 }, { "epoch": 0.9650358167825035, "grad_norm": 0.13168083131313324, "learning_rate": 0.0005, "loss": 2.0986, "step": 253540 }, { "epoch": 0.9650738792506262, "grad_norm": 0.13472476601600647, "learning_rate": 0.0005, "loss": 2.0966, "step": 253550 }, { "epoch": 0.9651119417187488, "grad_norm": 0.1423880010843277, "learning_rate": 0.0005, "loss": 2.1011, "step": 253560 }, { "epoch": 0.9651500041868715, "grad_norm": 0.13923941552639008, "learning_rate": 0.0005, "loss": 2.0933, "step": 253570 }, { "epoch": 0.9651880666549941, "grad_norm": 0.12023203074932098, "learning_rate": 0.0005, "loss": 2.0855, "step": 253580 }, { "epoch": 0.9652261291231169, "grad_norm": 0.12728914618492126, "learning_rate": 0.0005, "loss": 2.0936, "step": 253590 }, { "epoch": 0.9652641915912396, "grad_norm": 0.13072176277637482, "learning_rate": 0.0005, "loss": 2.0938, "step": 253600 }, { "epoch": 0.9653022540593622, "grad_norm": 0.12164273858070374, "learning_rate": 0.0005, "loss": 2.1051, "step": 253610 }, { "epoch": 0.9653403165274849, "grad_norm": 0.14329518377780914, "learning_rate": 0.0005, "loss": 2.0966, "step": 253620 }, { "epoch": 0.9653783789956076, "grad_norm": 0.13620802760124207, "learning_rate": 0.0005, "loss": 2.1011, "step": 253630 }, { "epoch": 0.9654164414637303, "grad_norm": 0.13148976862430573, "learning_rate": 0.0005, "loss": 2.1179, "step": 253640 }, { "epoch": 0.965454503931853, "grad_norm": 0.19821810722351074, "learning_rate": 0.0005, "loss": 2.1049, "step": 253650 }, { "epoch": 0.9654925663999756, "grad_norm": 0.1319485753774643, "learning_rate": 0.0005, "loss": 2.1143, "step": 253660 }, { "epoch": 0.9655306288680984, "grad_norm": 0.1330292969942093, "learning_rate": 0.0005, "loss": 2.1015, "step": 253670 }, { "epoch": 0.965568691336221, "grad_norm": 0.12223409116268158, "learning_rate": 0.0005, "loss": 2.0926, "step": 253680 }, { "epoch": 0.9656067538043437, "grad_norm": 0.12201818823814392, "learning_rate": 0.0005, "loss": 2.0748, "step": 253690 }, { "epoch": 0.9656448162724663, "grad_norm": 0.1322978287935257, "learning_rate": 0.0005, "loss": 2.0986, "step": 253700 }, { "epoch": 0.965682878740589, "grad_norm": 0.1483135223388672, "learning_rate": 0.0005, "loss": 2.1028, "step": 253710 }, { "epoch": 0.9657209412087118, "grad_norm": 0.1406850814819336, "learning_rate": 0.0005, "loss": 2.0886, "step": 253720 }, { "epoch": 0.9657590036768344, "grad_norm": 0.13997170329093933, "learning_rate": 0.0005, "loss": 2.1102, "step": 253730 }, { "epoch": 0.9657970661449571, "grad_norm": 0.12795639038085938, "learning_rate": 0.0005, "loss": 2.0969, "step": 253740 }, { "epoch": 0.9658351286130797, "grad_norm": 0.1439167857170105, "learning_rate": 0.0005, "loss": 2.0977, "step": 253750 }, { "epoch": 0.9658731910812025, "grad_norm": 0.12860262393951416, "learning_rate": 0.0005, "loss": 2.1072, "step": 253760 }, { "epoch": 0.9659112535493252, "grad_norm": 0.11895686388015747, "learning_rate": 0.0005, "loss": 2.1015, "step": 253770 }, { "epoch": 0.9659493160174478, "grad_norm": 0.1427023708820343, "learning_rate": 0.0005, "loss": 2.1077, "step": 253780 }, { "epoch": 0.9659873784855705, "grad_norm": 0.13326308131217957, "learning_rate": 0.0005, "loss": 2.0997, "step": 253790 }, { "epoch": 0.9660254409536932, "grad_norm": 0.12903240323066711, "learning_rate": 0.0005, "loss": 2.1192, "step": 253800 }, { "epoch": 0.9660635034218159, "grad_norm": 0.1381160020828247, "learning_rate": 0.0005, "loss": 2.1097, "step": 253810 }, { "epoch": 0.9661015658899386, "grad_norm": 0.12535077333450317, "learning_rate": 0.0005, "loss": 2.0907, "step": 253820 }, { "epoch": 0.9661396283580612, "grad_norm": 0.1265016347169876, "learning_rate": 0.0005, "loss": 2.1151, "step": 253830 }, { "epoch": 0.966177690826184, "grad_norm": 0.12111052125692368, "learning_rate": 0.0005, "loss": 2.1203, "step": 253840 }, { "epoch": 0.9662157532943066, "grad_norm": 0.13855448365211487, "learning_rate": 0.0005, "loss": 2.1037, "step": 253850 }, { "epoch": 0.9662538157624293, "grad_norm": 0.1279250979423523, "learning_rate": 0.0005, "loss": 2.1122, "step": 253860 }, { "epoch": 0.966291878230552, "grad_norm": 0.1239514946937561, "learning_rate": 0.0005, "loss": 2.1065, "step": 253870 }, { "epoch": 0.9663299406986746, "grad_norm": 0.11943173408508301, "learning_rate": 0.0005, "loss": 2.1046, "step": 253880 }, { "epoch": 0.9663680031667974, "grad_norm": 0.1287466436624527, "learning_rate": 0.0005, "loss": 2.1045, "step": 253890 }, { "epoch": 0.96640606563492, "grad_norm": 0.11794771999120712, "learning_rate": 0.0005, "loss": 2.1018, "step": 253900 }, { "epoch": 0.9664441281030427, "grad_norm": 0.1384105086326599, "learning_rate": 0.0005, "loss": 2.0973, "step": 253910 }, { "epoch": 0.9664821905711654, "grad_norm": 0.12583598494529724, "learning_rate": 0.0005, "loss": 2.0939, "step": 253920 }, { "epoch": 0.9665202530392881, "grad_norm": 0.1272657811641693, "learning_rate": 0.0005, "loss": 2.1159, "step": 253930 }, { "epoch": 0.9665583155074108, "grad_norm": 0.1401505172252655, "learning_rate": 0.0005, "loss": 2.1001, "step": 253940 }, { "epoch": 0.9665963779755334, "grad_norm": 0.13038720190525055, "learning_rate": 0.0005, "loss": 2.1041, "step": 253950 }, { "epoch": 0.9666344404436561, "grad_norm": 0.12374894320964813, "learning_rate": 0.0005, "loss": 2.1053, "step": 253960 }, { "epoch": 0.9666725029117789, "grad_norm": 0.12792405486106873, "learning_rate": 0.0005, "loss": 2.0947, "step": 253970 }, { "epoch": 0.9667105653799015, "grad_norm": 0.1282413899898529, "learning_rate": 0.0005, "loss": 2.1038, "step": 253980 }, { "epoch": 0.9667486278480242, "grad_norm": 0.1365712434053421, "learning_rate": 0.0005, "loss": 2.1017, "step": 253990 }, { "epoch": 0.9667866903161468, "grad_norm": 0.1373814046382904, "learning_rate": 0.0005, "loss": 2.0883, "step": 254000 }, { "epoch": 0.9668247527842695, "grad_norm": 0.1197550892829895, "learning_rate": 0.0005, "loss": 2.1055, "step": 254010 }, { "epoch": 0.9668628152523923, "grad_norm": 0.13073822855949402, "learning_rate": 0.0005, "loss": 2.1036, "step": 254020 }, { "epoch": 0.9669008777205149, "grad_norm": 0.13818205893039703, "learning_rate": 0.0005, "loss": 2.1046, "step": 254030 }, { "epoch": 0.9669389401886376, "grad_norm": 0.12917038798332214, "learning_rate": 0.0005, "loss": 2.117, "step": 254040 }, { "epoch": 0.9669770026567602, "grad_norm": 0.13809436559677124, "learning_rate": 0.0005, "loss": 2.1106, "step": 254050 }, { "epoch": 0.967015065124883, "grad_norm": 0.13827873766422272, "learning_rate": 0.0005, "loss": 2.0976, "step": 254060 }, { "epoch": 0.9670531275930057, "grad_norm": 0.12173813581466675, "learning_rate": 0.0005, "loss": 2.1217, "step": 254070 }, { "epoch": 0.9670911900611283, "grad_norm": 0.12423427402973175, "learning_rate": 0.0005, "loss": 2.1051, "step": 254080 }, { "epoch": 0.967129252529251, "grad_norm": 0.12945401668548584, "learning_rate": 0.0005, "loss": 2.0869, "step": 254090 }, { "epoch": 0.9671673149973737, "grad_norm": 0.14030809700489044, "learning_rate": 0.0005, "loss": 2.0967, "step": 254100 }, { "epoch": 0.9672053774654964, "grad_norm": 0.14602228999137878, "learning_rate": 0.0005, "loss": 2.1087, "step": 254110 }, { "epoch": 0.967243439933619, "grad_norm": 0.11788609623908997, "learning_rate": 0.0005, "loss": 2.0985, "step": 254120 }, { "epoch": 0.9672815024017417, "grad_norm": 0.1414944976568222, "learning_rate": 0.0005, "loss": 2.103, "step": 254130 }, { "epoch": 0.9673195648698644, "grad_norm": 0.13184891641139984, "learning_rate": 0.0005, "loss": 2.1069, "step": 254140 }, { "epoch": 0.9673576273379871, "grad_norm": 0.1220046728849411, "learning_rate": 0.0005, "loss": 2.0957, "step": 254150 }, { "epoch": 0.9673956898061098, "grad_norm": 0.12934859097003937, "learning_rate": 0.0005, "loss": 2.0952, "step": 254160 }, { "epoch": 0.9674337522742325, "grad_norm": 0.13587023317813873, "learning_rate": 0.0005, "loss": 2.1004, "step": 254170 }, { "epoch": 0.9674718147423551, "grad_norm": 0.13337966799736023, "learning_rate": 0.0005, "loss": 2.0932, "step": 254180 }, { "epoch": 0.9675098772104779, "grad_norm": 0.11864124983549118, "learning_rate": 0.0005, "loss": 2.1067, "step": 254190 }, { "epoch": 0.9675479396786005, "grad_norm": 0.1444726586341858, "learning_rate": 0.0005, "loss": 2.1063, "step": 254200 }, { "epoch": 0.9675860021467232, "grad_norm": 0.1322479099035263, "learning_rate": 0.0005, "loss": 2.1044, "step": 254210 }, { "epoch": 0.9676240646148458, "grad_norm": 0.13444893062114716, "learning_rate": 0.0005, "loss": 2.1099, "step": 254220 }, { "epoch": 0.9676621270829686, "grad_norm": 0.14414022862911224, "learning_rate": 0.0005, "loss": 2.1184, "step": 254230 }, { "epoch": 0.9677001895510913, "grad_norm": 0.12338683009147644, "learning_rate": 0.0005, "loss": 2.0885, "step": 254240 }, { "epoch": 0.9677382520192139, "grad_norm": 0.12411805987358093, "learning_rate": 0.0005, "loss": 2.0986, "step": 254250 }, { "epoch": 0.9677763144873366, "grad_norm": 0.14127685129642487, "learning_rate": 0.0005, "loss": 2.1122, "step": 254260 }, { "epoch": 0.9678143769554594, "grad_norm": 0.12266780436038971, "learning_rate": 0.0005, "loss": 2.1026, "step": 254270 }, { "epoch": 0.967852439423582, "grad_norm": 0.12482955306768417, "learning_rate": 0.0005, "loss": 2.1065, "step": 254280 }, { "epoch": 0.9678905018917047, "grad_norm": 0.12805862724781036, "learning_rate": 0.0005, "loss": 2.1001, "step": 254290 }, { "epoch": 0.9679285643598273, "grad_norm": 0.15698951482772827, "learning_rate": 0.0005, "loss": 2.1077, "step": 254300 }, { "epoch": 0.96796662682795, "grad_norm": 0.13735942542552948, "learning_rate": 0.0005, "loss": 2.1004, "step": 254310 }, { "epoch": 0.9680046892960728, "grad_norm": 0.12623661756515503, "learning_rate": 0.0005, "loss": 2.0795, "step": 254320 }, { "epoch": 0.9680427517641954, "grad_norm": 0.13633745908737183, "learning_rate": 0.0005, "loss": 2.0998, "step": 254330 }, { "epoch": 0.9680808142323181, "grad_norm": 0.12903860211372375, "learning_rate": 0.0005, "loss": 2.0983, "step": 254340 }, { "epoch": 0.9681188767004407, "grad_norm": 0.12975889444351196, "learning_rate": 0.0005, "loss": 2.1125, "step": 254350 }, { "epoch": 0.9681569391685635, "grad_norm": 0.12580707669258118, "learning_rate": 0.0005, "loss": 2.1001, "step": 254360 }, { "epoch": 0.9681950016366861, "grad_norm": 0.13266442716121674, "learning_rate": 0.0005, "loss": 2.0895, "step": 254370 }, { "epoch": 0.9682330641048088, "grad_norm": 0.13872191309928894, "learning_rate": 0.0005, "loss": 2.1022, "step": 254380 }, { "epoch": 0.9682711265729315, "grad_norm": 0.12552133202552795, "learning_rate": 0.0005, "loss": 2.0975, "step": 254390 }, { "epoch": 0.9683091890410542, "grad_norm": 0.12063095718622208, "learning_rate": 0.0005, "loss": 2.0861, "step": 254400 }, { "epoch": 0.9683472515091769, "grad_norm": 0.13410405814647675, "learning_rate": 0.0005, "loss": 2.1049, "step": 254410 }, { "epoch": 0.9683853139772995, "grad_norm": 0.12568983435630798, "learning_rate": 0.0005, "loss": 2.1096, "step": 254420 }, { "epoch": 0.9684233764454222, "grad_norm": 0.12332507967948914, "learning_rate": 0.0005, "loss": 2.1037, "step": 254430 }, { "epoch": 0.9684614389135449, "grad_norm": 0.11628450453281403, "learning_rate": 0.0005, "loss": 2.0986, "step": 254440 }, { "epoch": 0.9684995013816676, "grad_norm": 0.13699153065681458, "learning_rate": 0.0005, "loss": 2.097, "step": 254450 }, { "epoch": 0.9685375638497903, "grad_norm": 0.1462022215127945, "learning_rate": 0.0005, "loss": 2.0915, "step": 254460 }, { "epoch": 0.9685756263179129, "grad_norm": 0.1329510509967804, "learning_rate": 0.0005, "loss": 2.1062, "step": 254470 }, { "epoch": 0.9686136887860356, "grad_norm": 0.1387140452861786, "learning_rate": 0.0005, "loss": 2.0968, "step": 254480 }, { "epoch": 0.9686517512541584, "grad_norm": 0.11960545927286148, "learning_rate": 0.0005, "loss": 2.1025, "step": 254490 }, { "epoch": 0.968689813722281, "grad_norm": 0.1454916149377823, "learning_rate": 0.0005, "loss": 2.1077, "step": 254500 }, { "epoch": 0.9687278761904037, "grad_norm": 0.12710386514663696, "learning_rate": 0.0005, "loss": 2.0936, "step": 254510 }, { "epoch": 0.9687659386585263, "grad_norm": 0.13227415084838867, "learning_rate": 0.0005, "loss": 2.1251, "step": 254520 }, { "epoch": 0.9688040011266491, "grad_norm": 0.12211448699235916, "learning_rate": 0.0005, "loss": 2.0956, "step": 254530 }, { "epoch": 0.9688420635947718, "grad_norm": 0.12017636746168137, "learning_rate": 0.0005, "loss": 2.0948, "step": 254540 }, { "epoch": 0.9688801260628944, "grad_norm": 0.12308043986558914, "learning_rate": 0.0005, "loss": 2.1094, "step": 254550 }, { "epoch": 0.9689181885310171, "grad_norm": 0.1376669704914093, "learning_rate": 0.0005, "loss": 2.0883, "step": 254560 }, { "epoch": 0.9689562509991397, "grad_norm": 0.13200579583644867, "learning_rate": 0.0005, "loss": 2.1023, "step": 254570 }, { "epoch": 0.9689943134672625, "grad_norm": 0.13734455406665802, "learning_rate": 0.0005, "loss": 2.1043, "step": 254580 }, { "epoch": 0.9690323759353852, "grad_norm": 0.12204215675592422, "learning_rate": 0.0005, "loss": 2.0854, "step": 254590 }, { "epoch": 0.9690704384035078, "grad_norm": 0.13001269102096558, "learning_rate": 0.0005, "loss": 2.1021, "step": 254600 }, { "epoch": 0.9691085008716305, "grad_norm": 0.11727369576692581, "learning_rate": 0.0005, "loss": 2.0948, "step": 254610 }, { "epoch": 0.9691465633397532, "grad_norm": 0.13287939131259918, "learning_rate": 0.0005, "loss": 2.1055, "step": 254620 }, { "epoch": 0.9691846258078759, "grad_norm": 0.11994440853595734, "learning_rate": 0.0005, "loss": 2.1109, "step": 254630 }, { "epoch": 0.9692226882759986, "grad_norm": 0.13018877804279327, "learning_rate": 0.0005, "loss": 2.0963, "step": 254640 }, { "epoch": 0.9692607507441212, "grad_norm": 0.12774427235126495, "learning_rate": 0.0005, "loss": 2.1035, "step": 254650 }, { "epoch": 0.969298813212244, "grad_norm": 0.11620493233203888, "learning_rate": 0.0005, "loss": 2.1038, "step": 254660 }, { "epoch": 0.9693368756803666, "grad_norm": 0.13221842050552368, "learning_rate": 0.0005, "loss": 2.1146, "step": 254670 }, { "epoch": 0.9693749381484893, "grad_norm": 0.1154850572347641, "learning_rate": 0.0005, "loss": 2.1077, "step": 254680 }, { "epoch": 0.969413000616612, "grad_norm": 0.1304425150156021, "learning_rate": 0.0005, "loss": 2.0954, "step": 254690 }, { "epoch": 0.9694510630847347, "grad_norm": 0.11422496289014816, "learning_rate": 0.0005, "loss": 2.0953, "step": 254700 }, { "epoch": 0.9694891255528574, "grad_norm": 0.12208788841962814, "learning_rate": 0.0005, "loss": 2.1081, "step": 254710 }, { "epoch": 0.96952718802098, "grad_norm": 0.14281286299228668, "learning_rate": 0.0005, "loss": 2.1073, "step": 254720 }, { "epoch": 0.9695652504891027, "grad_norm": 0.12920963764190674, "learning_rate": 0.0005, "loss": 2.0878, "step": 254730 }, { "epoch": 0.9696033129572253, "grad_norm": 0.139565572142601, "learning_rate": 0.0005, "loss": 2.0963, "step": 254740 }, { "epoch": 0.9696413754253481, "grad_norm": 0.14187435805797577, "learning_rate": 0.0005, "loss": 2.1109, "step": 254750 }, { "epoch": 0.9696794378934708, "grad_norm": 0.1298711597919464, "learning_rate": 0.0005, "loss": 2.1269, "step": 254760 }, { "epoch": 0.9697175003615934, "grad_norm": 0.12016897648572922, "learning_rate": 0.0005, "loss": 2.0986, "step": 254770 }, { "epoch": 0.9697555628297161, "grad_norm": 0.13304503262043, "learning_rate": 0.0005, "loss": 2.1018, "step": 254780 }, { "epoch": 0.9697936252978389, "grad_norm": 0.13235604763031006, "learning_rate": 0.0005, "loss": 2.0974, "step": 254790 }, { "epoch": 0.9698316877659615, "grad_norm": 0.1423034816980362, "learning_rate": 0.0005, "loss": 2.1051, "step": 254800 }, { "epoch": 0.9698697502340842, "grad_norm": 0.13728663325309753, "learning_rate": 0.0005, "loss": 2.0886, "step": 254810 }, { "epoch": 0.9699078127022068, "grad_norm": 0.12065032124519348, "learning_rate": 0.0005, "loss": 2.1058, "step": 254820 }, { "epoch": 0.9699458751703296, "grad_norm": 0.11827119439840317, "learning_rate": 0.0005, "loss": 2.1181, "step": 254830 }, { "epoch": 0.9699839376384523, "grad_norm": 0.1292085349559784, "learning_rate": 0.0005, "loss": 2.1087, "step": 254840 }, { "epoch": 0.9700220001065749, "grad_norm": 0.12470462918281555, "learning_rate": 0.0005, "loss": 2.1157, "step": 254850 }, { "epoch": 0.9700600625746976, "grad_norm": 0.1269422173500061, "learning_rate": 0.0005, "loss": 2.1037, "step": 254860 }, { "epoch": 0.9700981250428202, "grad_norm": 0.1338520497083664, "learning_rate": 0.0005, "loss": 2.0934, "step": 254870 }, { "epoch": 0.970136187510943, "grad_norm": 0.11972904205322266, "learning_rate": 0.0005, "loss": 2.1087, "step": 254880 }, { "epoch": 0.9701742499790657, "grad_norm": 0.1291525512933731, "learning_rate": 0.0005, "loss": 2.097, "step": 254890 }, { "epoch": 0.9702123124471883, "grad_norm": 0.14123444259166718, "learning_rate": 0.0005, "loss": 2.1106, "step": 254900 }, { "epoch": 0.970250374915311, "grad_norm": 0.1271352916955948, "learning_rate": 0.0005, "loss": 2.109, "step": 254910 }, { "epoch": 0.9702884373834337, "grad_norm": 0.12444078922271729, "learning_rate": 0.0005, "loss": 2.1071, "step": 254920 }, { "epoch": 0.9703264998515564, "grad_norm": 0.11825793236494064, "learning_rate": 0.0005, "loss": 2.0996, "step": 254930 }, { "epoch": 0.970364562319679, "grad_norm": 0.21717515587806702, "learning_rate": 0.0005, "loss": 2.116, "step": 254940 }, { "epoch": 0.9704026247878017, "grad_norm": 0.12200090289115906, "learning_rate": 0.0005, "loss": 2.1098, "step": 254950 }, { "epoch": 0.9704406872559245, "grad_norm": 0.13350056111812592, "learning_rate": 0.0005, "loss": 2.1092, "step": 254960 }, { "epoch": 0.9704787497240471, "grad_norm": 0.13846023380756378, "learning_rate": 0.0005, "loss": 2.079, "step": 254970 }, { "epoch": 0.9705168121921698, "grad_norm": 0.13522903621196747, "learning_rate": 0.0005, "loss": 2.105, "step": 254980 }, { "epoch": 0.9705548746602924, "grad_norm": 0.12254560738801956, "learning_rate": 0.0005, "loss": 2.1038, "step": 254990 }, { "epoch": 0.9705929371284151, "grad_norm": 0.12815439701080322, "learning_rate": 0.0005, "loss": 2.1073, "step": 255000 }, { "epoch": 0.9706309995965379, "grad_norm": 0.13445810973644257, "learning_rate": 0.0005, "loss": 2.1042, "step": 255010 }, { "epoch": 0.9706690620646605, "grad_norm": 0.13216426968574524, "learning_rate": 0.0005, "loss": 2.1021, "step": 255020 }, { "epoch": 0.9707071245327832, "grad_norm": 0.12756191194057465, "learning_rate": 0.0005, "loss": 2.1066, "step": 255030 }, { "epoch": 0.9707451870009058, "grad_norm": 0.1667054444551468, "learning_rate": 0.0005, "loss": 2.11, "step": 255040 }, { "epoch": 0.9707832494690286, "grad_norm": 0.12336678802967072, "learning_rate": 0.0005, "loss": 2.1008, "step": 255050 }, { "epoch": 0.9708213119371513, "grad_norm": 0.12533093988895416, "learning_rate": 0.0005, "loss": 2.0936, "step": 255060 }, { "epoch": 0.9708593744052739, "grad_norm": 0.13144513964653015, "learning_rate": 0.0005, "loss": 2.1049, "step": 255070 }, { "epoch": 0.9708974368733966, "grad_norm": 0.12879325449466705, "learning_rate": 0.0005, "loss": 2.1056, "step": 255080 }, { "epoch": 0.9709354993415193, "grad_norm": 0.12825438380241394, "learning_rate": 0.0005, "loss": 2.0877, "step": 255090 }, { "epoch": 0.970973561809642, "grad_norm": 0.13372738659381866, "learning_rate": 0.0005, "loss": 2.1045, "step": 255100 }, { "epoch": 0.9710116242777647, "grad_norm": 0.1324717253446579, "learning_rate": 0.0005, "loss": 2.1106, "step": 255110 }, { "epoch": 0.9710496867458873, "grad_norm": 0.14835681021213531, "learning_rate": 0.0005, "loss": 2.1078, "step": 255120 }, { "epoch": 0.9710877492140101, "grad_norm": 0.13331232964992523, "learning_rate": 0.0005, "loss": 2.1067, "step": 255130 }, { "epoch": 0.9711258116821327, "grad_norm": 0.12525902688503265, "learning_rate": 0.0005, "loss": 2.107, "step": 255140 }, { "epoch": 0.9711638741502554, "grad_norm": 0.125664621591568, "learning_rate": 0.0005, "loss": 2.1051, "step": 255150 }, { "epoch": 0.9712019366183781, "grad_norm": 0.12486566603183746, "learning_rate": 0.0005, "loss": 2.0942, "step": 255160 }, { "epoch": 0.9712399990865007, "grad_norm": 0.14651164412498474, "learning_rate": 0.0005, "loss": 2.0772, "step": 255170 }, { "epoch": 0.9712780615546235, "grad_norm": 0.13397106528282166, "learning_rate": 0.0005, "loss": 2.0997, "step": 255180 }, { "epoch": 0.9713161240227461, "grad_norm": 0.12492938339710236, "learning_rate": 0.0005, "loss": 2.0929, "step": 255190 }, { "epoch": 0.9713541864908688, "grad_norm": 0.1352192461490631, "learning_rate": 0.0005, "loss": 2.1015, "step": 255200 }, { "epoch": 0.9713922489589915, "grad_norm": 0.1237134337425232, "learning_rate": 0.0005, "loss": 2.0918, "step": 255210 }, { "epoch": 0.9714303114271142, "grad_norm": 0.12343886494636536, "learning_rate": 0.0005, "loss": 2.1087, "step": 255220 }, { "epoch": 0.9714683738952369, "grad_norm": 0.1212475448846817, "learning_rate": 0.0005, "loss": 2.0963, "step": 255230 }, { "epoch": 0.9715064363633595, "grad_norm": 0.13539819419384003, "learning_rate": 0.0005, "loss": 2.1005, "step": 255240 }, { "epoch": 0.9715444988314822, "grad_norm": 0.13426490128040314, "learning_rate": 0.0005, "loss": 2.1194, "step": 255250 }, { "epoch": 0.971582561299605, "grad_norm": 0.13717344403266907, "learning_rate": 0.0005, "loss": 2.1006, "step": 255260 }, { "epoch": 0.9716206237677276, "grad_norm": 0.12221307307481766, "learning_rate": 0.0005, "loss": 2.1114, "step": 255270 }, { "epoch": 0.9716586862358503, "grad_norm": 0.13117346167564392, "learning_rate": 0.0005, "loss": 2.0914, "step": 255280 }, { "epoch": 0.9716967487039729, "grad_norm": 0.13155390322208405, "learning_rate": 0.0005, "loss": 2.1129, "step": 255290 }, { "epoch": 0.9717348111720956, "grad_norm": 0.1376398205757141, "learning_rate": 0.0005, "loss": 2.104, "step": 255300 }, { "epoch": 0.9717728736402184, "grad_norm": 0.12305990606546402, "learning_rate": 0.0005, "loss": 2.1019, "step": 255310 }, { "epoch": 0.971810936108341, "grad_norm": 0.11314533650875092, "learning_rate": 0.0005, "loss": 2.1048, "step": 255320 }, { "epoch": 0.9718489985764637, "grad_norm": 0.13132306933403015, "learning_rate": 0.0005, "loss": 2.0983, "step": 255330 }, { "epoch": 0.9718870610445863, "grad_norm": 0.11744492501020432, "learning_rate": 0.0005, "loss": 2.0982, "step": 255340 }, { "epoch": 0.9719251235127091, "grad_norm": 0.12644986808300018, "learning_rate": 0.0005, "loss": 2.1055, "step": 255350 }, { "epoch": 0.9719631859808318, "grad_norm": 0.1316252052783966, "learning_rate": 0.0005, "loss": 2.116, "step": 255360 }, { "epoch": 0.9720012484489544, "grad_norm": 0.12514221668243408, "learning_rate": 0.0005, "loss": 2.11, "step": 255370 }, { "epoch": 0.9720393109170771, "grad_norm": 0.14039510488510132, "learning_rate": 0.0005, "loss": 2.093, "step": 255380 }, { "epoch": 0.9720773733851998, "grad_norm": 0.11962796747684479, "learning_rate": 0.0005, "loss": 2.0862, "step": 255390 }, { "epoch": 0.9721154358533225, "grad_norm": 0.15293170511722565, "learning_rate": 0.0005, "loss": 2.119, "step": 255400 }, { "epoch": 0.9721534983214452, "grad_norm": 0.15546253323554993, "learning_rate": 0.0005, "loss": 2.112, "step": 255410 }, { "epoch": 0.9721915607895678, "grad_norm": 0.1338612586259842, "learning_rate": 0.0005, "loss": 2.1022, "step": 255420 }, { "epoch": 0.9722296232576905, "grad_norm": 0.1300361603498459, "learning_rate": 0.0005, "loss": 2.1109, "step": 255430 }, { "epoch": 0.9722676857258132, "grad_norm": 0.12052548676729202, "learning_rate": 0.0005, "loss": 2.1214, "step": 255440 }, { "epoch": 0.9723057481939359, "grad_norm": 0.1294412463903427, "learning_rate": 0.0005, "loss": 2.1124, "step": 255450 }, { "epoch": 0.9723438106620585, "grad_norm": 0.12222734093666077, "learning_rate": 0.0005, "loss": 2.0998, "step": 255460 }, { "epoch": 0.9723818731301812, "grad_norm": 0.12707626819610596, "learning_rate": 0.0005, "loss": 2.0934, "step": 255470 }, { "epoch": 0.972419935598304, "grad_norm": 0.13399982452392578, "learning_rate": 0.0005, "loss": 2.0904, "step": 255480 }, { "epoch": 0.9724579980664266, "grad_norm": 0.13506804406642914, "learning_rate": 0.0005, "loss": 2.106, "step": 255490 }, { "epoch": 0.9724960605345493, "grad_norm": 0.12390710413455963, "learning_rate": 0.0005, "loss": 2.1014, "step": 255500 }, { "epoch": 0.972534123002672, "grad_norm": 0.12795017659664154, "learning_rate": 0.0005, "loss": 2.0959, "step": 255510 }, { "epoch": 0.9725721854707947, "grad_norm": 0.12792456150054932, "learning_rate": 0.0005, "loss": 2.1079, "step": 255520 }, { "epoch": 0.9726102479389174, "grad_norm": 0.13905619084835052, "learning_rate": 0.0005, "loss": 2.1041, "step": 255530 }, { "epoch": 0.97264831040704, "grad_norm": 0.13615982234477997, "learning_rate": 0.0005, "loss": 2.1096, "step": 255540 }, { "epoch": 0.9726863728751627, "grad_norm": 0.13537728786468506, "learning_rate": 0.0005, "loss": 2.1119, "step": 255550 }, { "epoch": 0.9727244353432855, "grad_norm": 0.1334761083126068, "learning_rate": 0.0005, "loss": 2.0939, "step": 255560 }, { "epoch": 0.9727624978114081, "grad_norm": 0.12943799793720245, "learning_rate": 0.0005, "loss": 2.0996, "step": 255570 }, { "epoch": 0.9728005602795308, "grad_norm": 0.13745969533920288, "learning_rate": 0.0005, "loss": 2.1078, "step": 255580 }, { "epoch": 0.9728386227476534, "grad_norm": 0.12578512728214264, "learning_rate": 0.0005, "loss": 2.1024, "step": 255590 }, { "epoch": 0.9728766852157761, "grad_norm": 0.12996302545070648, "learning_rate": 0.0005, "loss": 2.1121, "step": 255600 }, { "epoch": 0.9729147476838989, "grad_norm": 0.1376398652791977, "learning_rate": 0.0005, "loss": 2.1122, "step": 255610 }, { "epoch": 0.9729528101520215, "grad_norm": 0.12311550974845886, "learning_rate": 0.0005, "loss": 2.1122, "step": 255620 }, { "epoch": 0.9729908726201442, "grad_norm": 0.12581251561641693, "learning_rate": 0.0005, "loss": 2.1075, "step": 255630 }, { "epoch": 0.9730289350882668, "grad_norm": 0.13663913309574127, "learning_rate": 0.0005, "loss": 2.1153, "step": 255640 }, { "epoch": 0.9730669975563896, "grad_norm": 0.11359215527772903, "learning_rate": 0.0005, "loss": 2.1072, "step": 255650 }, { "epoch": 0.9731050600245122, "grad_norm": 0.12716831266880035, "learning_rate": 0.0005, "loss": 2.1028, "step": 255660 }, { "epoch": 0.9731431224926349, "grad_norm": 0.1292915642261505, "learning_rate": 0.0005, "loss": 2.1037, "step": 255670 }, { "epoch": 0.9731811849607576, "grad_norm": 0.13148799538612366, "learning_rate": 0.0005, "loss": 2.1084, "step": 255680 }, { "epoch": 0.9732192474288803, "grad_norm": 0.1377020627260208, "learning_rate": 0.0005, "loss": 2.107, "step": 255690 }, { "epoch": 0.973257309897003, "grad_norm": 0.1219463124871254, "learning_rate": 0.0005, "loss": 2.0958, "step": 255700 }, { "epoch": 0.9732953723651256, "grad_norm": 0.12844008207321167, "learning_rate": 0.0005, "loss": 2.1124, "step": 255710 }, { "epoch": 0.9733334348332483, "grad_norm": 0.13267263770103455, "learning_rate": 0.0005, "loss": 2.1149, "step": 255720 }, { "epoch": 0.973371497301371, "grad_norm": 0.1315184086561203, "learning_rate": 0.0005, "loss": 2.1089, "step": 255730 }, { "epoch": 0.9734095597694937, "grad_norm": 0.2425239086151123, "learning_rate": 0.0005, "loss": 2.109, "step": 255740 }, { "epoch": 0.9734476222376164, "grad_norm": 0.12007585912942886, "learning_rate": 0.0005, "loss": 2.113, "step": 255750 }, { "epoch": 0.973485684705739, "grad_norm": 0.1381920874118805, "learning_rate": 0.0005, "loss": 2.0868, "step": 255760 }, { "epoch": 0.9735237471738617, "grad_norm": 0.1297314167022705, "learning_rate": 0.0005, "loss": 2.0955, "step": 255770 }, { "epoch": 0.9735618096419845, "grad_norm": 0.13172182440757751, "learning_rate": 0.0005, "loss": 2.0912, "step": 255780 }, { "epoch": 0.9735998721101071, "grad_norm": 0.12290968745946884, "learning_rate": 0.0005, "loss": 2.0875, "step": 255790 }, { "epoch": 0.9736379345782298, "grad_norm": 0.13406983017921448, "learning_rate": 0.0005, "loss": 2.1213, "step": 255800 }, { "epoch": 0.9736759970463524, "grad_norm": 0.6703738570213318, "learning_rate": 0.0005, "loss": 2.0934, "step": 255810 }, { "epoch": 0.9737140595144752, "grad_norm": 0.13410161435604095, "learning_rate": 0.0005, "loss": 2.1113, "step": 255820 }, { "epoch": 0.9737521219825979, "grad_norm": 0.13230681419372559, "learning_rate": 0.0005, "loss": 2.1086, "step": 255830 }, { "epoch": 0.9737901844507205, "grad_norm": 0.13530078530311584, "learning_rate": 0.0005, "loss": 2.096, "step": 255840 }, { "epoch": 0.9738282469188432, "grad_norm": 0.12839284539222717, "learning_rate": 0.0005, "loss": 2.1086, "step": 255850 }, { "epoch": 0.9738663093869658, "grad_norm": 0.11974814534187317, "learning_rate": 0.0005, "loss": 2.0928, "step": 255860 }, { "epoch": 0.9739043718550886, "grad_norm": 0.15206706523895264, "learning_rate": 0.0005, "loss": 2.0834, "step": 255870 }, { "epoch": 0.9739424343232113, "grad_norm": 0.12297140806913376, "learning_rate": 0.0005, "loss": 2.1059, "step": 255880 }, { "epoch": 0.9739804967913339, "grad_norm": 0.12922076880931854, "learning_rate": 0.0005, "loss": 2.1042, "step": 255890 }, { "epoch": 0.9740185592594566, "grad_norm": 0.12766559422016144, "learning_rate": 0.0005, "loss": 2.0981, "step": 255900 }, { "epoch": 0.9740566217275793, "grad_norm": 0.13466320931911469, "learning_rate": 0.0005, "loss": 2.105, "step": 255910 }, { "epoch": 0.974094684195702, "grad_norm": 0.1484542340040207, "learning_rate": 0.0005, "loss": 2.0963, "step": 255920 }, { "epoch": 0.9741327466638247, "grad_norm": 0.12738987803459167, "learning_rate": 0.0005, "loss": 2.0969, "step": 255930 }, { "epoch": 0.9741708091319473, "grad_norm": 0.12110450118780136, "learning_rate": 0.0005, "loss": 2.09, "step": 255940 }, { "epoch": 0.9742088716000701, "grad_norm": 0.12763312458992004, "learning_rate": 0.0005, "loss": 2.0964, "step": 255950 }, { "epoch": 0.9742469340681927, "grad_norm": 0.12843506038188934, "learning_rate": 0.0005, "loss": 2.0991, "step": 255960 }, { "epoch": 0.9742849965363154, "grad_norm": 0.13136166334152222, "learning_rate": 0.0005, "loss": 2.0912, "step": 255970 }, { "epoch": 0.974323059004438, "grad_norm": 0.13471366465091705, "learning_rate": 0.0005, "loss": 2.1052, "step": 255980 }, { "epoch": 0.9743611214725608, "grad_norm": 0.12480809539556503, "learning_rate": 0.0005, "loss": 2.1054, "step": 255990 }, { "epoch": 0.9743991839406835, "grad_norm": 0.1274518221616745, "learning_rate": 0.0005, "loss": 2.096, "step": 256000 }, { "epoch": 0.9744372464088061, "grad_norm": 0.12159202992916107, "learning_rate": 0.0005, "loss": 2.1118, "step": 256010 }, { "epoch": 0.9744753088769288, "grad_norm": 0.12531977891921997, "learning_rate": 0.0005, "loss": 2.1038, "step": 256020 }, { "epoch": 0.9745133713450514, "grad_norm": 0.15594400465488434, "learning_rate": 0.0005, "loss": 2.1049, "step": 256030 }, { "epoch": 0.9745514338131742, "grad_norm": 0.16119563579559326, "learning_rate": 0.0005, "loss": 2.1165, "step": 256040 }, { "epoch": 0.9745894962812969, "grad_norm": 0.1301833689212799, "learning_rate": 0.0005, "loss": 2.0964, "step": 256050 }, { "epoch": 0.9746275587494195, "grad_norm": 0.12218587845563889, "learning_rate": 0.0005, "loss": 2.0993, "step": 256060 }, { "epoch": 0.9746656212175422, "grad_norm": 0.12528090178966522, "learning_rate": 0.0005, "loss": 2.0978, "step": 256070 }, { "epoch": 0.974703683685665, "grad_norm": 0.1265835464000702, "learning_rate": 0.0005, "loss": 2.0981, "step": 256080 }, { "epoch": 0.9747417461537876, "grad_norm": 0.13827918469905853, "learning_rate": 0.0005, "loss": 2.1101, "step": 256090 }, { "epoch": 0.9747798086219103, "grad_norm": 0.11425582319498062, "learning_rate": 0.0005, "loss": 2.1045, "step": 256100 }, { "epoch": 0.9748178710900329, "grad_norm": 0.14054735004901886, "learning_rate": 0.0005, "loss": 2.0978, "step": 256110 }, { "epoch": 0.9748559335581557, "grad_norm": 0.13398562371730804, "learning_rate": 0.0005, "loss": 2.1077, "step": 256120 }, { "epoch": 0.9748939960262784, "grad_norm": 0.13601486384868622, "learning_rate": 0.0005, "loss": 2.1013, "step": 256130 }, { "epoch": 0.974932058494401, "grad_norm": 0.13452279567718506, "learning_rate": 0.0005, "loss": 2.112, "step": 256140 }, { "epoch": 0.9749701209625237, "grad_norm": 0.12645120918750763, "learning_rate": 0.0005, "loss": 2.1128, "step": 256150 }, { "epoch": 0.9750081834306463, "grad_norm": 0.15241266787052155, "learning_rate": 0.0005, "loss": 2.0933, "step": 256160 }, { "epoch": 0.9750462458987691, "grad_norm": 0.12823283672332764, "learning_rate": 0.0005, "loss": 2.0942, "step": 256170 }, { "epoch": 0.9750843083668917, "grad_norm": 0.14209270477294922, "learning_rate": 0.0005, "loss": 2.0944, "step": 256180 }, { "epoch": 0.9751223708350144, "grad_norm": 0.12420807033777237, "learning_rate": 0.0005, "loss": 2.0904, "step": 256190 }, { "epoch": 0.9751604333031371, "grad_norm": 0.12384863197803497, "learning_rate": 0.0005, "loss": 2.1159, "step": 256200 }, { "epoch": 0.9751984957712598, "grad_norm": 0.12487807869911194, "learning_rate": 0.0005, "loss": 2.1077, "step": 256210 }, { "epoch": 0.9752365582393825, "grad_norm": 0.14678291976451874, "learning_rate": 0.0005, "loss": 2.1118, "step": 256220 }, { "epoch": 0.9752746207075051, "grad_norm": 0.12178803980350494, "learning_rate": 0.0005, "loss": 2.1002, "step": 256230 }, { "epoch": 0.9753126831756278, "grad_norm": 0.12519116699695587, "learning_rate": 0.0005, "loss": 2.0979, "step": 256240 }, { "epoch": 0.9753507456437506, "grad_norm": 0.13480907678604126, "learning_rate": 0.0005, "loss": 2.0952, "step": 256250 }, { "epoch": 0.9753888081118732, "grad_norm": 0.13024158775806427, "learning_rate": 0.0005, "loss": 2.11, "step": 256260 }, { "epoch": 0.9754268705799959, "grad_norm": 0.13576485216617584, "learning_rate": 0.0005, "loss": 2.1083, "step": 256270 }, { "epoch": 0.9754649330481185, "grad_norm": 0.12468088418245316, "learning_rate": 0.0005, "loss": 2.1016, "step": 256280 }, { "epoch": 0.9755029955162412, "grad_norm": 0.15353313088417053, "learning_rate": 0.0005, "loss": 2.0962, "step": 256290 }, { "epoch": 0.975541057984364, "grad_norm": 0.13093069195747375, "learning_rate": 0.0005, "loss": 2.107, "step": 256300 }, { "epoch": 0.9755791204524866, "grad_norm": 0.1228041797876358, "learning_rate": 0.0005, "loss": 2.1167, "step": 256310 }, { "epoch": 0.9756171829206093, "grad_norm": 0.13330857455730438, "learning_rate": 0.0005, "loss": 2.0943, "step": 256320 }, { "epoch": 0.9756552453887319, "grad_norm": 0.13814891874790192, "learning_rate": 0.0005, "loss": 2.1114, "step": 256330 }, { "epoch": 0.9756933078568547, "grad_norm": 0.14975720643997192, "learning_rate": 0.0005, "loss": 2.1034, "step": 256340 }, { "epoch": 0.9757313703249774, "grad_norm": 0.12255514413118362, "learning_rate": 0.0005, "loss": 2.0978, "step": 256350 }, { "epoch": 0.9757694327931, "grad_norm": 0.12346848845481873, "learning_rate": 0.0005, "loss": 2.0936, "step": 256360 }, { "epoch": 0.9758074952612227, "grad_norm": 0.12416546046733856, "learning_rate": 0.0005, "loss": 2.0857, "step": 256370 }, { "epoch": 0.9758455577293454, "grad_norm": 0.12955905497074127, "learning_rate": 0.0005, "loss": 2.0978, "step": 256380 }, { "epoch": 0.9758836201974681, "grad_norm": 0.12957718968391418, "learning_rate": 0.0005, "loss": 2.1036, "step": 256390 }, { "epoch": 0.9759216826655908, "grad_norm": 0.13651643693447113, "learning_rate": 0.0005, "loss": 2.117, "step": 256400 }, { "epoch": 0.9759597451337134, "grad_norm": 0.1301625818014145, "learning_rate": 0.0005, "loss": 2.0937, "step": 256410 }, { "epoch": 0.9759978076018362, "grad_norm": 0.12493173032999039, "learning_rate": 0.0005, "loss": 2.1075, "step": 256420 }, { "epoch": 0.9760358700699588, "grad_norm": 0.131301611661911, "learning_rate": 0.0005, "loss": 2.1124, "step": 256430 }, { "epoch": 0.9760739325380815, "grad_norm": 0.12295681983232498, "learning_rate": 0.0005, "loss": 2.1025, "step": 256440 }, { "epoch": 0.9761119950062042, "grad_norm": 0.1482170820236206, "learning_rate": 0.0005, "loss": 2.1174, "step": 256450 }, { "epoch": 0.9761500574743268, "grad_norm": 0.1427377164363861, "learning_rate": 0.0005, "loss": 2.103, "step": 256460 }, { "epoch": 0.9761881199424496, "grad_norm": 0.135903999209404, "learning_rate": 0.0005, "loss": 2.0978, "step": 256470 }, { "epoch": 0.9762261824105722, "grad_norm": 0.13628506660461426, "learning_rate": 0.0005, "loss": 2.0984, "step": 256480 }, { "epoch": 0.9762642448786949, "grad_norm": 0.12419886887073517, "learning_rate": 0.0005, "loss": 2.1005, "step": 256490 }, { "epoch": 0.9763023073468176, "grad_norm": 0.11160758882761002, "learning_rate": 0.0005, "loss": 2.0999, "step": 256500 }, { "epoch": 0.9763403698149403, "grad_norm": 0.1205335259437561, "learning_rate": 0.0005, "loss": 2.1041, "step": 256510 }, { "epoch": 0.976378432283063, "grad_norm": 0.12622901797294617, "learning_rate": 0.0005, "loss": 2.0983, "step": 256520 }, { "epoch": 0.9764164947511856, "grad_norm": 0.1198456808924675, "learning_rate": 0.0005, "loss": 2.1122, "step": 256530 }, { "epoch": 0.9764545572193083, "grad_norm": 0.12420783191919327, "learning_rate": 0.0005, "loss": 2.0867, "step": 256540 }, { "epoch": 0.9764926196874311, "grad_norm": 0.12044156342744827, "learning_rate": 0.0005, "loss": 2.0954, "step": 256550 }, { "epoch": 0.9765306821555537, "grad_norm": 0.12951701879501343, "learning_rate": 0.0005, "loss": 2.0966, "step": 256560 }, { "epoch": 0.9765687446236764, "grad_norm": 0.1298401802778244, "learning_rate": 0.0005, "loss": 2.0968, "step": 256570 }, { "epoch": 0.976606807091799, "grad_norm": 0.15672166645526886, "learning_rate": 0.0005, "loss": 2.0813, "step": 256580 }, { "epoch": 0.9766448695599217, "grad_norm": 0.1251695454120636, "learning_rate": 0.0005, "loss": 2.1108, "step": 256590 }, { "epoch": 0.9766829320280445, "grad_norm": 0.1194787546992302, "learning_rate": 0.0005, "loss": 2.1129, "step": 256600 }, { "epoch": 0.9767209944961671, "grad_norm": 0.13885675370693207, "learning_rate": 0.0005, "loss": 2.1059, "step": 256610 }, { "epoch": 0.9767590569642898, "grad_norm": 0.12232273817062378, "learning_rate": 0.0005, "loss": 2.0931, "step": 256620 }, { "epoch": 0.9767971194324124, "grad_norm": 0.12373682111501694, "learning_rate": 0.0005, "loss": 2.1054, "step": 256630 }, { "epoch": 0.9768351819005352, "grad_norm": 0.13903123140335083, "learning_rate": 0.0005, "loss": 2.1126, "step": 256640 }, { "epoch": 0.9768732443686579, "grad_norm": 0.11475900560617447, "learning_rate": 0.0005, "loss": 2.1159, "step": 256650 }, { "epoch": 0.9769113068367805, "grad_norm": 0.12367875128984451, "learning_rate": 0.0005, "loss": 2.1138, "step": 256660 }, { "epoch": 0.9769493693049032, "grad_norm": 0.12255427241325378, "learning_rate": 0.0005, "loss": 2.1037, "step": 256670 }, { "epoch": 0.9769874317730259, "grad_norm": 0.1237168237566948, "learning_rate": 0.0005, "loss": 2.0923, "step": 256680 }, { "epoch": 0.9770254942411486, "grad_norm": 0.12001582235097885, "learning_rate": 0.0005, "loss": 2.1031, "step": 256690 }, { "epoch": 0.9770635567092713, "grad_norm": 0.13157282769680023, "learning_rate": 0.0005, "loss": 2.1116, "step": 256700 }, { "epoch": 0.9771016191773939, "grad_norm": 0.11652835458517075, "learning_rate": 0.0005, "loss": 2.1021, "step": 256710 }, { "epoch": 0.9771396816455166, "grad_norm": 0.12160100042819977, "learning_rate": 0.0005, "loss": 2.0893, "step": 256720 }, { "epoch": 0.9771777441136393, "grad_norm": 0.1366545855998993, "learning_rate": 0.0005, "loss": 2.1043, "step": 256730 }, { "epoch": 0.977215806581762, "grad_norm": 0.12709447741508484, "learning_rate": 0.0005, "loss": 2.1166, "step": 256740 }, { "epoch": 0.9772538690498846, "grad_norm": 0.11968369036912918, "learning_rate": 0.0005, "loss": 2.1012, "step": 256750 }, { "epoch": 0.9772919315180073, "grad_norm": 0.14591091871261597, "learning_rate": 0.0005, "loss": 2.1055, "step": 256760 }, { "epoch": 0.9773299939861301, "grad_norm": 0.12609747052192688, "learning_rate": 0.0005, "loss": 2.102, "step": 256770 }, { "epoch": 0.9773680564542527, "grad_norm": 0.12567102909088135, "learning_rate": 0.0005, "loss": 2.11, "step": 256780 }, { "epoch": 0.9774061189223754, "grad_norm": 0.13073763251304626, "learning_rate": 0.0005, "loss": 2.103, "step": 256790 }, { "epoch": 0.977444181390498, "grad_norm": 0.12760724127292633, "learning_rate": 0.0005, "loss": 2.0813, "step": 256800 }, { "epoch": 0.9774822438586208, "grad_norm": 0.1319473534822464, "learning_rate": 0.0005, "loss": 2.0978, "step": 256810 }, { "epoch": 0.9775203063267435, "grad_norm": 0.12671951949596405, "learning_rate": 0.0005, "loss": 2.0981, "step": 256820 }, { "epoch": 0.9775583687948661, "grad_norm": 0.11461569368839264, "learning_rate": 0.0005, "loss": 2.1044, "step": 256830 }, { "epoch": 0.9775964312629888, "grad_norm": 0.12155576050281525, "learning_rate": 0.0005, "loss": 2.1023, "step": 256840 }, { "epoch": 0.9776344937311116, "grad_norm": 0.12291843444108963, "learning_rate": 0.0005, "loss": 2.0947, "step": 256850 }, { "epoch": 0.9776725561992342, "grad_norm": 0.12945066392421722, "learning_rate": 0.0005, "loss": 2.097, "step": 256860 }, { "epoch": 0.9777106186673569, "grad_norm": 0.12481513619422913, "learning_rate": 0.0005, "loss": 2.0899, "step": 256870 }, { "epoch": 0.9777486811354795, "grad_norm": 0.12574784457683563, "learning_rate": 0.0005, "loss": 2.1112, "step": 256880 }, { "epoch": 0.9777867436036022, "grad_norm": 0.1317308396100998, "learning_rate": 0.0005, "loss": 2.1114, "step": 256890 }, { "epoch": 0.977824806071725, "grad_norm": 0.1266586184501648, "learning_rate": 0.0005, "loss": 2.1065, "step": 256900 }, { "epoch": 0.9778628685398476, "grad_norm": 0.1240110993385315, "learning_rate": 0.0005, "loss": 2.1187, "step": 256910 }, { "epoch": 0.9779009310079703, "grad_norm": 0.1257583349943161, "learning_rate": 0.0005, "loss": 2.1086, "step": 256920 }, { "epoch": 0.9779389934760929, "grad_norm": 0.1170019805431366, "learning_rate": 0.0005, "loss": 2.1124, "step": 256930 }, { "epoch": 0.9779770559442157, "grad_norm": 0.11175654083490372, "learning_rate": 0.0005, "loss": 2.1025, "step": 256940 }, { "epoch": 0.9780151184123383, "grad_norm": 0.14043356478214264, "learning_rate": 0.0005, "loss": 2.1071, "step": 256950 }, { "epoch": 0.978053180880461, "grad_norm": 0.11840981245040894, "learning_rate": 0.0005, "loss": 2.0961, "step": 256960 }, { "epoch": 0.9780912433485837, "grad_norm": 0.1315300315618515, "learning_rate": 0.0005, "loss": 2.1114, "step": 256970 }, { "epoch": 0.9781293058167064, "grad_norm": 0.1293240189552307, "learning_rate": 0.0005, "loss": 2.1022, "step": 256980 }, { "epoch": 0.9781673682848291, "grad_norm": 0.13710515201091766, "learning_rate": 0.0005, "loss": 2.0878, "step": 256990 }, { "epoch": 0.9782054307529517, "grad_norm": 0.1184496060013771, "learning_rate": 0.0005, "loss": 2.0932, "step": 257000 }, { "epoch": 0.9782434932210744, "grad_norm": 0.13085511326789856, "learning_rate": 0.0005, "loss": 2.1056, "step": 257010 }, { "epoch": 0.9782815556891971, "grad_norm": 0.11874709278345108, "learning_rate": 0.0005, "loss": 2.1174, "step": 257020 }, { "epoch": 0.9783196181573198, "grad_norm": 0.14223746955394745, "learning_rate": 0.0005, "loss": 2.0975, "step": 257030 }, { "epoch": 0.9783576806254425, "grad_norm": 0.12502065300941467, "learning_rate": 0.0005, "loss": 2.0923, "step": 257040 }, { "epoch": 0.9783957430935651, "grad_norm": 0.12484754621982574, "learning_rate": 0.0005, "loss": 2.1034, "step": 257050 }, { "epoch": 0.9784338055616878, "grad_norm": 0.11882039904594421, "learning_rate": 0.0005, "loss": 2.1048, "step": 257060 }, { "epoch": 0.9784718680298106, "grad_norm": 0.15266261994838715, "learning_rate": 0.0005, "loss": 2.1046, "step": 257070 }, { "epoch": 0.9785099304979332, "grad_norm": 0.1286851167678833, "learning_rate": 0.0005, "loss": 2.0931, "step": 257080 }, { "epoch": 0.9785479929660559, "grad_norm": 0.1353611946105957, "learning_rate": 0.0005, "loss": 2.1102, "step": 257090 }, { "epoch": 0.9785860554341785, "grad_norm": 0.12660717964172363, "learning_rate": 0.0005, "loss": 2.123, "step": 257100 }, { "epoch": 0.9786241179023013, "grad_norm": 0.12552636861801147, "learning_rate": 0.0005, "loss": 2.1144, "step": 257110 }, { "epoch": 0.978662180370424, "grad_norm": 0.12433915585279465, "learning_rate": 0.0005, "loss": 2.1109, "step": 257120 }, { "epoch": 0.9787002428385466, "grad_norm": 0.12207551300525665, "learning_rate": 0.0005, "loss": 2.1026, "step": 257130 }, { "epoch": 0.9787383053066693, "grad_norm": 0.12603680789470673, "learning_rate": 0.0005, "loss": 2.1011, "step": 257140 }, { "epoch": 0.9787763677747919, "grad_norm": 0.12861515581607819, "learning_rate": 0.0005, "loss": 2.1063, "step": 257150 }, { "epoch": 0.9788144302429147, "grad_norm": 0.1411086916923523, "learning_rate": 0.0005, "loss": 2.0968, "step": 257160 }, { "epoch": 0.9788524927110374, "grad_norm": 0.14067700505256653, "learning_rate": 0.0005, "loss": 2.0986, "step": 257170 }, { "epoch": 0.97889055517916, "grad_norm": 0.12675721943378448, "learning_rate": 0.0005, "loss": 2.0871, "step": 257180 }, { "epoch": 0.9789286176472827, "grad_norm": 0.12993498146533966, "learning_rate": 0.0005, "loss": 2.0869, "step": 257190 }, { "epoch": 0.9789666801154054, "grad_norm": 0.1242540255188942, "learning_rate": 0.0005, "loss": 2.1088, "step": 257200 }, { "epoch": 0.9790047425835281, "grad_norm": 0.12612493336200714, "learning_rate": 0.0005, "loss": 2.1137, "step": 257210 }, { "epoch": 0.9790428050516508, "grad_norm": 0.11633577197790146, "learning_rate": 0.0005, "loss": 2.1077, "step": 257220 }, { "epoch": 0.9790808675197734, "grad_norm": 0.1153443455696106, "learning_rate": 0.0005, "loss": 2.0937, "step": 257230 }, { "epoch": 0.9791189299878962, "grad_norm": 0.1141563132405281, "learning_rate": 0.0005, "loss": 2.1033, "step": 257240 }, { "epoch": 0.9791569924560188, "grad_norm": 0.12455535680055618, "learning_rate": 0.0005, "loss": 2.1123, "step": 257250 }, { "epoch": 0.9791950549241415, "grad_norm": 0.1222526803612709, "learning_rate": 0.0005, "loss": 2.0995, "step": 257260 }, { "epoch": 0.9792331173922642, "grad_norm": 0.12650750577449799, "learning_rate": 0.0005, "loss": 2.0894, "step": 257270 }, { "epoch": 0.9792711798603869, "grad_norm": 0.1303875595331192, "learning_rate": 0.0005, "loss": 2.1011, "step": 257280 }, { "epoch": 0.9793092423285096, "grad_norm": 0.12938658893108368, "learning_rate": 0.0005, "loss": 2.1037, "step": 257290 }, { "epoch": 0.9793473047966322, "grad_norm": 0.12311175465583801, "learning_rate": 0.0005, "loss": 2.0959, "step": 257300 }, { "epoch": 0.9793853672647549, "grad_norm": 0.1328628957271576, "learning_rate": 0.0005, "loss": 2.104, "step": 257310 }, { "epoch": 0.9794234297328775, "grad_norm": 0.1298210620880127, "learning_rate": 0.0005, "loss": 2.0907, "step": 257320 }, { "epoch": 0.9794614922010003, "grad_norm": 0.13311082124710083, "learning_rate": 0.0005, "loss": 2.0932, "step": 257330 }, { "epoch": 0.979499554669123, "grad_norm": 0.1271749883890152, "learning_rate": 0.0005, "loss": 2.1058, "step": 257340 }, { "epoch": 0.9795376171372456, "grad_norm": 0.1434793770313263, "learning_rate": 0.0005, "loss": 2.0931, "step": 257350 }, { "epoch": 0.9795756796053683, "grad_norm": 0.1282554268836975, "learning_rate": 0.0005, "loss": 2.1053, "step": 257360 }, { "epoch": 0.9796137420734911, "grad_norm": 0.14303486049175262, "learning_rate": 0.0005, "loss": 2.0975, "step": 257370 }, { "epoch": 0.9796518045416137, "grad_norm": 0.13568031787872314, "learning_rate": 0.0005, "loss": 2.098, "step": 257380 }, { "epoch": 0.9796898670097364, "grad_norm": 0.1360870599746704, "learning_rate": 0.0005, "loss": 2.0992, "step": 257390 }, { "epoch": 0.979727929477859, "grad_norm": 0.12494190782308578, "learning_rate": 0.0005, "loss": 2.1013, "step": 257400 }, { "epoch": 0.9797659919459818, "grad_norm": 0.11892944574356079, "learning_rate": 0.0005, "loss": 2.0945, "step": 257410 }, { "epoch": 0.9798040544141045, "grad_norm": 0.1417090743780136, "learning_rate": 0.0005, "loss": 2.1158, "step": 257420 }, { "epoch": 0.9798421168822271, "grad_norm": 0.1306687444448471, "learning_rate": 0.0005, "loss": 2.1011, "step": 257430 }, { "epoch": 0.9798801793503498, "grad_norm": 0.14193464815616608, "learning_rate": 0.0005, "loss": 2.1063, "step": 257440 }, { "epoch": 0.9799182418184724, "grad_norm": 0.12267079204320908, "learning_rate": 0.0005, "loss": 2.1065, "step": 257450 }, { "epoch": 0.9799563042865952, "grad_norm": 0.12777526676654816, "learning_rate": 0.0005, "loss": 2.1042, "step": 257460 }, { "epoch": 0.9799943667547178, "grad_norm": 0.13828790187835693, "learning_rate": 0.0005, "loss": 2.0931, "step": 257470 }, { "epoch": 0.9800324292228405, "grad_norm": 0.12572868168354034, "learning_rate": 0.0005, "loss": 2.1016, "step": 257480 }, { "epoch": 0.9800704916909632, "grad_norm": 0.14129850268363953, "learning_rate": 0.0005, "loss": 2.1105, "step": 257490 }, { "epoch": 0.9801085541590859, "grad_norm": 0.12947334349155426, "learning_rate": 0.0005, "loss": 2.0897, "step": 257500 }, { "epoch": 0.9801466166272086, "grad_norm": 0.1196305900812149, "learning_rate": 0.0005, "loss": 2.0941, "step": 257510 }, { "epoch": 0.9801846790953312, "grad_norm": 0.13480442762374878, "learning_rate": 0.0005, "loss": 2.0982, "step": 257520 }, { "epoch": 0.9802227415634539, "grad_norm": 0.14180704951286316, "learning_rate": 0.0005, "loss": 2.0998, "step": 257530 }, { "epoch": 0.9802608040315767, "grad_norm": 0.12781913578510284, "learning_rate": 0.0005, "loss": 2.0941, "step": 257540 }, { "epoch": 0.9802988664996993, "grad_norm": 0.13860957324504852, "learning_rate": 0.0005, "loss": 2.1034, "step": 257550 }, { "epoch": 0.980336928967822, "grad_norm": 0.13281327486038208, "learning_rate": 0.0005, "loss": 2.1025, "step": 257560 }, { "epoch": 0.9803749914359446, "grad_norm": 0.1327611654996872, "learning_rate": 0.0005, "loss": 2.1129, "step": 257570 }, { "epoch": 0.9804130539040674, "grad_norm": 0.11932291090488434, "learning_rate": 0.0005, "loss": 2.0989, "step": 257580 }, { "epoch": 0.9804511163721901, "grad_norm": 0.1157454401254654, "learning_rate": 0.0005, "loss": 2.0863, "step": 257590 }, { "epoch": 0.9804891788403127, "grad_norm": 0.12068181484937668, "learning_rate": 0.0005, "loss": 2.1061, "step": 257600 }, { "epoch": 0.9805272413084354, "grad_norm": 0.11915294080972672, "learning_rate": 0.0005, "loss": 2.1013, "step": 257610 }, { "epoch": 0.980565303776558, "grad_norm": 0.11446240544319153, "learning_rate": 0.0005, "loss": 2.1077, "step": 257620 }, { "epoch": 0.9806033662446808, "grad_norm": 0.1247885599732399, "learning_rate": 0.0005, "loss": 2.1064, "step": 257630 }, { "epoch": 0.9806414287128035, "grad_norm": 0.12949500977993011, "learning_rate": 0.0005, "loss": 2.102, "step": 257640 }, { "epoch": 0.9806794911809261, "grad_norm": 0.11171896755695343, "learning_rate": 0.0005, "loss": 2.102, "step": 257650 }, { "epoch": 0.9807175536490488, "grad_norm": 0.13205240666866302, "learning_rate": 0.0005, "loss": 2.1019, "step": 257660 }, { "epoch": 0.9807556161171715, "grad_norm": 0.15095172822475433, "learning_rate": 0.0005, "loss": 2.0995, "step": 257670 }, { "epoch": 0.9807936785852942, "grad_norm": 0.14866958558559418, "learning_rate": 0.0005, "loss": 2.1002, "step": 257680 }, { "epoch": 0.9808317410534169, "grad_norm": 0.13984258472919464, "learning_rate": 0.0005, "loss": 2.1007, "step": 257690 }, { "epoch": 0.9808698035215395, "grad_norm": 0.12085743993520737, "learning_rate": 0.0005, "loss": 2.0948, "step": 257700 }, { "epoch": 0.9809078659896623, "grad_norm": 0.11848316341638565, "learning_rate": 0.0005, "loss": 2.0996, "step": 257710 }, { "epoch": 0.9809459284577849, "grad_norm": 0.12194575369358063, "learning_rate": 0.0005, "loss": 2.0837, "step": 257720 }, { "epoch": 0.9809839909259076, "grad_norm": 0.12809818983078003, "learning_rate": 0.0005, "loss": 2.092, "step": 257730 }, { "epoch": 0.9810220533940303, "grad_norm": 0.13447466492652893, "learning_rate": 0.0005, "loss": 2.1036, "step": 257740 }, { "epoch": 0.9810601158621529, "grad_norm": 0.1250292956829071, "learning_rate": 0.0005, "loss": 2.1058, "step": 257750 }, { "epoch": 0.9810981783302757, "grad_norm": 0.14565639197826385, "learning_rate": 0.0005, "loss": 2.104, "step": 257760 }, { "epoch": 0.9811362407983983, "grad_norm": 0.11911211162805557, "learning_rate": 0.0005, "loss": 2.1064, "step": 257770 }, { "epoch": 0.981174303266521, "grad_norm": 0.12898415327072144, "learning_rate": 0.0005, "loss": 2.0929, "step": 257780 }, { "epoch": 0.9812123657346437, "grad_norm": 0.11979299783706665, "learning_rate": 0.0005, "loss": 2.108, "step": 257790 }, { "epoch": 0.9812504282027664, "grad_norm": 0.13223201036453247, "learning_rate": 0.0005, "loss": 2.0912, "step": 257800 }, { "epoch": 0.9812884906708891, "grad_norm": 0.13081419467926025, "learning_rate": 0.0005, "loss": 2.1162, "step": 257810 }, { "epoch": 0.9813265531390117, "grad_norm": 0.1428884118795395, "learning_rate": 0.0005, "loss": 2.1024, "step": 257820 }, { "epoch": 0.9813646156071344, "grad_norm": 0.13086241483688354, "learning_rate": 0.0005, "loss": 2.0926, "step": 257830 }, { "epoch": 0.9814026780752572, "grad_norm": 0.1303388625383377, "learning_rate": 0.0005, "loss": 2.0941, "step": 257840 }, { "epoch": 0.9814407405433798, "grad_norm": 0.1248994842171669, "learning_rate": 0.0005, "loss": 2.1072, "step": 257850 }, { "epoch": 0.9814788030115025, "grad_norm": 0.12460754811763763, "learning_rate": 0.0005, "loss": 2.104, "step": 257860 }, { "epoch": 0.9815168654796251, "grad_norm": 0.11974826455116272, "learning_rate": 0.0005, "loss": 2.1056, "step": 257870 }, { "epoch": 0.9815549279477478, "grad_norm": 0.1300465613603592, "learning_rate": 0.0005, "loss": 2.0892, "step": 257880 }, { "epoch": 0.9815929904158706, "grad_norm": 0.14067713916301727, "learning_rate": 0.0005, "loss": 2.099, "step": 257890 }, { "epoch": 0.9816310528839932, "grad_norm": 0.1349792778491974, "learning_rate": 0.0005, "loss": 2.0985, "step": 257900 }, { "epoch": 0.9816691153521159, "grad_norm": 0.13326580822467804, "learning_rate": 0.0005, "loss": 2.0921, "step": 257910 }, { "epoch": 0.9817071778202385, "grad_norm": 0.2773866057395935, "learning_rate": 0.0005, "loss": 2.0947, "step": 257920 }, { "epoch": 0.9817452402883613, "grad_norm": 0.12216406315565109, "learning_rate": 0.0005, "loss": 2.1029, "step": 257930 }, { "epoch": 0.981783302756484, "grad_norm": 0.14116701483726501, "learning_rate": 0.0005, "loss": 2.1063, "step": 257940 }, { "epoch": 0.9818213652246066, "grad_norm": 0.12974095344543457, "learning_rate": 0.0005, "loss": 2.1021, "step": 257950 }, { "epoch": 0.9818594276927293, "grad_norm": 0.11997047066688538, "learning_rate": 0.0005, "loss": 2.0941, "step": 257960 }, { "epoch": 0.981897490160852, "grad_norm": 0.13372445106506348, "learning_rate": 0.0005, "loss": 2.106, "step": 257970 }, { "epoch": 0.9819355526289747, "grad_norm": 0.13414129614830017, "learning_rate": 0.0005, "loss": 2.0915, "step": 257980 }, { "epoch": 0.9819736150970974, "grad_norm": 0.12741310894489288, "learning_rate": 0.0005, "loss": 2.0884, "step": 257990 }, { "epoch": 0.98201167756522, "grad_norm": 0.12601037323474884, "learning_rate": 0.0005, "loss": 2.106, "step": 258000 }, { "epoch": 0.9820497400333428, "grad_norm": 0.1299891471862793, "learning_rate": 0.0005, "loss": 2.1115, "step": 258010 }, { "epoch": 0.9820878025014654, "grad_norm": 0.13461840152740479, "learning_rate": 0.0005, "loss": 2.1031, "step": 258020 }, { "epoch": 0.9821258649695881, "grad_norm": 0.12584570050239563, "learning_rate": 0.0005, "loss": 2.0969, "step": 258030 }, { "epoch": 0.9821639274377107, "grad_norm": 0.13351255655288696, "learning_rate": 0.0005, "loss": 2.1014, "step": 258040 }, { "epoch": 0.9822019899058334, "grad_norm": 0.12079761922359467, "learning_rate": 0.0005, "loss": 2.1001, "step": 258050 }, { "epoch": 0.9822400523739562, "grad_norm": 0.12505701184272766, "learning_rate": 0.0005, "loss": 2.1102, "step": 258060 }, { "epoch": 0.9822781148420788, "grad_norm": 0.13210898637771606, "learning_rate": 0.0005, "loss": 2.0924, "step": 258070 }, { "epoch": 0.9823161773102015, "grad_norm": 0.1253563016653061, "learning_rate": 0.0005, "loss": 2.0963, "step": 258080 }, { "epoch": 0.9823542397783241, "grad_norm": 0.12380914390087128, "learning_rate": 0.0005, "loss": 2.0934, "step": 258090 }, { "epoch": 0.9823923022464469, "grad_norm": 0.1306920349597931, "learning_rate": 0.0005, "loss": 2.1157, "step": 258100 }, { "epoch": 0.9824303647145696, "grad_norm": 0.13246990740299225, "learning_rate": 0.0005, "loss": 2.1017, "step": 258110 }, { "epoch": 0.9824684271826922, "grad_norm": 0.1187557801604271, "learning_rate": 0.0005, "loss": 2.1127, "step": 258120 }, { "epoch": 0.9825064896508149, "grad_norm": 0.13110314309597015, "learning_rate": 0.0005, "loss": 2.0881, "step": 258130 }, { "epoch": 0.9825445521189377, "grad_norm": 0.14428871870040894, "learning_rate": 0.0005, "loss": 2.0947, "step": 258140 }, { "epoch": 0.9825826145870603, "grad_norm": 0.13296005129814148, "learning_rate": 0.0005, "loss": 2.0934, "step": 258150 }, { "epoch": 0.982620677055183, "grad_norm": 0.11815083771944046, "learning_rate": 0.0005, "loss": 2.105, "step": 258160 }, { "epoch": 0.9826587395233056, "grad_norm": 0.12260029464960098, "learning_rate": 0.0005, "loss": 2.1093, "step": 258170 }, { "epoch": 0.9826968019914283, "grad_norm": 0.13770967721939087, "learning_rate": 0.0005, "loss": 2.1037, "step": 258180 }, { "epoch": 0.982734864459551, "grad_norm": 0.12869694828987122, "learning_rate": 0.0005, "loss": 2.0934, "step": 258190 }, { "epoch": 0.9827729269276737, "grad_norm": 0.14327187836170197, "learning_rate": 0.0005, "loss": 2.1109, "step": 258200 }, { "epoch": 0.9828109893957964, "grad_norm": 0.12928041815757751, "learning_rate": 0.0005, "loss": 2.0949, "step": 258210 }, { "epoch": 0.982849051863919, "grad_norm": 0.11430586129426956, "learning_rate": 0.0005, "loss": 2.0952, "step": 258220 }, { "epoch": 0.9828871143320418, "grad_norm": 0.12899106740951538, "learning_rate": 0.0005, "loss": 2.0996, "step": 258230 }, { "epoch": 0.9829251768001644, "grad_norm": 0.12435305863618851, "learning_rate": 0.0005, "loss": 2.1006, "step": 258240 }, { "epoch": 0.9829632392682871, "grad_norm": 0.13388924300670624, "learning_rate": 0.0005, "loss": 2.101, "step": 258250 }, { "epoch": 0.9830013017364098, "grad_norm": 0.12655743956565857, "learning_rate": 0.0005, "loss": 2.0982, "step": 258260 }, { "epoch": 0.9830393642045325, "grad_norm": 0.13059300184249878, "learning_rate": 0.0005, "loss": 2.1105, "step": 258270 }, { "epoch": 0.9830774266726552, "grad_norm": 0.12760014832019806, "learning_rate": 0.0005, "loss": 2.0987, "step": 258280 }, { "epoch": 0.9831154891407778, "grad_norm": 0.12506812810897827, "learning_rate": 0.0005, "loss": 2.0887, "step": 258290 }, { "epoch": 0.9831535516089005, "grad_norm": 0.1192302331328392, "learning_rate": 0.0005, "loss": 2.102, "step": 258300 }, { "epoch": 0.9831916140770232, "grad_norm": 0.12790679931640625, "learning_rate": 0.0005, "loss": 2.1012, "step": 258310 }, { "epoch": 0.9832296765451459, "grad_norm": 0.11708279699087143, "learning_rate": 0.0005, "loss": 2.083, "step": 258320 }, { "epoch": 0.9832677390132686, "grad_norm": 0.1344541311264038, "learning_rate": 0.0005, "loss": 2.1002, "step": 258330 }, { "epoch": 0.9833058014813912, "grad_norm": 0.11863990128040314, "learning_rate": 0.0005, "loss": 2.1129, "step": 258340 }, { "epoch": 0.9833438639495139, "grad_norm": 0.1262895166873932, "learning_rate": 0.0005, "loss": 2.0953, "step": 258350 }, { "epoch": 0.9833819264176367, "grad_norm": 0.12119850516319275, "learning_rate": 0.0005, "loss": 2.0964, "step": 258360 }, { "epoch": 0.9834199888857593, "grad_norm": 0.14586344361305237, "learning_rate": 0.0005, "loss": 2.0944, "step": 258370 }, { "epoch": 0.983458051353882, "grad_norm": 0.131086528301239, "learning_rate": 0.0005, "loss": 2.0954, "step": 258380 }, { "epoch": 0.9834961138220046, "grad_norm": 0.12114156782627106, "learning_rate": 0.0005, "loss": 2.1026, "step": 258390 }, { "epoch": 0.9835341762901274, "grad_norm": 0.11951122432947159, "learning_rate": 0.0005, "loss": 2.0994, "step": 258400 }, { "epoch": 0.9835722387582501, "grad_norm": 0.1404489427804947, "learning_rate": 0.0005, "loss": 2.1066, "step": 258410 }, { "epoch": 0.9836103012263727, "grad_norm": 0.12836606800556183, "learning_rate": 0.0005, "loss": 2.1051, "step": 258420 }, { "epoch": 0.9836483636944954, "grad_norm": 0.1190219521522522, "learning_rate": 0.0005, "loss": 2.1099, "step": 258430 }, { "epoch": 0.9836864261626181, "grad_norm": 0.11856423318386078, "learning_rate": 0.0005, "loss": 2.097, "step": 258440 }, { "epoch": 0.9837244886307408, "grad_norm": 0.11622331291437149, "learning_rate": 0.0005, "loss": 2.0942, "step": 258450 }, { "epoch": 0.9837625510988635, "grad_norm": 0.1283189356327057, "learning_rate": 0.0005, "loss": 2.09, "step": 258460 }, { "epoch": 0.9838006135669861, "grad_norm": 0.1327824890613556, "learning_rate": 0.0005, "loss": 2.1166, "step": 258470 }, { "epoch": 0.9838386760351088, "grad_norm": 0.13173063099384308, "learning_rate": 0.0005, "loss": 2.1182, "step": 258480 }, { "epoch": 0.9838767385032315, "grad_norm": 0.1208709105849266, "learning_rate": 0.0005, "loss": 2.0835, "step": 258490 }, { "epoch": 0.9839148009713542, "grad_norm": 0.13490121066570282, "learning_rate": 0.0005, "loss": 2.1008, "step": 258500 }, { "epoch": 0.9839528634394769, "grad_norm": 0.12017489224672318, "learning_rate": 0.0005, "loss": 2.0999, "step": 258510 }, { "epoch": 0.9839909259075995, "grad_norm": 0.12940183281898499, "learning_rate": 0.0005, "loss": 2.1188, "step": 258520 }, { "epoch": 0.9840289883757223, "grad_norm": 0.11870049685239792, "learning_rate": 0.0005, "loss": 2.0974, "step": 258530 }, { "epoch": 0.9840670508438449, "grad_norm": 0.1260661631822586, "learning_rate": 0.0005, "loss": 2.1086, "step": 258540 }, { "epoch": 0.9841051133119676, "grad_norm": 0.11173855513334274, "learning_rate": 0.0005, "loss": 2.1008, "step": 258550 }, { "epoch": 0.9841431757800903, "grad_norm": 0.11546964198350906, "learning_rate": 0.0005, "loss": 2.0851, "step": 258560 }, { "epoch": 0.984181238248213, "grad_norm": 0.12497804313898087, "learning_rate": 0.0005, "loss": 2.0978, "step": 258570 }, { "epoch": 0.9842193007163357, "grad_norm": 0.14110919833183289, "learning_rate": 0.0005, "loss": 2.1159, "step": 258580 }, { "epoch": 0.9842573631844583, "grad_norm": 0.12901490926742554, "learning_rate": 0.0005, "loss": 2.0986, "step": 258590 }, { "epoch": 0.984295425652581, "grad_norm": 0.1393149495124817, "learning_rate": 0.0005, "loss": 2.0976, "step": 258600 }, { "epoch": 0.9843334881207036, "grad_norm": 0.14474362134933472, "learning_rate": 0.0005, "loss": 2.1097, "step": 258610 }, { "epoch": 0.9843715505888264, "grad_norm": 0.1445140540599823, "learning_rate": 0.0005, "loss": 2.1055, "step": 258620 }, { "epoch": 0.9844096130569491, "grad_norm": 0.1387968510389328, "learning_rate": 0.0005, "loss": 2.0849, "step": 258630 }, { "epoch": 0.9844476755250717, "grad_norm": 0.1392017900943756, "learning_rate": 0.0005, "loss": 2.1015, "step": 258640 }, { "epoch": 0.9844857379931944, "grad_norm": 0.11866472661495209, "learning_rate": 0.0005, "loss": 2.0996, "step": 258650 }, { "epoch": 0.9845238004613172, "grad_norm": 0.1393352746963501, "learning_rate": 0.0005, "loss": 2.1037, "step": 258660 }, { "epoch": 0.9845618629294398, "grad_norm": 0.12474837154150009, "learning_rate": 0.0005, "loss": 2.1053, "step": 258670 }, { "epoch": 0.9845999253975625, "grad_norm": 0.12675200402736664, "learning_rate": 0.0005, "loss": 2.1032, "step": 258680 }, { "epoch": 0.9846379878656851, "grad_norm": 0.11997582763433456, "learning_rate": 0.0005, "loss": 2.0972, "step": 258690 }, { "epoch": 0.9846760503338079, "grad_norm": 0.12846367061138153, "learning_rate": 0.0005, "loss": 2.1114, "step": 258700 }, { "epoch": 0.9847141128019306, "grad_norm": 0.12774696946144104, "learning_rate": 0.0005, "loss": 2.108, "step": 258710 }, { "epoch": 0.9847521752700532, "grad_norm": 0.22226040065288544, "learning_rate": 0.0005, "loss": 2.1133, "step": 258720 }, { "epoch": 0.9847902377381759, "grad_norm": 0.12668132781982422, "learning_rate": 0.0005, "loss": 2.0915, "step": 258730 }, { "epoch": 0.9848283002062985, "grad_norm": 0.1319456696510315, "learning_rate": 0.0005, "loss": 2.082, "step": 258740 }, { "epoch": 0.9848663626744213, "grad_norm": 0.12583576142787933, "learning_rate": 0.0005, "loss": 2.1138, "step": 258750 }, { "epoch": 0.984904425142544, "grad_norm": 0.12175232172012329, "learning_rate": 0.0005, "loss": 2.1044, "step": 258760 }, { "epoch": 0.9849424876106666, "grad_norm": 0.12503385543823242, "learning_rate": 0.0005, "loss": 2.0931, "step": 258770 }, { "epoch": 0.9849805500787893, "grad_norm": 0.13150545954704285, "learning_rate": 0.0005, "loss": 2.0976, "step": 258780 }, { "epoch": 0.985018612546912, "grad_norm": 0.1303718388080597, "learning_rate": 0.0005, "loss": 2.1056, "step": 258790 }, { "epoch": 0.9850566750150347, "grad_norm": 0.1266319453716278, "learning_rate": 0.0005, "loss": 2.0984, "step": 258800 }, { "epoch": 0.9850947374831573, "grad_norm": 0.13897955417633057, "learning_rate": 0.0005, "loss": 2.0937, "step": 258810 }, { "epoch": 0.98513279995128, "grad_norm": 0.13643626868724823, "learning_rate": 0.0005, "loss": 2.0976, "step": 258820 }, { "epoch": 0.9851708624194028, "grad_norm": 0.129782572388649, "learning_rate": 0.0005, "loss": 2.1045, "step": 258830 }, { "epoch": 0.9852089248875254, "grad_norm": 0.11904452741146088, "learning_rate": 0.0005, "loss": 2.098, "step": 258840 }, { "epoch": 0.9852469873556481, "grad_norm": 0.13108906149864197, "learning_rate": 0.0005, "loss": 2.107, "step": 258850 }, { "epoch": 0.9852850498237707, "grad_norm": 0.12430950999259949, "learning_rate": 0.0005, "loss": 2.0938, "step": 258860 }, { "epoch": 0.9853231122918935, "grad_norm": 0.1274895966053009, "learning_rate": 0.0005, "loss": 2.0906, "step": 258870 }, { "epoch": 0.9853611747600162, "grad_norm": 0.11865878850221634, "learning_rate": 0.0005, "loss": 2.077, "step": 258880 }, { "epoch": 0.9853992372281388, "grad_norm": 0.11411301791667938, "learning_rate": 0.0005, "loss": 2.1038, "step": 258890 }, { "epoch": 0.9854372996962615, "grad_norm": 0.13757802546024323, "learning_rate": 0.0005, "loss": 2.1019, "step": 258900 }, { "epoch": 0.9854753621643841, "grad_norm": 0.15388959646224976, "learning_rate": 0.0005, "loss": 2.0894, "step": 258910 }, { "epoch": 0.9855134246325069, "grad_norm": 0.13236352801322937, "learning_rate": 0.0005, "loss": 2.1124, "step": 258920 }, { "epoch": 0.9855514871006296, "grad_norm": 0.11857990175485611, "learning_rate": 0.0005, "loss": 2.1063, "step": 258930 }, { "epoch": 0.9855895495687522, "grad_norm": 0.1266779899597168, "learning_rate": 0.0005, "loss": 2.102, "step": 258940 }, { "epoch": 0.9856276120368749, "grad_norm": 0.14129385352134705, "learning_rate": 0.0005, "loss": 2.098, "step": 258950 }, { "epoch": 0.9856656745049976, "grad_norm": 0.12670108675956726, "learning_rate": 0.0005, "loss": 2.0982, "step": 258960 }, { "epoch": 0.9857037369731203, "grad_norm": 0.1306104212999344, "learning_rate": 0.0005, "loss": 2.1031, "step": 258970 }, { "epoch": 0.985741799441243, "grad_norm": 0.6689227223396301, "learning_rate": 0.0005, "loss": 2.0893, "step": 258980 }, { "epoch": 0.9857798619093656, "grad_norm": 0.1370919644832611, "learning_rate": 0.0005, "loss": 2.1177, "step": 258990 }, { "epoch": 0.9858179243774884, "grad_norm": 0.12262406200170517, "learning_rate": 0.0005, "loss": 2.0986, "step": 259000 }, { "epoch": 0.985855986845611, "grad_norm": 0.1291884481906891, "learning_rate": 0.0005, "loss": 2.1055, "step": 259010 }, { "epoch": 0.9858940493137337, "grad_norm": 0.13689911365509033, "learning_rate": 0.0005, "loss": 2.1088, "step": 259020 }, { "epoch": 0.9859321117818564, "grad_norm": 0.1361452043056488, "learning_rate": 0.0005, "loss": 2.0923, "step": 259030 }, { "epoch": 0.985970174249979, "grad_norm": 0.12106503546237946, "learning_rate": 0.0005, "loss": 2.105, "step": 259040 }, { "epoch": 0.9860082367181018, "grad_norm": 0.13545392453670502, "learning_rate": 0.0005, "loss": 2.0995, "step": 259050 }, { "epoch": 0.9860462991862244, "grad_norm": 0.12301244586706161, "learning_rate": 0.0005, "loss": 2.0975, "step": 259060 }, { "epoch": 0.9860843616543471, "grad_norm": 0.11876979470252991, "learning_rate": 0.0005, "loss": 2.1014, "step": 259070 }, { "epoch": 0.9861224241224698, "grad_norm": 0.11995098739862442, "learning_rate": 0.0005, "loss": 2.1066, "step": 259080 }, { "epoch": 0.9861604865905925, "grad_norm": 0.13658028841018677, "learning_rate": 0.0005, "loss": 2.0982, "step": 259090 }, { "epoch": 0.9861985490587152, "grad_norm": 0.12490151822566986, "learning_rate": 0.0005, "loss": 2.105, "step": 259100 }, { "epoch": 0.9862366115268378, "grad_norm": 0.11956927925348282, "learning_rate": 0.0005, "loss": 2.0995, "step": 259110 }, { "epoch": 0.9862746739949605, "grad_norm": 0.14448417723178864, "learning_rate": 0.0005, "loss": 2.0953, "step": 259120 }, { "epoch": 0.9863127364630833, "grad_norm": 0.13080665469169617, "learning_rate": 0.0005, "loss": 2.116, "step": 259130 }, { "epoch": 0.9863507989312059, "grad_norm": 0.116084985435009, "learning_rate": 0.0005, "loss": 2.1081, "step": 259140 }, { "epoch": 0.9863888613993286, "grad_norm": 0.13065792620182037, "learning_rate": 0.0005, "loss": 2.1118, "step": 259150 }, { "epoch": 0.9864269238674512, "grad_norm": 0.1385491043329239, "learning_rate": 0.0005, "loss": 2.1058, "step": 259160 }, { "epoch": 0.9864649863355739, "grad_norm": 0.11696495860815048, "learning_rate": 0.0005, "loss": 2.0916, "step": 259170 }, { "epoch": 0.9865030488036967, "grad_norm": 0.1375739574432373, "learning_rate": 0.0005, "loss": 2.0982, "step": 259180 }, { "epoch": 0.9865411112718193, "grad_norm": 0.13284343481063843, "learning_rate": 0.0005, "loss": 2.1221, "step": 259190 }, { "epoch": 0.986579173739942, "grad_norm": 0.12562021613121033, "learning_rate": 0.0005, "loss": 2.0988, "step": 259200 }, { "epoch": 0.9866172362080646, "grad_norm": 0.11927879601716995, "learning_rate": 0.0005, "loss": 2.0876, "step": 259210 }, { "epoch": 0.9866552986761874, "grad_norm": 0.12254306674003601, "learning_rate": 0.0005, "loss": 2.1014, "step": 259220 }, { "epoch": 0.98669336114431, "grad_norm": 0.1201302781701088, "learning_rate": 0.0005, "loss": 2.0942, "step": 259230 }, { "epoch": 0.9867314236124327, "grad_norm": 0.12187743186950684, "learning_rate": 0.0005, "loss": 2.0901, "step": 259240 }, { "epoch": 0.9867694860805554, "grad_norm": 0.11292102187871933, "learning_rate": 0.0005, "loss": 2.0807, "step": 259250 }, { "epoch": 0.9868075485486781, "grad_norm": 0.12742412090301514, "learning_rate": 0.0005, "loss": 2.1109, "step": 259260 }, { "epoch": 0.9868456110168008, "grad_norm": 0.1279342919588089, "learning_rate": 0.0005, "loss": 2.1088, "step": 259270 }, { "epoch": 0.9868836734849235, "grad_norm": 0.12992903590202332, "learning_rate": 0.0005, "loss": 2.0926, "step": 259280 }, { "epoch": 0.9869217359530461, "grad_norm": 0.14792388677597046, "learning_rate": 0.0005, "loss": 2.1004, "step": 259290 }, { "epoch": 0.9869597984211689, "grad_norm": 0.13319194316864014, "learning_rate": 0.0005, "loss": 2.0976, "step": 259300 }, { "epoch": 0.9869978608892915, "grad_norm": 0.1406574845314026, "learning_rate": 0.0005, "loss": 2.1123, "step": 259310 }, { "epoch": 0.9870359233574142, "grad_norm": 0.12122943997383118, "learning_rate": 0.0005, "loss": 2.094, "step": 259320 }, { "epoch": 0.9870739858255368, "grad_norm": 0.12128177285194397, "learning_rate": 0.0005, "loss": 2.111, "step": 259330 }, { "epoch": 0.9871120482936595, "grad_norm": 0.1400650441646576, "learning_rate": 0.0005, "loss": 2.097, "step": 259340 }, { "epoch": 0.9871501107617823, "grad_norm": 0.1302204579114914, "learning_rate": 0.0005, "loss": 2.1148, "step": 259350 }, { "epoch": 0.9871881732299049, "grad_norm": 0.12166402488946915, "learning_rate": 0.0005, "loss": 2.0991, "step": 259360 }, { "epoch": 0.9872262356980276, "grad_norm": 0.1307765245437622, "learning_rate": 0.0005, "loss": 2.0932, "step": 259370 }, { "epoch": 0.9872642981661502, "grad_norm": 0.1271083652973175, "learning_rate": 0.0005, "loss": 2.0974, "step": 259380 }, { "epoch": 0.987302360634273, "grad_norm": 0.12263017147779465, "learning_rate": 0.0005, "loss": 2.1272, "step": 259390 }, { "epoch": 0.9873404231023957, "grad_norm": 0.13138532638549805, "learning_rate": 0.0005, "loss": 2.0947, "step": 259400 }, { "epoch": 0.9873784855705183, "grad_norm": 0.1262940764427185, "learning_rate": 0.0005, "loss": 2.0935, "step": 259410 }, { "epoch": 0.987416548038641, "grad_norm": 0.12203992903232574, "learning_rate": 0.0005, "loss": 2.0917, "step": 259420 }, { "epoch": 0.9874546105067638, "grad_norm": 0.13928186893463135, "learning_rate": 0.0005, "loss": 2.1157, "step": 259430 }, { "epoch": 0.9874926729748864, "grad_norm": 0.13313859701156616, "learning_rate": 0.0005, "loss": 2.0918, "step": 259440 }, { "epoch": 0.9875307354430091, "grad_norm": 0.1350223273038864, "learning_rate": 0.0005, "loss": 2.1118, "step": 259450 }, { "epoch": 0.9875687979111317, "grad_norm": 0.12552247941493988, "learning_rate": 0.0005, "loss": 2.0922, "step": 259460 }, { "epoch": 0.9876068603792544, "grad_norm": 0.1323108673095703, "learning_rate": 0.0005, "loss": 2.083, "step": 259470 }, { "epoch": 0.9876449228473771, "grad_norm": 0.12774093449115753, "learning_rate": 0.0005, "loss": 2.1119, "step": 259480 }, { "epoch": 0.9876829853154998, "grad_norm": 0.14221549034118652, "learning_rate": 0.0005, "loss": 2.0833, "step": 259490 }, { "epoch": 0.9877210477836225, "grad_norm": 0.13134565949440002, "learning_rate": 0.0005, "loss": 2.0932, "step": 259500 }, { "epoch": 0.9877591102517451, "grad_norm": 0.12980684638023376, "learning_rate": 0.0005, "loss": 2.0976, "step": 259510 }, { "epoch": 0.9877971727198679, "grad_norm": 0.13035202026367188, "learning_rate": 0.0005, "loss": 2.1068, "step": 259520 }, { "epoch": 0.9878352351879905, "grad_norm": 0.13434964418411255, "learning_rate": 0.0005, "loss": 2.1018, "step": 259530 }, { "epoch": 0.9878732976561132, "grad_norm": 0.12856483459472656, "learning_rate": 0.0005, "loss": 2.1048, "step": 259540 }, { "epoch": 0.9879113601242359, "grad_norm": 0.1277189999818802, "learning_rate": 0.0005, "loss": 2.115, "step": 259550 }, { "epoch": 0.9879494225923586, "grad_norm": 0.1284840852022171, "learning_rate": 0.0005, "loss": 2.1032, "step": 259560 }, { "epoch": 0.9879874850604813, "grad_norm": 0.11731751263141632, "learning_rate": 0.0005, "loss": 2.0968, "step": 259570 }, { "epoch": 0.9880255475286039, "grad_norm": 0.13612335920333862, "learning_rate": 0.0005, "loss": 2.0886, "step": 259580 }, { "epoch": 0.9880636099967266, "grad_norm": 0.12836602330207825, "learning_rate": 0.0005, "loss": 2.0996, "step": 259590 }, { "epoch": 0.9881016724648493, "grad_norm": 0.13179896771907806, "learning_rate": 0.0005, "loss": 2.1122, "step": 259600 }, { "epoch": 0.988139734932972, "grad_norm": 0.1250675916671753, "learning_rate": 0.0005, "loss": 2.0987, "step": 259610 }, { "epoch": 0.9881777974010947, "grad_norm": 0.12391552329063416, "learning_rate": 0.0005, "loss": 2.1109, "step": 259620 }, { "epoch": 0.9882158598692173, "grad_norm": 0.1251387745141983, "learning_rate": 0.0005, "loss": 2.1006, "step": 259630 }, { "epoch": 0.98825392233734, "grad_norm": 0.13607516884803772, "learning_rate": 0.0005, "loss": 2.0868, "step": 259640 }, { "epoch": 0.9882919848054628, "grad_norm": 0.1332910805940628, "learning_rate": 0.0005, "loss": 2.0971, "step": 259650 }, { "epoch": 0.9883300472735854, "grad_norm": 0.12459276616573334, "learning_rate": 0.0005, "loss": 2.0953, "step": 259660 }, { "epoch": 0.9883681097417081, "grad_norm": 0.14937591552734375, "learning_rate": 0.0005, "loss": 2.1177, "step": 259670 }, { "epoch": 0.9884061722098307, "grad_norm": 0.11843692511320114, "learning_rate": 0.0005, "loss": 2.0845, "step": 259680 }, { "epoch": 0.9884442346779535, "grad_norm": 0.13042514026165009, "learning_rate": 0.0005, "loss": 2.1059, "step": 259690 }, { "epoch": 0.9884822971460762, "grad_norm": 0.12332228571176529, "learning_rate": 0.0005, "loss": 2.0964, "step": 259700 }, { "epoch": 0.9885203596141988, "grad_norm": 0.12637607753276825, "learning_rate": 0.0005, "loss": 2.0998, "step": 259710 }, { "epoch": 0.9885584220823215, "grad_norm": 0.11896184086799622, "learning_rate": 0.0005, "loss": 2.0866, "step": 259720 }, { "epoch": 0.9885964845504442, "grad_norm": 0.118443563580513, "learning_rate": 0.0005, "loss": 2.1033, "step": 259730 }, { "epoch": 0.9886345470185669, "grad_norm": 0.12883390486240387, "learning_rate": 0.0005, "loss": 2.1018, "step": 259740 }, { "epoch": 0.9886726094866896, "grad_norm": 0.12184642255306244, "learning_rate": 0.0005, "loss": 2.093, "step": 259750 }, { "epoch": 0.9887106719548122, "grad_norm": 0.12203714996576309, "learning_rate": 0.0005, "loss": 2.1054, "step": 259760 }, { "epoch": 0.9887487344229349, "grad_norm": 0.13342252373695374, "learning_rate": 0.0005, "loss": 2.1098, "step": 259770 }, { "epoch": 0.9887867968910576, "grad_norm": 0.12597885727882385, "learning_rate": 0.0005, "loss": 2.1023, "step": 259780 }, { "epoch": 0.9888248593591803, "grad_norm": 0.13244442641735077, "learning_rate": 0.0005, "loss": 2.0948, "step": 259790 }, { "epoch": 0.988862921827303, "grad_norm": 0.1264996975660324, "learning_rate": 0.0005, "loss": 2.1062, "step": 259800 }, { "epoch": 0.9889009842954256, "grad_norm": 0.12919080257415771, "learning_rate": 0.0005, "loss": 2.0979, "step": 259810 }, { "epoch": 0.9889390467635484, "grad_norm": 0.11465083807706833, "learning_rate": 0.0005, "loss": 2.1043, "step": 259820 }, { "epoch": 0.988977109231671, "grad_norm": 0.14477968215942383, "learning_rate": 0.0005, "loss": 2.0895, "step": 259830 }, { "epoch": 0.9890151716997937, "grad_norm": 0.13007590174674988, "learning_rate": 0.0005, "loss": 2.0981, "step": 259840 }, { "epoch": 0.9890532341679164, "grad_norm": 0.1438094526529312, "learning_rate": 0.0005, "loss": 2.1073, "step": 259850 }, { "epoch": 0.9890912966360391, "grad_norm": 0.1194855123758316, "learning_rate": 0.0005, "loss": 2.1134, "step": 259860 }, { "epoch": 0.9891293591041618, "grad_norm": 0.13489076495170593, "learning_rate": 0.0005, "loss": 2.1001, "step": 259870 }, { "epoch": 0.9891674215722844, "grad_norm": 0.12890882790088654, "learning_rate": 0.0005, "loss": 2.0964, "step": 259880 }, { "epoch": 0.9892054840404071, "grad_norm": 0.12260407209396362, "learning_rate": 0.0005, "loss": 2.1036, "step": 259890 }, { "epoch": 0.9892435465085297, "grad_norm": 0.13415499031543732, "learning_rate": 0.0005, "loss": 2.0939, "step": 259900 }, { "epoch": 0.9892816089766525, "grad_norm": 0.13491657376289368, "learning_rate": 0.0005, "loss": 2.0905, "step": 259910 }, { "epoch": 0.9893196714447752, "grad_norm": 0.14124566316604614, "learning_rate": 0.0005, "loss": 2.1005, "step": 259920 }, { "epoch": 0.9893577339128978, "grad_norm": 0.12449941784143448, "learning_rate": 0.0005, "loss": 2.1147, "step": 259930 }, { "epoch": 0.9893957963810205, "grad_norm": 0.1191040500998497, "learning_rate": 0.0005, "loss": 2.1082, "step": 259940 }, { "epoch": 0.9894338588491433, "grad_norm": 0.14282147586345673, "learning_rate": 0.0005, "loss": 2.0846, "step": 259950 }, { "epoch": 0.9894719213172659, "grad_norm": 0.11729595065116882, "learning_rate": 0.0005, "loss": 2.089, "step": 259960 }, { "epoch": 0.9895099837853886, "grad_norm": 0.12624691426753998, "learning_rate": 0.0005, "loss": 2.1154, "step": 259970 }, { "epoch": 0.9895480462535112, "grad_norm": 0.1268104761838913, "learning_rate": 0.0005, "loss": 2.1085, "step": 259980 }, { "epoch": 0.989586108721634, "grad_norm": 0.12314214557409286, "learning_rate": 0.0005, "loss": 2.0865, "step": 259990 }, { "epoch": 0.9896241711897567, "grad_norm": 0.12357240170240402, "learning_rate": 0.0005, "loss": 2.1092, "step": 260000 }, { "epoch": 0.9896622336578793, "grad_norm": 0.1274331957101822, "learning_rate": 0.000493545027756321, "loss": 2.0947, "step": 260010 }, { "epoch": 0.989700296126002, "grad_norm": 0.13679982721805573, "learning_rate": 0.0004908712907082472, "loss": 2.0972, "step": 260020 }, { "epoch": 0.9897383585941246, "grad_norm": 0.12808233499526978, "learning_rate": 0.000488819660112501, "loss": 2.1004, "step": 260030 }, { "epoch": 0.9897764210622474, "grad_norm": 0.1215348020195961, "learning_rate": 0.000487090055512642, "loss": 2.0961, "step": 260040 }, { "epoch": 0.98981448353037, "grad_norm": 0.11865545064210892, "learning_rate": 0.0004855662432702594, "loss": 2.0994, "step": 260050 }, { "epoch": 0.9898525459984927, "grad_norm": 0.12360881268978119, "learning_rate": 0.0004841886116991581, "loss": 2.103, "step": 260060 }, { "epoch": 0.9898906084666154, "grad_norm": 0.13874591886997223, "learning_rate": 0.00048292174872340064, "loss": 2.0962, "step": 260070 }, { "epoch": 0.9899286709347381, "grad_norm": 0.12478182464838028, "learning_rate": 0.00048174258141649444, "loss": 2.084, "step": 260080 }, { "epoch": 0.9899667334028608, "grad_norm": 0.1325121968984604, "learning_rate": 0.00048063508326896294, "loss": 2.0916, "step": 260090 }, { "epoch": 0.9900047958709834, "grad_norm": 0.13993875682353973, "learning_rate": 0.00047958758547680686, "loss": 2.0808, "step": 260100 }, { "epoch": 0.9900428583391061, "grad_norm": 0.13443487882614136, "learning_rate": 0.0004785912790355581, "loss": 2.0752, "step": 260110 }, { "epoch": 0.9900809208072289, "grad_norm": 0.1270710825920105, "learning_rate": 0.00047763932022500214, "loss": 2.0884, "step": 260120 }, { "epoch": 0.9901189832753515, "grad_norm": 0.12298639118671417, "learning_rate": 0.0004767262665937184, "loss": 2.0913, "step": 260130 }, { "epoch": 0.9901570457434742, "grad_norm": 0.13086600601673126, "learning_rate": 0.0004758477054230176, "loss": 2.1023, "step": 260140 }, { "epoch": 0.9901951082115968, "grad_norm": 0.1266835331916809, "learning_rate": 0.000475, "loss": 2.0924, "step": 260150 }, { "epoch": 0.9902331706797196, "grad_norm": 0.13983656466007233, "learning_rate": 0.0004741801110252839, "loss": 2.0947, "step": 260160 }, { "epoch": 0.9902712331478423, "grad_norm": 0.12102605402469635, "learning_rate": 0.00047338546762888114, "loss": 2.0982, "step": 260170 }, { "epoch": 0.9903092956159649, "grad_norm": 0.13553886115550995, "learning_rate": 0.0004726138721247417, "loss": 2.0925, "step": 260180 }, { "epoch": 0.9903473580840876, "grad_norm": 0.11878236383199692, "learning_rate": 0.00047186342830644315, "loss": 2.086, "step": 260190 }, { "epoch": 0.9903854205522102, "grad_norm": 0.12370821088552475, "learning_rate": 0.00047113248654051873, "loss": 2.0944, "step": 260200 }, { "epoch": 0.990423483020333, "grad_norm": 0.12732823193073273, "learning_rate": 0.0004704196010845019, "loss": 2.1089, "step": 260210 }, { "epoch": 0.9904615454884557, "grad_norm": 0.1242508515715599, "learning_rate": 0.00046972349645902506, "loss": 2.1049, "step": 260220 }, { "epoch": 0.9904996079565783, "grad_norm": 0.12015186250209808, "learning_rate": 0.00046904304063165547, "loss": 2.0988, "step": 260230 }, { "epoch": 0.990537670424701, "grad_norm": 0.11646655201911926, "learning_rate": 0.00046837722339831625, "loss": 2.0995, "step": 260240 }, { "epoch": 0.9905757328928237, "grad_norm": 0.12747345864772797, "learning_rate": 0.0004677251387816049, "loss": 2.0825, "step": 260250 }, { "epoch": 0.9906137953609464, "grad_norm": 0.11547389626502991, "learning_rate": 0.00046708597056978085, "loss": 2.0941, "step": 260260 }, { "epoch": 0.9906518578290691, "grad_norm": 0.13355474174022675, "learning_rate": 0.0004664589803375032, "loss": 2.0938, "step": 260270 }, { "epoch": 0.9906899202971917, "grad_norm": 0.10946942865848541, "learning_rate": 0.0004658434974468013, "loss": 2.1177, "step": 260280 }, { "epoch": 0.9907279827653145, "grad_norm": 0.13152863085269928, "learning_rate": 0.00046523891064230964, "loss": 2.0944, "step": 260290 }, { "epoch": 0.9907660452334371, "grad_norm": 0.1293293684720993, "learning_rate": 0.00046464466094067264, "loss": 2.0836, "step": 260300 }, { "epoch": 0.9908041077015598, "grad_norm": 0.15848124027252197, "learning_rate": 0.00046406023557858694, "loss": 2.1005, "step": 260310 }, { "epoch": 0.9908421701696825, "grad_norm": 0.11891598254442215, "learning_rate": 0.0004634851628329889, "loss": 2.085, "step": 260320 }, { "epoch": 0.9908802326378051, "grad_norm": 0.13395057618618011, "learning_rate": 0.0004629190075645217, "loss": 2.0884, "step": 260330 }, { "epoch": 0.9909182951059279, "grad_norm": 0.1368226706981659, "learning_rate": 0.00046236136736454596, "loss": 2.0924, "step": 260340 }, { "epoch": 0.9909563575740505, "grad_norm": 0.12232238054275513, "learning_rate": 0.00046181186920870134, "loss": 2.0994, "step": 260350 }, { "epoch": 0.9909944200421732, "grad_norm": 0.12242230027914047, "learning_rate": 0.00046127016653792587, "loss": 2.1126, "step": 260360 }, { "epoch": 0.9910324825102959, "grad_norm": 0.13534030318260193, "learning_rate": 0.0004607359367020342, "loss": 2.0776, "step": 260370 }, { "epoch": 0.9910705449784186, "grad_norm": 0.12158862501382828, "learning_rate": 0.00046020887871228895, "loss": 2.09, "step": 260380 }, { "epoch": 0.9911086074465413, "grad_norm": 0.12573935091495514, "learning_rate": 0.00045968871125850726, "loss": 2.0939, "step": 260390 }, { "epoch": 0.9911466699146639, "grad_norm": 0.13777689635753632, "learning_rate": 0.0004591751709536137, "loss": 2.0933, "step": 260400 }, { "epoch": 0.9911847323827866, "grad_norm": 0.14061543345451355, "learning_rate": 0.0004586680107745425, "loss": 2.0882, "step": 260410 }, { "epoch": 0.9912227948509094, "grad_norm": 0.12106756120920181, "learning_rate": 0.00045816699867329627, "loss": 2.0974, "step": 260420 }, { "epoch": 0.991260857319032, "grad_norm": 0.11237644404172897, "learning_rate": 0.00045767191633599904, "loss": 2.0765, "step": 260430 }, { "epoch": 0.9912989197871547, "grad_norm": 0.11515557765960693, "learning_rate": 0.00045718255807111627, "loss": 2.0915, "step": 260440 }, { "epoch": 0.9913369822552773, "grad_norm": 0.13092699646949768, "learning_rate": 0.00045669872981077806, "loss": 2.1002, "step": 260450 }, { "epoch": 0.9913750447234, "grad_norm": 0.1275516003370285, "learning_rate": 0.00045622024821145434, "loss": 2.087, "step": 260460 }, { "epoch": 0.9914131071915228, "grad_norm": 0.11951445043087006, "learning_rate": 0.00045574693984216084, "loss": 2.0958, "step": 260470 }, { "epoch": 0.9914511696596454, "grad_norm": 0.11712721735239029, "learning_rate": 0.0004552786404500042, "loss": 2.1043, "step": 260480 }, { "epoch": 0.9914892321277681, "grad_norm": 0.1637088805437088, "learning_rate": 0.00045481519429424683, "loss": 2.0944, "step": 260490 }, { "epoch": 0.9915272945958907, "grad_norm": 0.12922069430351257, "learning_rate": 0.0004543564535412362, "loss": 2.0902, "step": 260500 }, { "epoch": 0.9915653570640135, "grad_norm": 0.12950067222118378, "learning_rate": 0.0004539022777135356, "loss": 2.083, "step": 260510 }, { "epoch": 0.9916034195321362, "grad_norm": 0.1143832728266716, "learning_rate": 0.0004534525331874368, "loss": 2.0961, "step": 260520 }, { "epoch": 0.9916414820002588, "grad_norm": 0.12358095496892929, "learning_rate": 0.00045300709273376105, "loss": 2.0873, "step": 260530 }, { "epoch": 0.9916795444683815, "grad_norm": 0.12123238295316696, "learning_rate": 0.00045256583509747434, "loss": 2.0912, "step": 260540 }, { "epoch": 0.9917176069365042, "grad_norm": 0.1274598389863968, "learning_rate": 0.0004521286446121831, "loss": 2.0981, "step": 260550 }, { "epoch": 0.9917556694046269, "grad_norm": 0.1331852376461029, "learning_rate": 0.0004516954108460352, "loss": 2.0915, "step": 260560 }, { "epoch": 0.9917937318727496, "grad_norm": 0.12474968284368515, "learning_rate": 0.0004512660282759552, "loss": 2.0815, "step": 260570 }, { "epoch": 0.9918317943408722, "grad_norm": 0.12475012987852097, "learning_rate": 0.00045084039598749126, "loss": 2.0903, "step": 260580 }, { "epoch": 0.991869856808995, "grad_norm": 0.3353778123855591, "learning_rate": 0.00045041841739785495, "loss": 2.0809, "step": 260590 }, { "epoch": 0.9919079192771176, "grad_norm": 0.13950788974761963, "learning_rate": 0.00045000000000000004, "loss": 2.0717, "step": 260600 }, { "epoch": 0.9919459817452403, "grad_norm": 0.12284799665212631, "learning_rate": 0.00044958505512581945, "loss": 2.0936, "step": 260610 }, { "epoch": 0.991984044213363, "grad_norm": 0.12896114587783813, "learning_rate": 0.00044917349772674364, "loss": 2.0884, "step": 260620 }, { "epoch": 0.9920221066814856, "grad_norm": 0.12631112337112427, "learning_rate": 0.000448765246170202, "loss": 2.0751, "step": 260630 }, { "epoch": 0.9920601691496084, "grad_norm": 0.11359766870737076, "learning_rate": 0.0004483602220505678, "loss": 2.0827, "step": 260640 }, { "epoch": 0.992098231617731, "grad_norm": 0.11313661932945251, "learning_rate": 0.0004479583500133467, "loss": 2.0941, "step": 260650 }, { "epoch": 0.9921362940858537, "grad_norm": 0.13781104981899261, "learning_rate": 0.0004475595575914924, "loss": 2.0975, "step": 260660 }, { "epoch": 0.9921743565539763, "grad_norm": 0.11699791997671127, "learning_rate": 0.00044716377505284215, "loss": 2.0763, "step": 260670 }, { "epoch": 0.9922124190220991, "grad_norm": 0.12874886393547058, "learning_rate": 0.0004467709352577623, "loss": 2.1004, "step": 260680 }, { "epoch": 0.9922504814902218, "grad_norm": 0.1220022514462471, "learning_rate": 0.00044638097352618194, "loss": 2.0787, "step": 260690 }, { "epoch": 0.9922885439583444, "grad_norm": 0.13112017512321472, "learning_rate": 0.0004459938275132678, "loss": 2.0947, "step": 260700 }, { "epoch": 0.9923266064264671, "grad_norm": 0.1277540922164917, "learning_rate": 0.0004456094370930643, "loss": 2.0734, "step": 260710 }, { "epoch": 0.9923646688945899, "grad_norm": 0.13177043199539185, "learning_rate": 0.0004452277442494834, "loss": 2.0818, "step": 260720 }, { "epoch": 0.9924027313627125, "grad_norm": 0.1327768862247467, "learning_rate": 0.0004448486929740857, "loss": 2.0947, "step": 260730 }, { "epoch": 0.9924407938308352, "grad_norm": 0.13766483962535858, "learning_rate": 0.0004444722291701411, "loss": 2.0992, "step": 260740 }, { "epoch": 0.9924788562989578, "grad_norm": 0.12571793794631958, "learning_rate": 0.00044409830056250525, "loss": 2.1074, "step": 260750 }, { "epoch": 0.9925169187670805, "grad_norm": 0.13008525967597961, "learning_rate": 0.00044372685661288623, "loss": 2.0903, "step": 260760 }, { "epoch": 0.9925549812352032, "grad_norm": 0.12241663038730621, "learning_rate": 0.0004433578484401119, "loss": 2.1075, "step": 260770 }, { "epoch": 0.9925930437033259, "grad_norm": 0.1442648470401764, "learning_rate": 0.0004429912287450431, "loss": 2.0914, "step": 260780 }, { "epoch": 0.9926311061714486, "grad_norm": 0.14241662621498108, "learning_rate": 0.00044262695173980497, "loss": 2.0961, "step": 260790 }, { "epoch": 0.9926691686395712, "grad_norm": 0.12129341065883636, "learning_rate": 0.0004422649730810374, "loss": 2.083, "step": 260800 }, { "epoch": 0.992707231107694, "grad_norm": 0.11631777882575989, "learning_rate": 0.00044190524980688875, "loss": 2.085, "step": 260810 }, { "epoch": 0.9927452935758166, "grad_norm": 0.1259375363588333, "learning_rate": 0.0004415477402774994, "loss": 2.0885, "step": 260820 }, { "epoch": 0.9927833560439393, "grad_norm": 0.13364864885807037, "learning_rate": 0.0004411924041187422, "loss": 2.0886, "step": 260830 }, { "epoch": 0.992821418512062, "grad_norm": 0.11769169569015503, "learning_rate": 0.00044083920216900387, "loss": 2.0932, "step": 260840 }, { "epoch": 0.9928594809801847, "grad_norm": 0.12409477680921555, "learning_rate": 0.0004404880964288096, "loss": 2.0867, "step": 260850 }, { "epoch": 0.9928975434483074, "grad_norm": 0.12123719602823257, "learning_rate": 0.00044013905001310676, "loss": 2.1011, "step": 260860 }, { "epoch": 0.99293560591643, "grad_norm": 0.1280791163444519, "learning_rate": 0.00043979202710603854, "loss": 2.0903, "step": 260870 }, { "epoch": 0.9929736683845527, "grad_norm": 0.11044704169034958, "learning_rate": 0.00043944699291805016, "loss": 2.0946, "step": 260880 }, { "epoch": 0.9930117308526754, "grad_norm": 0.12645240128040314, "learning_rate": 0.000439103913645183, "loss": 2.1006, "step": 260890 }, { "epoch": 0.9930497933207981, "grad_norm": 0.12837836146354675, "learning_rate": 0.00043876275643042056, "loss": 2.0736, "step": 260900 }, { "epoch": 0.9930878557889208, "grad_norm": 0.12500976026058197, "learning_rate": 0.0004384234893269628, "loss": 2.0912, "step": 260910 }, { "epoch": 0.9931259182570434, "grad_norm": 0.13092707097530365, "learning_rate": 0.00043808608126331093, "loss": 2.0816, "step": 260920 }, { "epoch": 0.9931639807251661, "grad_norm": 0.14441804587841034, "learning_rate": 0.00043775050201005634, "loss": 2.0976, "step": 260930 }, { "epoch": 0.9932020431932889, "grad_norm": 0.13155049085617065, "learning_rate": 0.0004374167221482714, "loss": 2.0869, "step": 260940 }, { "epoch": 0.9932401056614115, "grad_norm": 0.15008226037025452, "learning_rate": 0.00043708471303941047, "loss": 2.0841, "step": 260950 }, { "epoch": 0.9932781681295342, "grad_norm": 0.1321364790201187, "learning_rate": 0.00043675444679663243, "loss": 2.1137, "step": 260960 }, { "epoch": 0.9933162305976568, "grad_norm": 0.11087088286876678, "learning_rate": 0.00043642589625746456, "loss": 2.102, "step": 260970 }, { "epoch": 0.9933542930657796, "grad_norm": 0.12107894569635391, "learning_rate": 0.0004360990349577306, "loss": 2.1037, "step": 260980 }, { "epoch": 0.9933923555339023, "grad_norm": 0.11820726096630096, "learning_rate": 0.0004357738371066743, "loss": 2.0826, "step": 260990 }, { "epoch": 0.9934304180020249, "grad_norm": 0.12801015377044678, "learning_rate": 0.00043545027756320976, "loss": 2.0897, "step": 261000 }, { "epoch": 0.9934684804701476, "grad_norm": 0.12365680187940598, "learning_rate": 0.00043512833181323814, "loss": 2.072, "step": 261010 }, { "epoch": 0.9935065429382703, "grad_norm": 0.12400777637958527, "learning_rate": 0.0004348079759479735, "loss": 2.0791, "step": 261020 }, { "epoch": 0.993544605406393, "grad_norm": 0.11780333518981934, "learning_rate": 0.00043448918664322153, "loss": 2.0863, "step": 261030 }, { "epoch": 0.9935826678745157, "grad_norm": 0.12443245947360992, "learning_rate": 0.0004341719411395617, "loss": 2.0896, "step": 261040 }, { "epoch": 0.9936207303426383, "grad_norm": 0.13305029273033142, "learning_rate": 0.00043385621722338526, "loss": 2.0816, "step": 261050 }, { "epoch": 0.993658792810761, "grad_norm": 0.123264841735363, "learning_rate": 0.0004335419932087437, "loss": 2.0981, "step": 261060 }, { "epoch": 0.9936968552788837, "grad_norm": 0.17386004328727722, "learning_rate": 0.0004332292479199663, "loss": 2.0907, "step": 261070 }, { "epoch": 0.9937349177470064, "grad_norm": 0.13856405019760132, "learning_rate": 0.00043291796067500634, "loss": 2.0982, "step": 261080 }, { "epoch": 0.993772980215129, "grad_norm": 0.11440404504537582, "learning_rate": 0.0004326081112694809, "loss": 2.0841, "step": 261090 }, { "epoch": 0.9938110426832517, "grad_norm": 0.11984309554100037, "learning_rate": 0.00043229967996136697, "loss": 2.0697, "step": 261100 }, { "epoch": 0.9938491051513745, "grad_norm": 0.13008153438568115, "learning_rate": 0.0004319926474563228, "loss": 2.0884, "step": 261110 }, { "epoch": 0.9938871676194971, "grad_norm": 0.11998965591192245, "learning_rate": 0.0004316869948936027, "loss": 2.0961, "step": 261120 }, { "epoch": 0.9939252300876198, "grad_norm": 0.12517978250980377, "learning_rate": 0.00043138270383253697, "loss": 2.0747, "step": 261130 }, { "epoch": 0.9939632925557425, "grad_norm": 0.12591873109340668, "learning_rate": 0.00043107975623954886, "loss": 2.0745, "step": 261140 }, { "epoch": 0.9940013550238652, "grad_norm": 0.12345649302005768, "learning_rate": 0.0004307781344756827, "loss": 2.0917, "step": 261150 }, { "epoch": 0.9940394174919879, "grad_norm": 0.14185993373394012, "learning_rate": 0.0004304778212846193, "loss": 2.0787, "step": 261160 }, { "epoch": 0.9940774799601105, "grad_norm": 0.10899876803159714, "learning_rate": 0.00043017879978115534, "loss": 2.0792, "step": 261170 }, { "epoch": 0.9941155424282332, "grad_norm": 0.11979634314775467, "learning_rate": 0.00042988105344012457, "loss": 2.0861, "step": 261180 }, { "epoch": 0.9941536048963558, "grad_norm": 0.1269412636756897, "learning_rate": 0.0004295845660857413, "loss": 2.0928, "step": 261190 }, { "epoch": 0.9941916673644786, "grad_norm": 0.12314164638519287, "learning_rate": 0.00042928932188134527, "loss": 2.0722, "step": 261200 }, { "epoch": 0.9942297298326013, "grad_norm": 0.13209030032157898, "learning_rate": 0.00042899530531953067, "loss": 2.0971, "step": 261210 }, { "epoch": 0.9942677923007239, "grad_norm": 0.12487448006868362, "learning_rate": 0.00042870250121264185, "loss": 2.082, "step": 261220 }, { "epoch": 0.9943058547688466, "grad_norm": 0.1182442456483841, "learning_rate": 0.00042841089468361826, "loss": 2.0888, "step": 261230 }, { "epoch": 0.9943439172369694, "grad_norm": 0.11452000588178635, "learning_rate": 0.0004281204711571739, "loss": 2.0861, "step": 261240 }, { "epoch": 0.994381979705092, "grad_norm": 0.13320042192935944, "learning_rate": 0.0004278312163512968, "loss": 2.0745, "step": 261250 }, { "epoch": 0.9944200421732147, "grad_norm": 0.12851102650165558, "learning_rate": 0.0004275431162690528, "loss": 2.0891, "step": 261260 }, { "epoch": 0.9944581046413373, "grad_norm": 0.13266323506832123, "learning_rate": 0.00042725615719068266, "loss": 2.0889, "step": 261270 }, { "epoch": 0.9944961671094601, "grad_norm": 0.1309056431055069, "learning_rate": 0.0004269703256659779, "loss": 2.0873, "step": 261280 }, { "epoch": 0.9945342295775828, "grad_norm": 0.12090447545051575, "learning_rate": 0.0004266856085069241, "loss": 2.0863, "step": 261290 }, { "epoch": 0.9945722920457054, "grad_norm": 0.12500663101673126, "learning_rate": 0.00042640199278060124, "loss": 2.0944, "step": 261300 }, { "epoch": 0.9946103545138281, "grad_norm": 0.11978045105934143, "learning_rate": 0.0004261194658023284, "loss": 2.0696, "step": 261310 }, { "epoch": 0.9946484169819507, "grad_norm": 0.1339166760444641, "learning_rate": 0.0004258380151290434, "loss": 2.0732, "step": 261320 }, { "epoch": 0.9946864794500735, "grad_norm": 0.12607313692569733, "learning_rate": 0.000425557628552909, "loss": 2.09, "step": 261330 }, { "epoch": 0.9947245419181961, "grad_norm": 0.12124264240264893, "learning_rate": 0.0004252782940951337, "loss": 2.0862, "step": 261340 }, { "epoch": 0.9947626043863188, "grad_norm": 0.12462054193019867, "learning_rate": 0.000425, "loss": 2.1053, "step": 261350 }, { "epoch": 0.9948006668544415, "grad_norm": 0.1448441445827484, "learning_rate": 0.0004247227347290919, "loss": 2.0945, "step": 261360 }, { "epoch": 0.9948387293225642, "grad_norm": 0.11749083548784256, "learning_rate": 0.00042444648695571244, "loss": 2.0738, "step": 261370 }, { "epoch": 0.9948767917906869, "grad_norm": 0.129529669880867, "learning_rate": 0.0004241712455594845, "loss": 2.0758, "step": 261380 }, { "epoch": 0.9949148542588095, "grad_norm": 0.11888234317302704, "learning_rate": 0.0004238969996211275, "loss": 2.094, "step": 261390 }, { "epoch": 0.9949529167269322, "grad_norm": 0.1276368796825409, "learning_rate": 0.00042362373841740267, "loss": 2.0911, "step": 261400 }, { "epoch": 0.994990979195055, "grad_norm": 0.11603518575429916, "learning_rate": 0.00042335145141622057, "loss": 2.0785, "step": 261410 }, { "epoch": 0.9950290416631776, "grad_norm": 0.12183357775211334, "learning_rate": 0.0004230801282719045, "loss": 2.101, "step": 261420 }, { "epoch": 0.9950671041313003, "grad_norm": 0.12915317714214325, "learning_rate": 0.0004228097588206039, "loss": 2.0822, "step": 261430 }, { "epoch": 0.9951051665994229, "grad_norm": 0.12496720254421234, "learning_rate": 0.0004225403330758517, "loss": 2.0768, "step": 261440 }, { "epoch": 0.9951432290675457, "grad_norm": 0.13041459023952484, "learning_rate": 0.0004222718412242599, "loss": 2.0899, "step": 261450 }, { "epoch": 0.9951812915356684, "grad_norm": 0.1264573186635971, "learning_rate": 0.0004220042736213494, "loss": 2.0879, "step": 261460 }, { "epoch": 0.995219354003791, "grad_norm": 0.1156756654381752, "learning_rate": 0.0004217376207875074, "loss": 2.09, "step": 261470 }, { "epoch": 0.9952574164719137, "grad_norm": 0.11675167083740234, "learning_rate": 0.00042147187340406834, "loss": 2.0849, "step": 261480 }, { "epoch": 0.9952954789400363, "grad_norm": 0.13087327778339386, "learning_rate": 0.0004212070223095146, "loss": 2.0705, "step": 261490 }, { "epoch": 0.9953335414081591, "grad_norm": 0.12341038882732391, "learning_rate": 0.0004209430584957905, "loss": 2.0994, "step": 261500 }, { "epoch": 0.9953716038762818, "grad_norm": 0.1165153756737709, "learning_rate": 0.00042067997310472807, "loss": 2.0832, "step": 261510 }, { "epoch": 0.9954096663444044, "grad_norm": 0.12436539679765701, "learning_rate": 0.0004204177574245779, "loss": 2.0767, "step": 261520 }, { "epoch": 0.9954477288125271, "grad_norm": 0.13395121693611145, "learning_rate": 0.00042015640288664346, "loss": 2.0783, "step": 261530 }, { "epoch": 0.9954857912806498, "grad_norm": 0.13121996819972992, "learning_rate": 0.0004198959010620139, "loss": 2.0866, "step": 261540 }, { "epoch": 0.9955238537487725, "grad_norm": 0.11080297082662582, "learning_rate": 0.00041963624365839205, "loss": 2.0746, "step": 261550 }, { "epoch": 0.9955619162168952, "grad_norm": 0.12134183198213577, "learning_rate": 0.0004193774225170145, "loss": 2.0965, "step": 261560 }, { "epoch": 0.9955999786850178, "grad_norm": 0.13009509444236755, "learning_rate": 0.0004191194296096605, "loss": 2.0738, "step": 261570 }, { "epoch": 0.9956380411531406, "grad_norm": 0.1238146498799324, "learning_rate": 0.0004188622570357461, "loss": 2.081, "step": 261580 }, { "epoch": 0.9956761036212632, "grad_norm": 0.11727666854858398, "learning_rate": 0.0004186058970195015, "loss": 2.0848, "step": 261590 }, { "epoch": 0.9957141660893859, "grad_norm": 0.1214786097407341, "learning_rate": 0.0004183503419072274, "loss": 2.0727, "step": 261600 }, { "epoch": 0.9957522285575086, "grad_norm": 0.12340279668569565, "learning_rate": 0.0004180955841646292, "loss": 2.0892, "step": 261610 }, { "epoch": 0.9957902910256312, "grad_norm": 0.1235736683011055, "learning_rate": 0.00041784161637422506, "loss": 2.0726, "step": 261620 }, { "epoch": 0.995828353493754, "grad_norm": 0.11563219130039215, "learning_rate": 0.0004175884312328259, "loss": 2.0813, "step": 261630 }, { "epoch": 0.9958664159618766, "grad_norm": 0.12072248756885529, "learning_rate": 0.00041733602154908503, "loss": 2.0853, "step": 261640 }, { "epoch": 0.9959044784299993, "grad_norm": 0.13266229629516602, "learning_rate": 0.000417084380241115, "loss": 2.0867, "step": 261650 }, { "epoch": 0.995942540898122, "grad_norm": 0.11815812438726425, "learning_rate": 0.000416833500334169, "loss": 2.0762, "step": 261660 }, { "epoch": 0.9959806033662447, "grad_norm": 0.11936049163341522, "learning_rate": 0.00041658337495838535, "loss": 2.0797, "step": 261670 }, { "epoch": 0.9960186658343674, "grad_norm": 0.13057257235050201, "learning_rate": 0.0004163339973465924, "loss": 2.0801, "step": 261680 }, { "epoch": 0.99605672830249, "grad_norm": 0.12162986397743225, "learning_rate": 0.00041608536083217264, "loss": 2.0789, "step": 261690 }, { "epoch": 0.9960947907706127, "grad_norm": 0.13624535501003265, "learning_rate": 0.0004158374588469827, "loss": 2.0855, "step": 261700 }, { "epoch": 0.9961328532387355, "grad_norm": 0.1518181413412094, "learning_rate": 0.00041559028491932936, "loss": 2.0836, "step": 261710 }, { "epoch": 0.9961709157068581, "grad_norm": 0.11926338076591492, "learning_rate": 0.00041534383267199807, "loss": 2.0806, "step": 261720 }, { "epoch": 0.9962089781749808, "grad_norm": 0.12259402126073837, "learning_rate": 0.00041509809582033314, "loss": 2.0924, "step": 261730 }, { "epoch": 0.9962470406431034, "grad_norm": 0.11852733790874481, "learning_rate": 0.00041485306817036797, "loss": 2.0745, "step": 261740 }, { "epoch": 0.9962851031112261, "grad_norm": 0.1294088065624237, "learning_rate": 0.0004146087436170033, "loss": 2.0817, "step": 261750 }, { "epoch": 0.9963231655793489, "grad_norm": 0.12300239503383636, "learning_rate": 0.0004143651161422325, "loss": 2.0908, "step": 261760 }, { "epoch": 0.9963612280474715, "grad_norm": 0.1162174716591835, "learning_rate": 0.00041412217981341167, "loss": 2.1025, "step": 261770 }, { "epoch": 0.9963992905155942, "grad_norm": 0.1360090672969818, "learning_rate": 0.0004138799287815746, "loss": 2.0813, "step": 261780 }, { "epoch": 0.9964373529837168, "grad_norm": 0.1395946741104126, "learning_rate": 0.0004136383572797893, "loss": 2.0956, "step": 261790 }, { "epoch": 0.9964754154518396, "grad_norm": 0.11686591058969498, "learning_rate": 0.0004133974596215562, "loss": 2.092, "step": 261800 }, { "epoch": 0.9965134779199623, "grad_norm": 0.11687017232179642, "learning_rate": 0.0004131572301992465, "loss": 2.0956, "step": 261810 }, { "epoch": 0.9965515403880849, "grad_norm": 0.12350569665431976, "learning_rate": 0.00041291766348257916, "loss": 2.0794, "step": 261820 }, { "epoch": 0.9965896028562076, "grad_norm": 0.13105247914791107, "learning_rate": 0.0004126787540171351, "loss": 2.0609, "step": 261830 }, { "epoch": 0.9966276653243303, "grad_norm": 0.11866418272256851, "learning_rate": 0.0004124404964229087, "loss": 2.0755, "step": 261840 }, { "epoch": 0.996665727792453, "grad_norm": 0.11236187070608139, "learning_rate": 0.00041220288539289384, "loss": 2.0822, "step": 261850 }, { "epoch": 0.9967037902605757, "grad_norm": 0.11838629096746445, "learning_rate": 0.00041196591569170495, "loss": 2.0929, "step": 261860 }, { "epoch": 0.9967418527286983, "grad_norm": 0.12502582371234894, "learning_rate": 0.00041172958215423096, "loss": 2.0859, "step": 261870 }, { "epoch": 0.9967799151968211, "grad_norm": 0.14400994777679443, "learning_rate": 0.00041149387968432167, "loss": 2.0881, "step": 261880 }, { "epoch": 0.9968179776649437, "grad_norm": 0.12822756171226501, "learning_rate": 0.00041125880325350577, "loss": 2.0717, "step": 261890 }, { "epoch": 0.9968560401330664, "grad_norm": 0.1273794323205948, "learning_rate": 0.0004110243478997391, "loss": 2.0873, "step": 261900 }, { "epoch": 0.996894102601189, "grad_norm": 0.11934802681207657, "learning_rate": 0.00041079050872618245, "loss": 2.0776, "step": 261910 }, { "epoch": 0.9969321650693117, "grad_norm": 0.13123826682567596, "learning_rate": 0.0004105572809000084, "loss": 2.0772, "step": 261920 }, { "epoch": 0.9969702275374345, "grad_norm": 0.13744977116584778, "learning_rate": 0.000410324659651236, "loss": 2.0683, "step": 261930 }, { "epoch": 0.9970082900055571, "grad_norm": 0.11932683736085892, "learning_rate": 0.0004100926402715922, "loss": 2.0658, "step": 261940 }, { "epoch": 0.9970463524736798, "grad_norm": 0.1288817971944809, "learning_rate": 0.00040986121811340025, "loss": 2.079, "step": 261950 }, { "epoch": 0.9970844149418024, "grad_norm": 0.13244900107383728, "learning_rate": 0.0004096303885884936, "loss": 2.0807, "step": 261960 }, { "epoch": 0.9971224774099252, "grad_norm": 0.12538810074329376, "learning_rate": 0.00040940014716715414, "loss": 2.065, "step": 261970 }, { "epoch": 0.9971605398780479, "grad_norm": 0.12961310148239136, "learning_rate": 0.00040917048937707525, "loss": 2.0796, "step": 261980 }, { "epoch": 0.9971986023461705, "grad_norm": 0.1247786357998848, "learning_rate": 0.0004089414108023485, "loss": 2.0708, "step": 261990 }, { "epoch": 0.9972366648142932, "grad_norm": 0.11900941282510757, "learning_rate": 0.0004087129070824723, "loss": 2.0948, "step": 262000 }, { "epoch": 0.997274727282416, "grad_norm": 0.1165849044919014, "learning_rate": 0.0004084849739113844, "loss": 2.0735, "step": 262010 }, { "epoch": 0.9973127897505386, "grad_norm": 0.12156182527542114, "learning_rate": 0.0004082576070365141, "loss": 2.0774, "step": 262020 }, { "epoch": 0.9973508522186613, "grad_norm": 0.13087128102779388, "learning_rate": 0.0004080308022578574, "loss": 2.0855, "step": 262030 }, { "epoch": 0.9973889146867839, "grad_norm": 0.1311582624912262, "learning_rate": 0.00040780455542707114, "loss": 2.0852, "step": 262040 }, { "epoch": 0.9974269771549066, "grad_norm": 0.12414788454771042, "learning_rate": 0.00040757886244658823, "loss": 2.0563, "step": 262050 }, { "epoch": 0.9974650396230293, "grad_norm": 0.10987682640552521, "learning_rate": 0.0004073537192687514, "loss": 2.0742, "step": 262060 }, { "epoch": 0.997503102091152, "grad_norm": 0.12739349901676178, "learning_rate": 0.00040712912189496645, "loss": 2.0616, "step": 262070 }, { "epoch": 0.9975411645592747, "grad_norm": 0.12246967107057571, "learning_rate": 0.00040690506637487375, "loss": 2.066, "step": 262080 }, { "epoch": 0.9975792270273973, "grad_norm": 0.12437837570905685, "learning_rate": 0.0004066815488055372, "loss": 2.0731, "step": 262090 }, { "epoch": 0.9976172894955201, "grad_norm": 0.12276354432106018, "learning_rate": 0.00040645856533065144, "loss": 2.0863, "step": 262100 }, { "epoch": 0.9976553519636427, "grad_norm": 0.14230865240097046, "learning_rate": 0.0004062361121397653, "loss": 2.0718, "step": 262110 }, { "epoch": 0.9976934144317654, "grad_norm": 0.11511580646038055, "learning_rate": 0.0004060141854675221, "loss": 2.0909, "step": 262120 }, { "epoch": 0.9977314768998881, "grad_norm": 0.12784437835216522, "learning_rate": 0.0004057927815929162, "loss": 2.068, "step": 262130 }, { "epoch": 0.9977695393680108, "grad_norm": 0.12329355627298355, "learning_rate": 0.0004055718968385647, "loss": 2.0667, "step": 262140 }, { "epoch": 0.9978076018361335, "grad_norm": 0.12663282454013824, "learning_rate": 0.0004053515275699954, "loss": 2.06, "step": 262150 }, { "epoch": 0.9978456643042561, "grad_norm": 0.14040499925613403, "learning_rate": 0.0004051316701949486, "loss": 2.0831, "step": 262160 }, { "epoch": 0.9978837267723788, "grad_norm": 0.11923239380121231, "learning_rate": 0.00040491232116269393, "loss": 2.0734, "step": 262170 }, { "epoch": 0.9979217892405015, "grad_norm": 0.11212129890918732, "learning_rate": 0.00040469347696336135, "loss": 2.0828, "step": 262180 }, { "epoch": 0.9979598517086242, "grad_norm": 0.11588910222053528, "learning_rate": 0.000404475134127286, "loss": 2.0721, "step": 262190 }, { "epoch": 0.9979979141767469, "grad_norm": 0.11533407121896744, "learning_rate": 0.00040425728922436623, "loss": 2.0772, "step": 262200 }, { "epoch": 0.9980359766448695, "grad_norm": 0.12063222378492355, "learning_rate": 0.00040403993886343483, "loss": 2.0653, "step": 262210 }, { "epoch": 0.9980740391129922, "grad_norm": 0.13424226641654968, "learning_rate": 0.0004038230796916433, "loss": 2.0903, "step": 262220 }, { "epoch": 0.998112101581115, "grad_norm": 0.12373346090316772, "learning_rate": 0.0004036067083938583, "loss": 2.0967, "step": 262230 }, { "epoch": 0.9981501640492376, "grad_norm": 0.13985063135623932, "learning_rate": 0.0004033908216920704, "loss": 2.0968, "step": 262240 }, { "epoch": 0.9981882265173603, "grad_norm": 0.12289280444383621, "learning_rate": 0.00040317541634481456, "loss": 2.0731, "step": 262250 }, { "epoch": 0.9982262889854829, "grad_norm": 0.11922875046730042, "learning_rate": 0.00040296048914660246, "loss": 2.0713, "step": 262260 }, { "epoch": 0.9982643514536057, "grad_norm": 0.1151062399148941, "learning_rate": 0.0004027460369273656, "loss": 2.079, "step": 262270 }, { "epoch": 0.9983024139217284, "grad_norm": 0.13483203947544098, "learning_rate": 0.0004025320565519104, "loss": 2.0612, "step": 262280 }, { "epoch": 0.998340476389851, "grad_norm": 0.12044581025838852, "learning_rate": 0.00040231854491938265, "loss": 2.0863, "step": 262290 }, { "epoch": 0.9983785388579737, "grad_norm": 0.13184158504009247, "learning_rate": 0.00040210549896274394, "loss": 2.0738, "step": 262300 }, { "epoch": 0.9984166013260964, "grad_norm": 0.12094981968402863, "learning_rate": 0.0004018929156482571, "loss": 2.0625, "step": 262310 }, { "epoch": 0.9984546637942191, "grad_norm": 0.47059765458106995, "learning_rate": 0.0004016807919749825, "loss": 2.0765, "step": 262320 }, { "epoch": 0.9984927262623418, "grad_norm": 0.12888529896736145, "learning_rate": 0.0004014691249742837, "loss": 2.0696, "step": 262330 }, { "epoch": 0.9985307887304644, "grad_norm": 0.1215779036283493, "learning_rate": 0.0004012579117093425, "loss": 2.0789, "step": 262340 }, { "epoch": 0.9985688511985871, "grad_norm": 0.11985091120004654, "learning_rate": 0.000401047149274684, "loss": 2.0806, "step": 262350 }, { "epoch": 0.9986069136667098, "grad_norm": 0.13111738860607147, "learning_rate": 0.0004008368347957099, "loss": 2.0949, "step": 262360 }, { "epoch": 0.9986449761348325, "grad_norm": 0.12248831242322922, "learning_rate": 0.00040062696542824105, "loss": 2.0751, "step": 262370 }, { "epoch": 0.9986830386029552, "grad_norm": 0.13073061406612396, "learning_rate": 0.0004004175383580689, "loss": 2.0818, "step": 262380 }, { "epoch": 0.9987211010710778, "grad_norm": 0.11265187710523605, "learning_rate": 0.0004002085508005153, "loss": 2.0791, "step": 262390 }, { "epoch": 0.9987591635392006, "grad_norm": 0.11508375406265259, "learning_rate": 0.0004, "loss": 2.0742, "step": 262400 }, { "epoch": 0.9987972260073232, "grad_norm": 0.1140381395816803, "learning_rate": 0.00039979188322961725, "loss": 2.065, "step": 262410 }, { "epoch": 0.9988352884754459, "grad_norm": 0.1270381361246109, "learning_rate": 0.00039958419779071954, "loss": 2.0798, "step": 262420 }, { "epoch": 0.9988733509435685, "grad_norm": 0.12487317621707916, "learning_rate": 0.0003993769410125095, "loss": 2.0717, "step": 262430 }, { "epoch": 0.9989114134116913, "grad_norm": 0.12228159606456757, "learning_rate": 0.00039917011025163883, "loss": 2.0825, "step": 262440 }, { "epoch": 0.998949475879814, "grad_norm": 0.11730682849884033, "learning_rate": 0.0003989637028918155, "loss": 2.0792, "step": 262450 }, { "epoch": 0.9989875383479366, "grad_norm": 0.127616748213768, "learning_rate": 0.00039875771634341706, "loss": 2.0759, "step": 262460 }, { "epoch": 0.9990256008160593, "grad_norm": 0.12070652842521667, "learning_rate": 0.000398552148043112, "loss": 2.0684, "step": 262470 }, { "epoch": 0.999063663284182, "grad_norm": 0.12831643223762512, "learning_rate": 0.00039834699545348727, "loss": 2.0886, "step": 262480 }, { "epoch": 0.9991017257523047, "grad_norm": 0.13065242767333984, "learning_rate": 0.0003981422560626832, "loss": 2.0678, "step": 262490 }, { "epoch": 0.9991397882204274, "grad_norm": 0.12046606093645096, "learning_rate": 0.0003979379273840342, "loss": 2.0794, "step": 262500 }, { "epoch": 0.99917785068855, "grad_norm": 0.12492360919713974, "learning_rate": 0.00039773400695571705, "loss": 2.0612, "step": 262510 }, { "epoch": 0.9992159131566727, "grad_norm": 0.11388891935348511, "learning_rate": 0.000397530492340404, "loss": 2.0642, "step": 262520 }, { "epoch": 0.9992539756247955, "grad_norm": 0.11626210063695908, "learning_rate": 0.0003973273811249237, "loss": 2.0722, "step": 262530 }, { "epoch": 0.9992920380929181, "grad_norm": 0.12329540401697159, "learning_rate": 0.0003971246709199269, "loss": 2.0744, "step": 262540 }, { "epoch": 0.9993301005610408, "grad_norm": 0.1349315345287323, "learning_rate": 0.0003969223593595585, "loss": 2.0731, "step": 262550 }, { "epoch": 0.9993681630291634, "grad_norm": 0.1360684037208557, "learning_rate": 0.00039672044410113557, "loss": 2.0867, "step": 262560 }, { "epoch": 0.9994062254972862, "grad_norm": 0.1352628469467163, "learning_rate": 0.00039651892282483077, "loss": 2.0611, "step": 262570 }, { "epoch": 0.9994442879654089, "grad_norm": 0.13114619255065918, "learning_rate": 0.0003963177932333614, "loss": 2.0862, "step": 262580 }, { "epoch": 0.9994823504335315, "grad_norm": 0.11713041365146637, "learning_rate": 0.0003961170530516839, "loss": 2.0724, "step": 262590 }, { "epoch": 0.9995204129016542, "grad_norm": 0.11450465768575668, "learning_rate": 0.0003959167000266934, "loss": 2.0709, "step": 262600 }, { "epoch": 0.9995584753697769, "grad_norm": 0.12338771671056747, "learning_rate": 0.00039571673192692895, "loss": 2.0782, "step": 262610 }, { "epoch": 0.9995965378378996, "grad_norm": 0.13126511871814728, "learning_rate": 0.0003955171465422835, "loss": 2.0789, "step": 262620 }, { "epoch": 0.9996346003060222, "grad_norm": 0.12571443617343903, "learning_rate": 0.00039531794168371864, "loss": 2.087, "step": 262630 }, { "epoch": 0.9996726627741449, "grad_norm": 0.12128579616546631, "learning_rate": 0.0003951191151829848, "loss": 2.0793, "step": 262640 }, { "epoch": 0.9997107252422676, "grad_norm": 0.13705646991729736, "learning_rate": 0.0003949206648923459, "loss": 2.0662, "step": 262650 }, { "epoch": 0.9997487877103903, "grad_norm": 0.12217232584953308, "learning_rate": 0.00039472258868430835, "loss": 2.0781, "step": 262660 }, { "epoch": 0.999786850178513, "grad_norm": 0.11280722916126251, "learning_rate": 0.0003945248844513551, "loss": 2.0688, "step": 262670 }, { "epoch": 0.9998249126466356, "grad_norm": 0.11979371309280396, "learning_rate": 0.0003943275501056843, "loss": 2.0698, "step": 262680 }, { "epoch": 0.9998629751147583, "grad_norm": 0.13274313509464264, "learning_rate": 0.00039413058357895173, "loss": 2.0747, "step": 262690 }, { "epoch": 0.9999010375828811, "grad_norm": 0.13018228113651276, "learning_rate": 0.00039393398282201785, "loss": 2.0636, "step": 262700 }, { "epoch": 0.9999391000510037, "grad_norm": 0.12832440435886383, "learning_rate": 0.0003937377458046995, "loss": 2.0743, "step": 262710 }, { "epoch": 0.9999771625191264, "grad_norm": 0.11964524537324905, "learning_rate": 0.0003935418705155246, "loss": 2.0809, "step": 262720 }, { "epoch": 1.0000152249872492, "grad_norm": 0.12813405692577362, "learning_rate": 0.0003933463549614923, "loss": 2.0832, "step": 262730 }, { "epoch": 1.0000532874553718, "grad_norm": 0.122567318379879, "learning_rate": 0.000393151197167836, "loss": 2.0679, "step": 262740 }, { "epoch": 1.0000913499234945, "grad_norm": 0.12381791323423386, "learning_rate": 0.0003929563951777906, "loss": 2.0814, "step": 262750 }, { "epoch": 1.0001294123916171, "grad_norm": 0.12520988285541534, "learning_rate": 0.0003927619470523639, "loss": 2.0771, "step": 262760 }, { "epoch": 1.0001674748597398, "grad_norm": 0.12787935137748718, "learning_rate": 0.0003925678508701112, "loss": 2.0707, "step": 262770 }, { "epoch": 1.0002055373278624, "grad_norm": 0.12769973278045654, "learning_rate": 0.00039237410472691374, "loss": 2.074, "step": 262780 }, { "epoch": 1.000243599795985, "grad_norm": 0.13190263509750366, "learning_rate": 0.0003921807067357609, "loss": 2.0792, "step": 262790 }, { "epoch": 1.0002816622641078, "grad_norm": 0.11785663664340973, "learning_rate": 0.00039198765502653567, "loss": 2.0698, "step": 262800 }, { "epoch": 1.0003197247322306, "grad_norm": 0.13956955075263977, "learning_rate": 0.0003917949477458038, "loss": 2.0618, "step": 262810 }, { "epoch": 1.0003577872003533, "grad_norm": 0.1239355057477951, "learning_rate": 0.000391602583056606, "loss": 2.0765, "step": 262820 }, { "epoch": 1.000395849668476, "grad_norm": 0.11955326795578003, "learning_rate": 0.00039141055913825384, "loss": 2.0675, "step": 262830 }, { "epoch": 1.0004339121365986, "grad_norm": 0.10961072146892548, "learning_rate": 0.0003912188741861286, "loss": 2.0729, "step": 262840 }, { "epoch": 1.0004719746047213, "grad_norm": 0.131142720580101, "learning_rate": 0.0003910275264114832, "loss": 2.0704, "step": 262850 }, { "epoch": 1.000510037072844, "grad_norm": 0.12614300847053528, "learning_rate": 0.0003908365140412479, "loss": 2.0757, "step": 262860 }, { "epoch": 1.0005480995409666, "grad_norm": 0.13212086260318756, "learning_rate": 0.00039064583531783835, "loss": 2.077, "step": 262870 }, { "epoch": 1.0005861620090892, "grad_norm": 0.11505197733640671, "learning_rate": 0.00039045548849896676, "loss": 2.0816, "step": 262880 }, { "epoch": 1.000624224477212, "grad_norm": 0.11376156657934189, "learning_rate": 0.00039026547185745653, "loss": 2.0682, "step": 262890 }, { "epoch": 1.0006622869453348, "grad_norm": 0.14367571473121643, "learning_rate": 0.000390075783681059, "loss": 2.0899, "step": 262900 }, { "epoch": 1.0007003494134574, "grad_norm": 0.11860551685094833, "learning_rate": 0.0003898864222722738, "loss": 2.0721, "step": 262910 }, { "epoch": 1.00073841188158, "grad_norm": 0.12184567004442215, "learning_rate": 0.00038969738594817136, "loss": 2.0688, "step": 262920 }, { "epoch": 1.0007764743497027, "grad_norm": 0.12217500805854797, "learning_rate": 0.00038950867304021855, "loss": 2.0592, "step": 262930 }, { "epoch": 1.0008145368178254, "grad_norm": 0.11617803573608398, "learning_rate": 0.0003893202818941067, "loss": 2.0612, "step": 262940 }, { "epoch": 1.000852599285948, "grad_norm": 0.13129088282585144, "learning_rate": 0.00038913221086958274, "loss": 2.0635, "step": 262950 }, { "epoch": 1.0008906617540707, "grad_norm": 0.1188531294465065, "learning_rate": 0.00038894445834028213, "loss": 2.0795, "step": 262960 }, { "epoch": 1.0009287242221934, "grad_norm": 0.12933498620986938, "learning_rate": 0.00038875702269356506, "loss": 2.0678, "step": 262970 }, { "epoch": 1.0009667866903162, "grad_norm": 0.12279139459133148, "learning_rate": 0.0003885699023303548, "loss": 2.0711, "step": 262980 }, { "epoch": 1.001004849158439, "grad_norm": 0.12984012067317963, "learning_rate": 0.00038838309566497855, "loss": 2.0839, "step": 262990 }, { "epoch": 1.0010429116265616, "grad_norm": 0.12132301926612854, "learning_rate": 0.00038819660112501055, "loss": 2.0716, "step": 263000 }, { "epoch": 1.0010809740946842, "grad_norm": 0.2595154941082001, "learning_rate": 0.00038801041715111773, "loss": 2.0623, "step": 263010 }, { "epoch": 1.0011190365628069, "grad_norm": 0.12713661789894104, "learning_rate": 0.00038782454219690775, "loss": 2.0847, "step": 263020 }, { "epoch": 1.0011570990309295, "grad_norm": 0.12192989140748978, "learning_rate": 0.00038763897472877886, "loss": 2.0625, "step": 263030 }, { "epoch": 1.0011951614990522, "grad_norm": 0.11912639439105988, "learning_rate": 0.00038745371322577245, "loss": 2.0742, "step": 263040 }, { "epoch": 1.0012332239671748, "grad_norm": 0.11651023477315903, "learning_rate": 0.00038726875617942763, "loss": 2.0721, "step": 263050 }, { "epoch": 1.0012712864352975, "grad_norm": 0.12285809963941574, "learning_rate": 0.00038708410209363785, "loss": 2.0678, "step": 263060 }, { "epoch": 1.0013093489034204, "grad_norm": 0.14245565235614777, "learning_rate": 0.0003868997494845097, "loss": 2.0897, "step": 263070 }, { "epoch": 1.001347411371543, "grad_norm": 0.14112943410873413, "learning_rate": 0.0003867156968802238, "loss": 2.0726, "step": 263080 }, { "epoch": 1.0013854738396657, "grad_norm": 0.1136462390422821, "learning_rate": 0.0003865319428208978, "loss": 2.0654, "step": 263090 }, { "epoch": 1.0014235363077884, "grad_norm": 0.11797811836004257, "learning_rate": 0.00038634848585845124, "loss": 2.0743, "step": 263100 }, { "epoch": 1.001461598775911, "grad_norm": 0.12332088500261307, "learning_rate": 0.00038616532455647214, "loss": 2.0581, "step": 263110 }, { "epoch": 1.0014996612440337, "grad_norm": 0.12641309201717377, "learning_rate": 0.0003859824574900862, "loss": 2.0921, "step": 263120 }, { "epoch": 1.0015377237121563, "grad_norm": 0.12153881043195724, "learning_rate": 0.00038579988324582737, "loss": 2.0746, "step": 263130 }, { "epoch": 1.001575786180279, "grad_norm": 0.1199311763048172, "learning_rate": 0.0003856176004215101, "loss": 2.0535, "step": 263140 }, { "epoch": 1.0016138486484019, "grad_norm": 0.11625245958566666, "learning_rate": 0.000385435607626104, "loss": 2.074, "step": 263150 }, { "epoch": 1.0016519111165245, "grad_norm": 0.11807789653539658, "learning_rate": 0.00038525390347961, "loss": 2.0766, "step": 263160 }, { "epoch": 1.0016899735846472, "grad_norm": 0.11367712914943695, "learning_rate": 0.0003850724866129379, "loss": 2.0647, "step": 263170 }, { "epoch": 1.0017280360527698, "grad_norm": 0.12399783730506897, "learning_rate": 0.00038489135566778667, "loss": 2.0563, "step": 263180 }, { "epoch": 1.0017660985208925, "grad_norm": 0.11463885754346848, "learning_rate": 0.00038471050929652495, "loss": 2.0593, "step": 263190 }, { "epoch": 1.0018041609890151, "grad_norm": 0.1467050015926361, "learning_rate": 0.00038452994616207484, "loss": 2.0702, "step": 263200 }, { "epoch": 1.0018422234571378, "grad_norm": 0.12973052263259888, "learning_rate": 0.0003843496649377962, "loss": 2.0552, "step": 263210 }, { "epoch": 1.0018802859252605, "grad_norm": 0.1213889867067337, "learning_rate": 0.000384169664307373, "loss": 2.0743, "step": 263220 }, { "epoch": 1.0019183483933831, "grad_norm": 0.1346946805715561, "learning_rate": 0.00038398994296470095, "loss": 2.081, "step": 263230 }, { "epoch": 1.001956410861506, "grad_norm": 0.13012921810150146, "learning_rate": 0.0003838104996137775, "loss": 2.065, "step": 263240 }, { "epoch": 1.0019944733296287, "grad_norm": 0.11883129179477692, "learning_rate": 0.00038363133296859215, "loss": 2.0656, "step": 263250 }, { "epoch": 1.0020325357977513, "grad_norm": 0.14053906500339508, "learning_rate": 0.0003834524417530194, "loss": 2.0638, "step": 263260 }, { "epoch": 1.002070598265874, "grad_norm": 0.12595908343791962, "learning_rate": 0.0003832738247007125, "loss": 2.074, "step": 263270 }, { "epoch": 1.0021086607339966, "grad_norm": 0.11965731531381607, "learning_rate": 0.00038309548055499875, "loss": 2.0691, "step": 263280 }, { "epoch": 1.0021467232021193, "grad_norm": 0.11948921531438828, "learning_rate": 0.0003829174080687768, "loss": 2.0721, "step": 263290 }, { "epoch": 1.002184785670242, "grad_norm": 0.11565563082695007, "learning_rate": 0.0003827396060044143, "loss": 2.0749, "step": 263300 }, { "epoch": 1.0022228481383646, "grad_norm": 0.13267754018306732, "learning_rate": 0.0003825620731336479, "loss": 2.0906, "step": 263310 }, { "epoch": 1.0022609106064875, "grad_norm": 0.12567409873008728, "learning_rate": 0.00038238480823748436, "loss": 2.0804, "step": 263320 }, { "epoch": 1.0022989730746101, "grad_norm": 0.11261701583862305, "learning_rate": 0.00038220781010610256, "loss": 2.0572, "step": 263330 }, { "epoch": 1.0023370355427328, "grad_norm": 0.1253342479467392, "learning_rate": 0.00038203107753875744, "loss": 2.0854, "step": 263340 }, { "epoch": 1.0023750980108554, "grad_norm": 0.12061937898397446, "learning_rate": 0.00038185460934368477, "loss": 2.0691, "step": 263350 }, { "epoch": 1.002413160478978, "grad_norm": 0.12016843259334564, "learning_rate": 0.0003816784043380077, "loss": 2.069, "step": 263360 }, { "epoch": 1.0024512229471008, "grad_norm": 0.12179528176784515, "learning_rate": 0.00038150246134764377, "loss": 2.085, "step": 263370 }, { "epoch": 1.0024892854152234, "grad_norm": 0.11890345811843872, "learning_rate": 0.000381326779207214, "loss": 2.0874, "step": 263380 }, { "epoch": 1.002527347883346, "grad_norm": 0.139007106423378, "learning_rate": 0.0003811513567599529, "loss": 2.0827, "step": 263390 }, { "epoch": 1.0025654103514687, "grad_norm": 0.11820542812347412, "learning_rate": 0.0003809761928576192, "loss": 2.0754, "step": 263400 }, { "epoch": 1.0026034728195916, "grad_norm": 0.13151657581329346, "learning_rate": 0.00038080128636040853, "loss": 2.0838, "step": 263410 }, { "epoch": 1.0026415352877143, "grad_norm": 0.12178993970155716, "learning_rate": 0.00038062663613686677, "loss": 2.0577, "step": 263420 }, { "epoch": 1.002679597755837, "grad_norm": 0.12232571840286255, "learning_rate": 0.0003804522410638047, "loss": 2.0789, "step": 263430 }, { "epoch": 1.0027176602239596, "grad_norm": 0.13086816668510437, "learning_rate": 0.00038027810002621356, "loss": 2.0751, "step": 263440 }, { "epoch": 1.0027557226920822, "grad_norm": 0.12952834367752075, "learning_rate": 0.000380104211917182, "loss": 2.0804, "step": 263450 }, { "epoch": 1.002793785160205, "grad_norm": 0.1318865418434143, "learning_rate": 0.00037993057563781417, "loss": 2.0713, "step": 263460 }, { "epoch": 1.0028318476283276, "grad_norm": 0.1240028366446495, "learning_rate": 0.00037975719009714824, "loss": 2.0767, "step": 263470 }, { "epoch": 1.0028699100964502, "grad_norm": 0.11515330523252487, "learning_rate": 0.000379584054212077, "loss": 2.0785, "step": 263480 }, { "epoch": 1.0029079725645729, "grad_norm": 0.15111832320690155, "learning_rate": 0.00037941116690726846, "loss": 2.0584, "step": 263490 }, { "epoch": 1.0029460350326957, "grad_norm": 0.1152157112956047, "learning_rate": 0.000379238527115088, "loss": 2.0668, "step": 263500 }, { "epoch": 1.0029840975008184, "grad_norm": 0.12826646864414215, "learning_rate": 0.0003790661337755218, "loss": 2.0696, "step": 263510 }, { "epoch": 1.003022159968941, "grad_norm": 0.12478494644165039, "learning_rate": 0.00037889398583610035, "loss": 2.0901, "step": 263520 }, { "epoch": 1.0030602224370637, "grad_norm": 0.12773947417736053, "learning_rate": 0.000378722082251824, "loss": 2.0644, "step": 263530 }, { "epoch": 1.0030982849051864, "grad_norm": 0.11997218430042267, "learning_rate": 0.0003785504219850888, "loss": 2.0726, "step": 263540 }, { "epoch": 1.003136347373309, "grad_norm": 0.12420439720153809, "learning_rate": 0.00037837900400561315, "loss": 2.0677, "step": 263550 }, { "epoch": 1.0031744098414317, "grad_norm": 0.12196481972932816, "learning_rate": 0.00037820782729036597, "loss": 2.0567, "step": 263560 }, { "epoch": 1.0032124723095543, "grad_norm": 0.11405656486749649, "learning_rate": 0.00037803689082349537, "loss": 2.0673, "step": 263570 }, { "epoch": 1.0032505347776772, "grad_norm": 0.12951435148715973, "learning_rate": 0.0003778661935962583, "loss": 2.0634, "step": 263580 }, { "epoch": 1.0032885972457999, "grad_norm": 0.12632913887500763, "learning_rate": 0.0003776957346069511, "loss": 2.0728, "step": 263590 }, { "epoch": 1.0033266597139225, "grad_norm": 0.13721810281276703, "learning_rate": 0.0003775255128608411, "loss": 2.0819, "step": 263600 }, { "epoch": 1.0033647221820452, "grad_norm": 0.1207350492477417, "learning_rate": 0.0003773555273700985, "loss": 2.0599, "step": 263610 }, { "epoch": 1.0034027846501679, "grad_norm": 0.1209789514541626, "learning_rate": 0.00037718577715372976, "loss": 2.0713, "step": 263620 }, { "epoch": 1.0034408471182905, "grad_norm": 0.12487935274839401, "learning_rate": 0.0003770162612375116, "loss": 2.0793, "step": 263630 }, { "epoch": 1.0034789095864132, "grad_norm": 0.13212476670742035, "learning_rate": 0.0003768469786539256, "loss": 2.0634, "step": 263640 }, { "epoch": 1.0035169720545358, "grad_norm": 0.11858946830034256, "learning_rate": 0.00037667792844209383, "loss": 2.0764, "step": 263650 }, { "epoch": 1.0035550345226585, "grad_norm": 0.12068924307823181, "learning_rate": 0.00037650910964771536, "loss": 2.0705, "step": 263660 }, { "epoch": 1.0035930969907814, "grad_norm": 0.11545553803443909, "learning_rate": 0.00037634052132300307, "loss": 2.0713, "step": 263670 }, { "epoch": 1.003631159458904, "grad_norm": 0.11419456452131271, "learning_rate": 0.00037617216252662196, "loss": 2.0601, "step": 263680 }, { "epoch": 1.0036692219270267, "grad_norm": 0.12380310148000717, "learning_rate": 0.00037600403232362754, "loss": 2.0698, "step": 263690 }, { "epoch": 1.0037072843951493, "grad_norm": 0.12769466638565063, "learning_rate": 0.0003758361297854055, "loss": 2.0653, "step": 263700 }, { "epoch": 1.003745346863272, "grad_norm": 0.11498839408159256, "learning_rate": 0.0003756684539896116, "loss": 2.0702, "step": 263710 }, { "epoch": 1.0037834093313946, "grad_norm": 0.1188734918832779, "learning_rate": 0.0003755010040201127, "loss": 2.0584, "step": 263720 }, { "epoch": 1.0038214717995173, "grad_norm": 0.12653295695781708, "learning_rate": 0.0003753337789669284, "loss": 2.0629, "step": 263730 }, { "epoch": 1.00385953426764, "grad_norm": 0.11432266980409622, "learning_rate": 0.0003751667779261733, "loss": 2.0694, "step": 263740 }, { "epoch": 1.0038975967357628, "grad_norm": 0.13446052372455597, "learning_rate": 0.000375, "loss": 2.0673, "step": 263750 }, { "epoch": 1.0039356592038855, "grad_norm": 0.20220239460468292, "learning_rate": 0.00037483344429654273, "loss": 2.0727, "step": 263760 }, { "epoch": 1.0039737216720082, "grad_norm": 0.12345918267965317, "learning_rate": 0.00037466710992986184, "loss": 2.0663, "step": 263770 }, { "epoch": 1.0040117841401308, "grad_norm": 0.11362280696630478, "learning_rate": 0.0003745009960198887, "loss": 2.0641, "step": 263780 }, { "epoch": 1.0040498466082535, "grad_norm": 0.12851089239120483, "learning_rate": 0.00037433510169237127, "loss": 2.0735, "step": 263790 }, { "epoch": 1.0040879090763761, "grad_norm": 0.13473722338676453, "learning_rate": 0.00037416942607882087, "loss": 2.0698, "step": 263800 }, { "epoch": 1.0041259715444988, "grad_norm": 0.13479141891002655, "learning_rate": 0.00037400396831645846, "loss": 2.082, "step": 263810 }, { "epoch": 1.0041640340126214, "grad_norm": 0.11503574252128601, "learning_rate": 0.00037383872754816297, "loss": 2.0694, "step": 263820 }, { "epoch": 1.004202096480744, "grad_norm": 0.1254061758518219, "learning_rate": 0.00037367370292241866, "loss": 2.0849, "step": 263830 }, { "epoch": 1.004240158948867, "grad_norm": 0.12476842105388641, "learning_rate": 0.00037350889359326486, "loss": 2.0783, "step": 263840 }, { "epoch": 1.0042782214169896, "grad_norm": 0.12398224323987961, "learning_rate": 0.00037334429872024446, "loss": 2.0698, "step": 263850 }, { "epoch": 1.0043162838851123, "grad_norm": 0.10987059772014618, "learning_rate": 0.00037317991746835467, "loss": 2.0737, "step": 263860 }, { "epoch": 1.004354346353235, "grad_norm": 0.1181088387966156, "learning_rate": 0.00037301574900799704, "loss": 2.0674, "step": 263870 }, { "epoch": 1.0043924088213576, "grad_norm": 0.1426362544298172, "learning_rate": 0.000372851792514929, "loss": 2.0892, "step": 263880 }, { "epoch": 1.0044304712894803, "grad_norm": 0.12462259083986282, "learning_rate": 0.00037268804717021525, "loss": 2.0735, "step": 263890 }, { "epoch": 1.004468533757603, "grad_norm": 0.12202735990285873, "learning_rate": 0.00037252451216018037, "loss": 2.0563, "step": 263900 }, { "epoch": 1.0045065962257256, "grad_norm": 0.12221315503120422, "learning_rate": 0.0003723611866763614, "loss": 2.0689, "step": 263910 }, { "epoch": 1.0045446586938482, "grad_norm": 0.11692935228347778, "learning_rate": 0.00037219806991546124, "loss": 2.0791, "step": 263920 }, { "epoch": 1.0045827211619711, "grad_norm": 0.13614268600940704, "learning_rate": 0.0003720351610793027, "loss": 2.0521, "step": 263930 }, { "epoch": 1.0046207836300938, "grad_norm": 0.1747211217880249, "learning_rate": 0.0003718724593747829, "loss": 2.0717, "step": 263940 }, { "epoch": 1.0046588460982164, "grad_norm": 0.12883198261260986, "learning_rate": 0.0003717099640138279, "loss": 2.0805, "step": 263950 }, { "epoch": 1.004696908566339, "grad_norm": 0.11266852915287018, "learning_rate": 0.0003715476742133487, "loss": 2.0634, "step": 263960 }, { "epoch": 1.0047349710344617, "grad_norm": 0.11975182592868805, "learning_rate": 0.0003713855891951968, "loss": 2.0719, "step": 263970 }, { "epoch": 1.0047730335025844, "grad_norm": 0.13118694722652435, "learning_rate": 0.00037122370818612097, "loss": 2.0715, "step": 263980 }, { "epoch": 1.004811095970707, "grad_norm": 0.1224631518125534, "learning_rate": 0.00037106203041772375, "loss": 2.0583, "step": 263990 }, { "epoch": 1.0048491584388297, "grad_norm": 0.12530510127544403, "learning_rate": 0.00037090055512641946, "loss": 2.0691, "step": 264000 }, { "epoch": 1.0048872209069526, "grad_norm": 0.12490088492631912, "learning_rate": 0.00037073928155339174, "loss": 2.0633, "step": 264010 }, { "epoch": 1.0049252833750753, "grad_norm": 0.13902606070041656, "learning_rate": 0.0003705782089445522, "loss": 2.0725, "step": 264020 }, { "epoch": 1.004963345843198, "grad_norm": 0.12821468710899353, "learning_rate": 0.0003704173365504989, "loss": 2.0639, "step": 264030 }, { "epoch": 1.0050014083113206, "grad_norm": 0.11437909305095673, "learning_rate": 0.00037025666362647627, "loss": 2.0664, "step": 264040 }, { "epoch": 1.0050394707794432, "grad_norm": 0.13727909326553345, "learning_rate": 0.0003700961894323342, "loss": 2.0568, "step": 264050 }, { "epoch": 1.0050775332475659, "grad_norm": 0.11755349487066269, "learning_rate": 0.00036993591323248887, "loss": 2.0766, "step": 264060 }, { "epoch": 1.0051155957156885, "grad_norm": 0.11817418038845062, "learning_rate": 0.00036977583429588295, "loss": 2.084, "step": 264070 }, { "epoch": 1.0051536581838112, "grad_norm": 0.11482264846563339, "learning_rate": 0.000369615951895947, "loss": 2.0624, "step": 264080 }, { "epoch": 1.0051917206519339, "grad_norm": 0.11733140796422958, "learning_rate": 0.00036945626531056084, "loss": 2.0585, "step": 264090 }, { "epoch": 1.0052297831200567, "grad_norm": 0.12599030137062073, "learning_rate": 0.00036929677382201566, "loss": 2.0602, "step": 264100 }, { "epoch": 1.0052678455881794, "grad_norm": 0.12617869675159454, "learning_rate": 0.000369137476716976, "loss": 2.0603, "step": 264110 }, { "epoch": 1.005305908056302, "grad_norm": 0.1278885155916214, "learning_rate": 0.000368978373286443, "loss": 2.0668, "step": 264120 }, { "epoch": 1.0053439705244247, "grad_norm": 0.12624938786029816, "learning_rate": 0.0003688194628257174, "loss": 2.0669, "step": 264130 }, { "epoch": 1.0053820329925474, "grad_norm": 0.12271690368652344, "learning_rate": 0.00036866074463436305, "loss": 2.0706, "step": 264140 }, { "epoch": 1.00542009546067, "grad_norm": 0.11715830862522125, "learning_rate": 0.00036850221801617086, "loss": 2.0778, "step": 264150 }, { "epoch": 1.0054581579287927, "grad_norm": 0.11676019430160522, "learning_rate": 0.00036834388227912333, "loss": 2.0479, "step": 264160 }, { "epoch": 1.0054962203969153, "grad_norm": 0.13122425973415375, "learning_rate": 0.00036818573673535934, "loss": 2.0753, "step": 264170 }, { "epoch": 1.0055342828650382, "grad_norm": 0.12575967609882355, "learning_rate": 0.000368027780701139, "loss": 2.0607, "step": 264180 }, { "epoch": 1.0055723453331609, "grad_norm": 0.1159675344824791, "learning_rate": 0.00036787001349680937, "loss": 2.0737, "step": 264190 }, { "epoch": 1.0056104078012835, "grad_norm": 0.11694355309009552, "learning_rate": 0.0003677124344467705, "loss": 2.0636, "step": 264200 }, { "epoch": 1.0056484702694062, "grad_norm": 0.12231706827878952, "learning_rate": 0.0003675550428794412, "loss": 2.0793, "step": 264210 }, { "epoch": 1.0056865327375288, "grad_norm": 0.12539350986480713, "learning_rate": 0.0003673978381272261, "loss": 2.0681, "step": 264220 }, { "epoch": 1.0057245952056515, "grad_norm": 0.1346427947282791, "learning_rate": 0.0003672408195264825, "loss": 2.0652, "step": 264230 }, { "epoch": 1.0057626576737742, "grad_norm": 0.11903837323188782, "learning_rate": 0.00036708398641748744, "loss": 2.0749, "step": 264240 }, { "epoch": 1.0058007201418968, "grad_norm": 0.12069795280694962, "learning_rate": 0.0003669273381444057, "loss": 2.0741, "step": 264250 }, { "epoch": 1.0058387826100195, "grad_norm": 0.129713237285614, "learning_rate": 0.0003667708740552578, "loss": 2.0621, "step": 264260 }, { "epoch": 1.0058768450781423, "grad_norm": 0.11903487145900726, "learning_rate": 0.00036661459350188763, "loss": 2.0669, "step": 264270 }, { "epoch": 1.005914907546265, "grad_norm": 0.10933295637369156, "learning_rate": 0.00036645849583993254, "loss": 2.0737, "step": 264280 }, { "epoch": 1.0059529700143877, "grad_norm": 0.11205419898033142, "learning_rate": 0.00036630258042879063, "loss": 2.0696, "step": 264290 }, { "epoch": 1.0059910324825103, "grad_norm": 0.11133591085672379, "learning_rate": 0.0003661468466315916, "loss": 2.0711, "step": 264300 }, { "epoch": 1.006029094950633, "grad_norm": 0.12142396718263626, "learning_rate": 0.00036599129381516537, "loss": 2.0675, "step": 264310 }, { "epoch": 1.0060671574187556, "grad_norm": 0.13197079300880432, "learning_rate": 0.0003658359213500126, "loss": 2.0718, "step": 264320 }, { "epoch": 1.0061052198868783, "grad_norm": 0.1480017453432083, "learning_rate": 0.0003656807286102747, "loss": 2.0737, "step": 264330 }, { "epoch": 1.006143282355001, "grad_norm": 0.12461571395397186, "learning_rate": 0.00036552571497370465, "loss": 2.0583, "step": 264340 }, { "epoch": 1.0061813448231236, "grad_norm": 0.12747173011302948, "learning_rate": 0.0003653708798216374, "loss": 2.0611, "step": 264350 }, { "epoch": 1.0062194072912465, "grad_norm": 0.13145940005779266, "learning_rate": 0.0003652162225389618, "loss": 2.0746, "step": 264360 }, { "epoch": 1.0062574697593691, "grad_norm": 0.12994110584259033, "learning_rate": 0.0003650617425140915, "loss": 2.0586, "step": 264370 }, { "epoch": 1.0062955322274918, "grad_norm": 0.12092719972133636, "learning_rate": 0.0003649074391389371, "loss": 2.0539, "step": 264380 }, { "epoch": 1.0063335946956145, "grad_norm": 0.12784551084041595, "learning_rate": 0.0003647533118088777, "loss": 2.0727, "step": 264390 }, { "epoch": 1.006371657163737, "grad_norm": 0.12413742393255234, "learning_rate": 0.000364599359922734, "loss": 2.0712, "step": 264400 }, { "epoch": 1.0064097196318598, "grad_norm": 0.12628783285617828, "learning_rate": 0.0003644455828827404, "loss": 2.058, "step": 264410 }, { "epoch": 1.0064477820999824, "grad_norm": 0.11597968637943268, "learning_rate": 0.0003642919800945181, "loss": 2.0514, "step": 264420 }, { "epoch": 1.006485844568105, "grad_norm": 0.12907037138938904, "learning_rate": 0.0003641385509670483, "loss": 2.0661, "step": 264430 }, { "epoch": 1.006523907036228, "grad_norm": 0.12897427380084991, "learning_rate": 0.0003639852949126455, "loss": 2.0635, "step": 264440 }, { "epoch": 1.0065619695043506, "grad_norm": 0.1267291009426117, "learning_rate": 0.00036383221134693173, "loss": 2.0646, "step": 264450 }, { "epoch": 1.0066000319724733, "grad_norm": 0.12274293601512909, "learning_rate": 0.0003636792996888098, "loss": 2.0733, "step": 264460 }, { "epoch": 1.006638094440596, "grad_norm": 0.1234775260090828, "learning_rate": 0.0003635265593604382, "loss": 2.0722, "step": 264470 }, { "epoch": 1.0066761569087186, "grad_norm": 0.1398056596517563, "learning_rate": 0.0003633739897872054, "loss": 2.0682, "step": 264480 }, { "epoch": 1.0067142193768412, "grad_norm": 0.13379579782485962, "learning_rate": 0.00036322159039770446, "loss": 2.0686, "step": 264490 }, { "epoch": 1.006752281844964, "grad_norm": 0.11642500758171082, "learning_rate": 0.0003630693606237085, "loss": 2.0736, "step": 264500 }, { "epoch": 1.0067903443130866, "grad_norm": 0.12083044648170471, "learning_rate": 0.00036291729990014544, "loss": 2.0572, "step": 264510 }, { "epoch": 1.0068284067812092, "grad_norm": 0.12776164710521698, "learning_rate": 0.000362765407665074, "loss": 2.0651, "step": 264520 }, { "epoch": 1.006866469249332, "grad_norm": 0.1178087666630745, "learning_rate": 0.0003626136833596592, "loss": 2.0825, "step": 264530 }, { "epoch": 1.0069045317174548, "grad_norm": 0.14784130454063416, "learning_rate": 0.0003624621264281483, "loss": 2.0755, "step": 264540 }, { "epoch": 1.0069425941855774, "grad_norm": 0.1876007318496704, "learning_rate": 0.0003623107363178474, "loss": 2.0711, "step": 264550 }, { "epoch": 1.0069806566537, "grad_norm": 0.13013619184494019, "learning_rate": 0.00036215951247909776, "loss": 2.052, "step": 264560 }, { "epoch": 1.0070187191218227, "grad_norm": 0.1125163584947586, "learning_rate": 0.0003620084543652523, "loss": 2.057, "step": 264570 }, { "epoch": 1.0070567815899454, "grad_norm": 0.12135955691337585, "learning_rate": 0.0003618575614326527, "loss": 2.0687, "step": 264580 }, { "epoch": 1.007094844058068, "grad_norm": 0.1208009198307991, "learning_rate": 0.0003617068331406067, "loss": 2.0622, "step": 264590 }, { "epoch": 1.0071329065261907, "grad_norm": 0.11748412996530533, "learning_rate": 0.0003615562689513654, "loss": 2.0786, "step": 264600 }, { "epoch": 1.0071709689943136, "grad_norm": 0.140289306640625, "learning_rate": 0.00036140586833010094, "loss": 2.0799, "step": 264610 }, { "epoch": 1.0072090314624362, "grad_norm": 0.12493662536144257, "learning_rate": 0.00036125563074488394, "loss": 2.0628, "step": 264620 }, { "epoch": 1.007247093930559, "grad_norm": 0.11624481528997421, "learning_rate": 0.0003611055556666623, "loss": 2.0709, "step": 264630 }, { "epoch": 1.0072851563986815, "grad_norm": 0.11657288670539856, "learning_rate": 0.00036095564256923864, "loss": 2.0628, "step": 264640 }, { "epoch": 1.0073232188668042, "grad_norm": 0.1291431337594986, "learning_rate": 0.00036080589092924944, "loss": 2.0778, "step": 264650 }, { "epoch": 1.0073612813349269, "grad_norm": 0.11379469186067581, "learning_rate": 0.0003606563002261435, "loss": 2.0554, "step": 264660 }, { "epoch": 1.0073993438030495, "grad_norm": 0.12691283226013184, "learning_rate": 0.0003605068699421605, "loss": 2.0661, "step": 264670 }, { "epoch": 1.0074374062711722, "grad_norm": 0.12331678718328476, "learning_rate": 0.0003603575995623106, "loss": 2.0747, "step": 264680 }, { "epoch": 1.0074754687392948, "grad_norm": 0.11112986505031586, "learning_rate": 0.0003602084885743535, "loss": 2.0633, "step": 264690 }, { "epoch": 1.0075135312074177, "grad_norm": 0.11814521253108978, "learning_rate": 0.00036005953646877774, "loss": 2.0625, "step": 264700 }, { "epoch": 1.0075515936755404, "grad_norm": 0.12072381377220154, "learning_rate": 0.000359910742738781, "loss": 2.0715, "step": 264710 }, { "epoch": 1.007589656143663, "grad_norm": 0.11832285672426224, "learning_rate": 0.00035976210688024913, "loss": 2.0724, "step": 264720 }, { "epoch": 1.0076277186117857, "grad_norm": 0.12225547432899475, "learning_rate": 0.0003596136283917369, "loss": 2.0707, "step": 264730 }, { "epoch": 1.0076657810799083, "grad_norm": 0.12879660725593567, "learning_rate": 0.0003594653067744481, "loss": 2.0624, "step": 264740 }, { "epoch": 1.007703843548031, "grad_norm": 0.11869513988494873, "learning_rate": 0.0003593171415322156, "loss": 2.0778, "step": 264750 }, { "epoch": 1.0077419060161537, "grad_norm": 0.1178370863199234, "learning_rate": 0.00035916913217148264, "loss": 2.0569, "step": 264760 }, { "epoch": 1.0077799684842763, "grad_norm": 0.12137988209724426, "learning_rate": 0.00035902127820128314, "loss": 2.0602, "step": 264770 }, { "epoch": 1.007818030952399, "grad_norm": 0.1318652629852295, "learning_rate": 0.00035887357913322304, "loss": 2.0584, "step": 264780 }, { "epoch": 1.0078560934205218, "grad_norm": 0.1255052536725998, "learning_rate": 0.000358726034481461, "loss": 2.0753, "step": 264790 }, { "epoch": 1.0078941558886445, "grad_norm": 0.11904001235961914, "learning_rate": 0.00035857864376269053, "loss": 2.0701, "step": 264800 }, { "epoch": 1.0079322183567672, "grad_norm": 0.1347653716802597, "learning_rate": 0.0003584314064961205, "loss": 2.0608, "step": 264810 }, { "epoch": 1.0079702808248898, "grad_norm": 0.12691181898117065, "learning_rate": 0.0003582843222034579, "loss": 2.067, "step": 264820 }, { "epoch": 1.0080083432930125, "grad_norm": 0.11998774111270905, "learning_rate": 0.00035813739040888894, "loss": 2.0675, "step": 264830 }, { "epoch": 1.0080464057611351, "grad_norm": 0.1235920637845993, "learning_rate": 0.0003579906106390614, "loss": 2.0609, "step": 264840 }, { "epoch": 1.0080844682292578, "grad_norm": 0.16122466325759888, "learning_rate": 0.00035784398242306683, "loss": 2.0666, "step": 264850 }, { "epoch": 1.0081225306973804, "grad_norm": 0.12416666746139526, "learning_rate": 0.00035769750529242295, "loss": 2.0639, "step": 264860 }, { "epoch": 1.0081605931655033, "grad_norm": 0.12113657593727112, "learning_rate": 0.00035755117878105605, "loss": 2.0689, "step": 264870 }, { "epoch": 1.008198655633626, "grad_norm": 0.12859567999839783, "learning_rate": 0.00035740500242528375, "loss": 2.0651, "step": 264880 }, { "epoch": 1.0082367181017486, "grad_norm": 0.12713578343391418, "learning_rate": 0.00035725897576379805, "loss": 2.0582, "step": 264890 }, { "epoch": 1.0082747805698713, "grad_norm": 0.11723849922418594, "learning_rate": 0.0003571130983376479, "loss": 2.0713, "step": 264900 }, { "epoch": 1.008312843037994, "grad_norm": 0.12310074269771576, "learning_rate": 0.000356967369690223, "loss": 2.0527, "step": 264910 }, { "epoch": 1.0083509055061166, "grad_norm": 0.12018231302499771, "learning_rate": 0.00035682178936723646, "loss": 2.0476, "step": 264920 }, { "epoch": 1.0083889679742393, "grad_norm": 0.1254473626613617, "learning_rate": 0.00035667635691670875, "loss": 2.0584, "step": 264930 }, { "epoch": 1.008427030442362, "grad_norm": 0.1365075260400772, "learning_rate": 0.00035653107188895106, "loss": 2.0541, "step": 264940 }, { "epoch": 1.0084650929104846, "grad_norm": 0.12408968806266785, "learning_rate": 0.0003563859338365493, "loss": 2.0747, "step": 264950 }, { "epoch": 1.0085031553786075, "grad_norm": 0.1151510551571846, "learning_rate": 0.0003562409423143478, "loss": 2.0481, "step": 264960 }, { "epoch": 1.0085412178467301, "grad_norm": 0.12300057709217072, "learning_rate": 0.00035609609687943365, "loss": 2.06, "step": 264970 }, { "epoch": 1.0085792803148528, "grad_norm": 0.11640718579292297, "learning_rate": 0.0003559513970911207, "loss": 2.0609, "step": 264980 }, { "epoch": 1.0086173427829754, "grad_norm": 0.13439495861530304, "learning_rate": 0.00035580684251093375, "loss": 2.0642, "step": 264990 }, { "epoch": 1.008655405251098, "grad_norm": 0.13347728550434113, "learning_rate": 0.00035566243270259356, "loss": 2.0612, "step": 265000 }, { "epoch": 1.0086934677192207, "grad_norm": 0.12582042813301086, "learning_rate": 0.0003555181672320011, "loss": 2.0582, "step": 265010 }, { "epoch": 1.0087315301873434, "grad_norm": 0.1276628077030182, "learning_rate": 0.00035537404566722243, "loss": 2.0508, "step": 265020 }, { "epoch": 1.008769592655466, "grad_norm": 0.13432751595973969, "learning_rate": 0.0003552300675784735, "loss": 2.0717, "step": 265030 }, { "epoch": 1.008807655123589, "grad_norm": 0.11870171129703522, "learning_rate": 0.00035508623253810564, "loss": 2.0571, "step": 265040 }, { "epoch": 1.0088457175917116, "grad_norm": 0.12191476672887802, "learning_rate": 0.00035494254012058993, "loss": 2.0698, "step": 265050 }, { "epoch": 1.0088837800598343, "grad_norm": 0.12333951145410538, "learning_rate": 0.00035479898990250337, "loss": 2.0618, "step": 265060 }, { "epoch": 1.008921842527957, "grad_norm": 0.11955705285072327, "learning_rate": 0.00035465558146251366, "loss": 2.0592, "step": 265070 }, { "epoch": 1.0089599049960796, "grad_norm": 0.12724429368972778, "learning_rate": 0.00035451231438136537, "loss": 2.0613, "step": 265080 }, { "epoch": 1.0089979674642022, "grad_norm": 0.13674019277095795, "learning_rate": 0.000354369188241865, "loss": 2.0668, "step": 265090 }, { "epoch": 1.0090360299323249, "grad_norm": 0.12613962590694427, "learning_rate": 0.00035422620262886754, "loss": 2.0556, "step": 265100 }, { "epoch": 1.0090740924004475, "grad_norm": 0.12345962971448898, "learning_rate": 0.00035408335712926143, "loss": 2.0559, "step": 265110 }, { "epoch": 1.0091121548685702, "grad_norm": 0.11877019703388214, "learning_rate": 0.0003539406513319557, "loss": 2.0419, "step": 265120 }, { "epoch": 1.009150217336693, "grad_norm": 0.11650692671537399, "learning_rate": 0.0003537980848278656, "loss": 2.0547, "step": 265130 }, { "epoch": 1.0091882798048157, "grad_norm": 0.12157123535871506, "learning_rate": 0.0003536556572098987, "loss": 2.0465, "step": 265140 }, { "epoch": 1.0092263422729384, "grad_norm": 0.13357307016849518, "learning_rate": 0.0003535133680729421, "loss": 2.0592, "step": 265150 }, { "epoch": 1.009264404741061, "grad_norm": 0.12342700362205505, "learning_rate": 0.00035337121701384825, "loss": 2.0605, "step": 265160 }, { "epoch": 1.0093024672091837, "grad_norm": 0.1354524791240692, "learning_rate": 0.00035322920363142177, "loss": 2.0726, "step": 265170 }, { "epoch": 1.0093405296773064, "grad_norm": 0.13181686401367188, "learning_rate": 0.0003530873275264066, "loss": 2.0707, "step": 265180 }, { "epoch": 1.009378592145429, "grad_norm": 0.1218135878443718, "learning_rate": 0.0003529455883014726, "loss": 2.0449, "step": 265190 }, { "epoch": 1.0094166546135517, "grad_norm": 0.11534344404935837, "learning_rate": 0.00035280398556120253, "loss": 2.0425, "step": 265200 }, { "epoch": 1.0094547170816743, "grad_norm": 0.12393413484096527, "learning_rate": 0.0003526625189120795, "loss": 2.0799, "step": 265210 }, { "epoch": 1.0094927795497972, "grad_norm": 0.13263042271137238, "learning_rate": 0.0003525211879624738, "loss": 2.0631, "step": 265220 }, { "epoch": 1.0095308420179199, "grad_norm": 0.12168634682893753, "learning_rate": 0.0003523799923226304, "loss": 2.0658, "step": 265230 }, { "epoch": 1.0095689044860425, "grad_norm": 0.1166045144200325, "learning_rate": 0.0003522389316046567, "loss": 2.065, "step": 265240 }, { "epoch": 1.0096069669541652, "grad_norm": 0.12050196528434753, "learning_rate": 0.00035209800542250957, "loss": 2.0715, "step": 265250 }, { "epoch": 1.0096450294222878, "grad_norm": 0.13119080662727356, "learning_rate": 0.0003519572133919836, "loss": 2.0592, "step": 265260 }, { "epoch": 1.0096830918904105, "grad_norm": 0.13546597957611084, "learning_rate": 0.00035181655513069843, "loss": 2.0598, "step": 265270 }, { "epoch": 1.0097211543585332, "grad_norm": 0.12136995792388916, "learning_rate": 0.00035167603025808677, "loss": 2.0529, "step": 265280 }, { "epoch": 1.0097592168266558, "grad_norm": 0.13856296241283417, "learning_rate": 0.0003515356383953824, "loss": 2.0558, "step": 265290 }, { "epoch": 1.0097972792947787, "grad_norm": 0.12482566386461258, "learning_rate": 0.00035139537916560826, "loss": 2.0644, "step": 265300 }, { "epoch": 1.0098353417629014, "grad_norm": 0.12852446734905243, "learning_rate": 0.00035125525219356483, "loss": 2.0676, "step": 265310 }, { "epoch": 1.009873404231024, "grad_norm": 0.1314767599105835, "learning_rate": 0.00035111525710581807, "loss": 2.0602, "step": 265320 }, { "epoch": 1.0099114666991467, "grad_norm": 0.12297448515892029, "learning_rate": 0.0003509753935306879, "loss": 2.0586, "step": 265330 }, { "epoch": 1.0099495291672693, "grad_norm": 0.1343231052160263, "learning_rate": 0.00035083566109823705, "loss": 2.0591, "step": 265340 }, { "epoch": 1.009987591635392, "grad_norm": 0.11657698452472687, "learning_rate": 0.0003506960594402591, "loss": 2.0508, "step": 265350 }, { "epoch": 1.0100256541035146, "grad_norm": 0.13249176740646362, "learning_rate": 0.00035055658819026735, "loss": 2.0565, "step": 265360 }, { "epoch": 1.0100637165716373, "grad_norm": 0.1254306137561798, "learning_rate": 0.00035041724698348407, "loss": 2.0648, "step": 265370 }, { "epoch": 1.01010177903976, "grad_norm": 0.12727105617523193, "learning_rate": 0.00035027803545682864, "loss": 2.054, "step": 265380 }, { "epoch": 1.0101398415078828, "grad_norm": 0.11955145001411438, "learning_rate": 0.00035013895324890684, "loss": 2.0738, "step": 265390 }, { "epoch": 1.0101779039760055, "grad_norm": 0.11762211471796036, "learning_rate": 0.00035, "loss": 2.0612, "step": 265400 }, { "epoch": 1.0102159664441281, "grad_norm": 0.13332591950893402, "learning_rate": 0.00034986117535205403, "loss": 2.0627, "step": 265410 }, { "epoch": 1.0102540289122508, "grad_norm": 0.1313796192407608, "learning_rate": 0.0003497224789486687, "loss": 2.0571, "step": 265420 }, { "epoch": 1.0102920913803735, "grad_norm": 0.11726871132850647, "learning_rate": 0.00034958391043508674, "loss": 2.069, "step": 265430 }, { "epoch": 1.0103301538484961, "grad_norm": 0.1298063099384308, "learning_rate": 0.0003494454694581838, "loss": 2.0624, "step": 265440 }, { "epoch": 1.0103682163166188, "grad_norm": 0.12552876770496368, "learning_rate": 0.0003493071556664573, "loss": 2.0478, "step": 265450 }, { "epoch": 1.0104062787847414, "grad_norm": 0.12347956746816635, "learning_rate": 0.00034916896871001646, "loss": 2.0487, "step": 265460 }, { "epoch": 1.0104443412528643, "grad_norm": 0.11969196796417236, "learning_rate": 0.0003490309082405718, "loss": 2.0575, "step": 265470 }, { "epoch": 1.010482403720987, "grad_norm": 0.12199029326438904, "learning_rate": 0.0003488929739114248, "loss": 2.0509, "step": 265480 }, { "epoch": 1.0105204661891096, "grad_norm": 0.12357623130083084, "learning_rate": 0.0003487551653774583, "loss": 2.0611, "step": 265490 }, { "epoch": 1.0105585286572323, "grad_norm": 0.1269357055425644, "learning_rate": 0.00034861748229512545, "loss": 2.0546, "step": 265500 }, { "epoch": 1.010596591125355, "grad_norm": 0.12839782238006592, "learning_rate": 0.0003484799243224406, "loss": 2.0637, "step": 265510 }, { "epoch": 1.0106346535934776, "grad_norm": 0.12106683105230331, "learning_rate": 0.00034834249111896897, "loss": 2.0545, "step": 265520 }, { "epoch": 1.0106727160616003, "grad_norm": 0.11588679999113083, "learning_rate": 0.00034820518234581697, "loss": 2.0674, "step": 265530 }, { "epoch": 1.010710778529723, "grad_norm": 0.12074842303991318, "learning_rate": 0.00034806799766562234, "loss": 2.0695, "step": 265540 }, { "epoch": 1.0107488409978456, "grad_norm": 0.12576799094676971, "learning_rate": 0.0003479309367425445, "loss": 2.0669, "step": 265550 }, { "epoch": 1.0107869034659684, "grad_norm": 0.12758374214172363, "learning_rate": 0.000347793999242255, "loss": 2.0525, "step": 265560 }, { "epoch": 1.010824965934091, "grad_norm": 0.13564392924308777, "learning_rate": 0.00034765718483192805, "loss": 2.0669, "step": 265570 }, { "epoch": 1.0108630284022138, "grad_norm": 0.12913645803928375, "learning_rate": 0.00034752049318023095, "loss": 2.0469, "step": 265580 }, { "epoch": 1.0109010908703364, "grad_norm": 0.1334034502506256, "learning_rate": 0.00034738392395731484, "loss": 2.0645, "step": 265590 }, { "epoch": 1.010939153338459, "grad_norm": 0.12395340949296951, "learning_rate": 0.00034724747683480533, "loss": 2.059, "step": 265600 }, { "epoch": 1.0109772158065817, "grad_norm": 0.1262947916984558, "learning_rate": 0.0003471111514857934, "loss": 2.0786, "step": 265610 }, { "epoch": 1.0110152782747044, "grad_norm": 0.11491633951663971, "learning_rate": 0.0003469749475848263, "loss": 2.0532, "step": 265620 }, { "epoch": 1.011053340742827, "grad_norm": 0.12061280012130737, "learning_rate": 0.00034683886480789807, "loss": 2.0567, "step": 265630 }, { "epoch": 1.0110914032109497, "grad_norm": 0.11542093008756638, "learning_rate": 0.0003467029028324411, "loss": 2.0645, "step": 265640 }, { "epoch": 1.0111294656790726, "grad_norm": 0.11799933016300201, "learning_rate": 0.00034656706133731694, "loss": 2.0585, "step": 265650 }, { "epoch": 1.0111675281471952, "grad_norm": 0.13193176686763763, "learning_rate": 0.0003464313400028074, "loss": 2.0518, "step": 265660 }, { "epoch": 1.011205590615318, "grad_norm": 0.12344823032617569, "learning_rate": 0.000346295738510606, "loss": 2.0678, "step": 265670 }, { "epoch": 1.0112436530834406, "grad_norm": 0.1263018101453781, "learning_rate": 0.000346160256543809, "loss": 2.0649, "step": 265680 }, { "epoch": 1.0112817155515632, "grad_norm": 0.12341112643480301, "learning_rate": 0.00034602489378690685, "loss": 2.0566, "step": 265690 }, { "epoch": 1.0113197780196859, "grad_norm": 0.1236085444688797, "learning_rate": 0.0003458896499257756, "loss": 2.0558, "step": 265700 }, { "epoch": 1.0113578404878085, "grad_norm": 0.13375934958457947, "learning_rate": 0.00034575452464766866, "loss": 2.0586, "step": 265710 }, { "epoch": 1.0113959029559312, "grad_norm": 0.11553890258073807, "learning_rate": 0.00034561951764120786, "loss": 2.042, "step": 265720 }, { "epoch": 1.011433965424054, "grad_norm": 0.15427514910697937, "learning_rate": 0.0003454846285963756, "loss": 2.0556, "step": 265730 }, { "epoch": 1.0114720278921767, "grad_norm": 0.12771137058734894, "learning_rate": 0.00034534985720450606, "loss": 2.0808, "step": 265740 }, { "epoch": 1.0115100903602994, "grad_norm": 0.12867282330989838, "learning_rate": 0.0003452152031582774, "loss": 2.0604, "step": 265750 }, { "epoch": 1.011548152828422, "grad_norm": 0.12850438058376312, "learning_rate": 0.0003450806661517033, "loss": 2.0593, "step": 265760 }, { "epoch": 1.0115862152965447, "grad_norm": 0.12304188311100006, "learning_rate": 0.00034494624588012496, "loss": 2.0701, "step": 265770 }, { "epoch": 1.0116242777646673, "grad_norm": 0.1225685402750969, "learning_rate": 0.00034481194204020294, "loss": 2.0595, "step": 265780 }, { "epoch": 1.01166234023279, "grad_norm": 0.12346023321151733, "learning_rate": 0.00034467775432990935, "loss": 2.0654, "step": 265790 }, { "epoch": 1.0117004027009127, "grad_norm": 0.12504765391349792, "learning_rate": 0.00034454368244851973, "loss": 2.0584, "step": 265800 }, { "epoch": 1.0117384651690353, "grad_norm": 0.1463267207145691, "learning_rate": 0.0003444097260966055, "loss": 2.0563, "step": 265810 }, { "epoch": 1.0117765276371582, "grad_norm": 0.13947755098342896, "learning_rate": 0.00034427588497602563, "loss": 2.0643, "step": 265820 }, { "epoch": 1.0118145901052809, "grad_norm": 0.1254192292690277, "learning_rate": 0.0003441421587899195, "loss": 2.0549, "step": 265830 }, { "epoch": 1.0118526525734035, "grad_norm": 0.12350041419267654, "learning_rate": 0.00034400854724269883, "loss": 2.0662, "step": 265840 }, { "epoch": 1.0118907150415262, "grad_norm": 0.1425914466381073, "learning_rate": 0.00034387505004004003, "loss": 2.063, "step": 265850 }, { "epoch": 1.0119287775096488, "grad_norm": 0.12194148451089859, "learning_rate": 0.000343741666888877, "loss": 2.0575, "step": 265860 }, { "epoch": 1.0119668399777715, "grad_norm": 0.1255568414926529, "learning_rate": 0.00034360839749739337, "loss": 2.0679, "step": 265870 }, { "epoch": 1.0120049024458941, "grad_norm": 0.12367769330739975, "learning_rate": 0.00034347524157501475, "loss": 2.0739, "step": 265880 }, { "epoch": 1.0120429649140168, "grad_norm": 0.11774411052465439, "learning_rate": 0.00034334219883240205, "loss": 2.0546, "step": 265890 }, { "epoch": 1.0120810273821397, "grad_norm": 0.1194257140159607, "learning_rate": 0.00034320926898144354, "loss": 2.0595, "step": 265900 }, { "epoch": 1.0121190898502623, "grad_norm": 0.11727624386548996, "learning_rate": 0.00034307645173524783, "loss": 2.073, "step": 265910 }, { "epoch": 1.012157152318385, "grad_norm": 0.23917624354362488, "learning_rate": 0.0003429437468081367, "loss": 2.0695, "step": 265920 }, { "epoch": 1.0121952147865076, "grad_norm": 0.11985159665346146, "learning_rate": 0.00034281115391563777, "loss": 2.0403, "step": 265930 }, { "epoch": 1.0122332772546303, "grad_norm": 0.11827631294727325, "learning_rate": 0.0003426786727744773, "loss": 2.0505, "step": 265940 }, { "epoch": 1.012271339722753, "grad_norm": 0.1171039417386055, "learning_rate": 0.0003425463031025735, "loss": 2.0589, "step": 265950 }, { "epoch": 1.0123094021908756, "grad_norm": 0.12298347800970078, "learning_rate": 0.00034241404461902915, "loss": 2.0669, "step": 265960 }, { "epoch": 1.0123474646589983, "grad_norm": 0.12044963985681534, "learning_rate": 0.00034228189704412496, "loss": 2.0533, "step": 265970 }, { "epoch": 1.012385527127121, "grad_norm": 0.12134046852588654, "learning_rate": 0.0003421498600993124, "loss": 2.0674, "step": 265980 }, { "epoch": 1.0124235895952438, "grad_norm": 0.11874759942293167, "learning_rate": 0.00034201793350720677, "loss": 2.0597, "step": 265990 }, { "epoch": 1.0124616520633665, "grad_norm": 0.1224139928817749, "learning_rate": 0.000341886116991581, "loss": 2.0592, "step": 266000 }, { "epoch": 1.0124997145314891, "grad_norm": 0.1219983920454979, "learning_rate": 0.0003417544102773582, "loss": 2.0589, "step": 266010 }, { "epoch": 1.0125377769996118, "grad_norm": 0.14656014740467072, "learning_rate": 0.00034162281309060535, "loss": 2.0629, "step": 266020 }, { "epoch": 1.0125758394677344, "grad_norm": 0.13034473359584808, "learning_rate": 0.00034149132515852643, "loss": 2.0674, "step": 266030 }, { "epoch": 1.012613901935857, "grad_norm": 0.12056338042020798, "learning_rate": 0.0003413599462094561, "loss": 2.0456, "step": 266040 }, { "epoch": 1.0126519644039798, "grad_norm": 0.12062523514032364, "learning_rate": 0.0003412286759728529, "loss": 2.0658, "step": 266050 }, { "epoch": 1.0126900268721024, "grad_norm": 0.11885876953601837, "learning_rate": 0.00034109751417929295, "loss": 2.0566, "step": 266060 }, { "epoch": 1.012728089340225, "grad_norm": 0.12024195492267609, "learning_rate": 0.0003409664605604633, "loss": 2.0683, "step": 266070 }, { "epoch": 1.012766151808348, "grad_norm": 0.12108021229505539, "learning_rate": 0.0003408355148491557, "loss": 2.0484, "step": 266080 }, { "epoch": 1.0128042142764706, "grad_norm": 0.11807679384946823, "learning_rate": 0.00034070467677926007, "loss": 2.0572, "step": 266090 }, { "epoch": 1.0128422767445933, "grad_norm": 0.11726764589548111, "learning_rate": 0.00034057394608575845, "loss": 2.0616, "step": 266100 }, { "epoch": 1.012880339212716, "grad_norm": 0.12831568717956543, "learning_rate": 0.0003404433225047183, "loss": 2.0764, "step": 266110 }, { "epoch": 1.0129184016808386, "grad_norm": 0.12672363221645355, "learning_rate": 0.00034031280577328686, "loss": 2.051, "step": 266120 }, { "epoch": 1.0129564641489612, "grad_norm": 0.15040802955627441, "learning_rate": 0.0003401823956296846, "loss": 2.0577, "step": 266130 }, { "epoch": 1.012994526617084, "grad_norm": 0.1106763556599617, "learning_rate": 0.00034005209181319895, "loss": 2.0587, "step": 266140 }, { "epoch": 1.0130325890852065, "grad_norm": 0.12348024547100067, "learning_rate": 0.0003399218940641788, "loss": 2.0643, "step": 266150 }, { "epoch": 1.0130706515533294, "grad_norm": 0.14651146531105042, "learning_rate": 0.00033979180212402783, "loss": 2.0612, "step": 266160 }, { "epoch": 1.013108714021452, "grad_norm": 0.11821554601192474, "learning_rate": 0.00033966181573519884, "loss": 2.0595, "step": 266170 }, { "epoch": 1.0131467764895747, "grad_norm": 0.12168850004673004, "learning_rate": 0.0003395319346411879, "loss": 2.0767, "step": 266180 }, { "epoch": 1.0131848389576974, "grad_norm": 0.1250876486301422, "learning_rate": 0.0003394021585865281, "loss": 2.0556, "step": 266190 }, { "epoch": 1.01322290142582, "grad_norm": 0.12602892518043518, "learning_rate": 0.00033927248731678404, "loss": 2.0792, "step": 266200 }, { "epoch": 1.0132609638939427, "grad_norm": 0.11486831307411194, "learning_rate": 0.0003391429205785459, "loss": 2.0466, "step": 266210 }, { "epoch": 1.0132990263620654, "grad_norm": 0.11743859201669693, "learning_rate": 0.0003390134581194233, "loss": 2.0593, "step": 266220 }, { "epoch": 1.013337088830188, "grad_norm": 0.12538020312786102, "learning_rate": 0.00033888409968804034, "loss": 2.0717, "step": 266230 }, { "epoch": 1.0133751512983107, "grad_norm": 0.12342677265405655, "learning_rate": 0.000338754845034029, "loss": 2.0695, "step": 266240 }, { "epoch": 1.0134132137664336, "grad_norm": 0.12480781972408295, "learning_rate": 0.00033862569390802436, "loss": 2.0609, "step": 266250 }, { "epoch": 1.0134512762345562, "grad_norm": 0.1464034616947174, "learning_rate": 0.0003384966460616581, "loss": 2.0619, "step": 266260 }, { "epoch": 1.0134893387026789, "grad_norm": 0.12034554779529572, "learning_rate": 0.0003383677012475539, "loss": 2.0665, "step": 266270 }, { "epoch": 1.0135274011708015, "grad_norm": 0.12253714352846146, "learning_rate": 0.00033823885921932095, "loss": 2.0644, "step": 266280 }, { "epoch": 1.0135654636389242, "grad_norm": 0.14051516354084015, "learning_rate": 0.0003381101197315492, "loss": 2.0513, "step": 266290 }, { "epoch": 1.0136035261070468, "grad_norm": 0.12799084186553955, "learning_rate": 0.0003379814825398035, "loss": 2.0541, "step": 266300 }, { "epoch": 1.0136415885751695, "grad_norm": 0.12607944011688232, "learning_rate": 0.0003378529474006183, "loss": 2.057, "step": 266310 }, { "epoch": 1.0136796510432922, "grad_norm": 0.12739719450473785, "learning_rate": 0.0003377245140714922, "loss": 2.046, "step": 266320 }, { "epoch": 1.013717713511415, "grad_norm": 0.13314710557460785, "learning_rate": 0.0003375961823108829, "loss": 2.0547, "step": 266330 }, { "epoch": 1.0137557759795377, "grad_norm": 0.13084451854228973, "learning_rate": 0.00033746795187820136, "loss": 2.0629, "step": 266340 }, { "epoch": 1.0137938384476604, "grad_norm": 0.12994296848773956, "learning_rate": 0.00033733982253380726, "loss": 2.0678, "step": 266350 }, { "epoch": 1.013831900915783, "grad_norm": 0.12707634270191193, "learning_rate": 0.0003372117940390029, "loss": 2.0555, "step": 266360 }, { "epoch": 1.0138699633839057, "grad_norm": 0.1259993463754654, "learning_rate": 0.000337083866156029, "loss": 2.0404, "step": 266370 }, { "epoch": 1.0139080258520283, "grad_norm": 0.15101008117198944, "learning_rate": 0.0003369560386480587, "loss": 2.0687, "step": 266380 }, { "epoch": 1.013946088320151, "grad_norm": 0.12252911180257797, "learning_rate": 0.0003368283112791928, "loss": 2.0619, "step": 266390 }, { "epoch": 1.0139841507882736, "grad_norm": 0.11849822103977203, "learning_rate": 0.00033670068381445477, "loss": 2.0519, "step": 266400 }, { "epoch": 1.0140222132563963, "grad_norm": 0.11857569962739944, "learning_rate": 0.0003365731560197857, "loss": 2.0449, "step": 266410 }, { "epoch": 1.0140602757245192, "grad_norm": 0.12244977056980133, "learning_rate": 0.00033644572766203875, "loss": 2.0471, "step": 266420 }, { "epoch": 1.0140983381926418, "grad_norm": 0.128976970911026, "learning_rate": 0.00033631839850897517, "loss": 2.0452, "step": 266430 }, { "epoch": 1.0141364006607645, "grad_norm": 0.11401844769716263, "learning_rate": 0.00033619116832925847, "loss": 2.0556, "step": 266440 }, { "epoch": 1.0141744631288871, "grad_norm": 0.12518161535263062, "learning_rate": 0.00033606403689245005, "loss": 2.061, "step": 266450 }, { "epoch": 1.0142125255970098, "grad_norm": 0.12827928364276886, "learning_rate": 0.00033593700396900386, "loss": 2.0657, "step": 266460 }, { "epoch": 1.0142505880651325, "grad_norm": 0.12486431747674942, "learning_rate": 0.00033581006933026215, "loss": 2.0553, "step": 266470 }, { "epoch": 1.0142886505332551, "grad_norm": 0.13143974542617798, "learning_rate": 0.00033568323274845016, "loss": 2.0454, "step": 266480 }, { "epoch": 1.0143267130013778, "grad_norm": 0.133738175034523, "learning_rate": 0.0003355564939966718, "loss": 2.0383, "step": 266490 }, { "epoch": 1.0143647754695004, "grad_norm": 0.12220674008131027, "learning_rate": 0.00033542985284890417, "loss": 2.0568, "step": 266500 }, { "epoch": 1.0144028379376233, "grad_norm": 0.12594294548034668, "learning_rate": 0.000335303309079994, "loss": 2.0652, "step": 266510 }, { "epoch": 1.014440900405746, "grad_norm": 0.1356634497642517, "learning_rate": 0.0003351768624656518, "loss": 2.0414, "step": 266520 }, { "epoch": 1.0144789628738686, "grad_norm": 0.11889128386974335, "learning_rate": 0.000335050512782448, "loss": 2.0487, "step": 266530 }, { "epoch": 1.0145170253419913, "grad_norm": 0.13497433066368103, "learning_rate": 0.0003349242598078082, "loss": 2.0658, "step": 266540 }, { "epoch": 1.014555087810114, "grad_norm": 0.11696867644786835, "learning_rate": 0.0003347981033200083, "loss": 2.0619, "step": 266550 }, { "epoch": 1.0145931502782366, "grad_norm": 0.11523982882499695, "learning_rate": 0.00033467204309817005, "loss": 2.0731, "step": 266560 }, { "epoch": 1.0146312127463593, "grad_norm": 0.1279909610748291, "learning_rate": 0.00033454607892225704, "loss": 2.0666, "step": 266570 }, { "epoch": 1.014669275214482, "grad_norm": 0.11702031642198563, "learning_rate": 0.0003344202105730695, "loss": 2.0524, "step": 266580 }, { "epoch": 1.0147073376826048, "grad_norm": 0.12712937593460083, "learning_rate": 0.00033429443783224014, "loss": 2.0589, "step": 266590 }, { "epoch": 1.0147454001507275, "grad_norm": 0.12439191341400146, "learning_rate": 0.00033416876048223, "loss": 2.047, "step": 266600 }, { "epoch": 1.01478346261885, "grad_norm": 0.1319584995508194, "learning_rate": 0.0003340431783063237, "loss": 2.0527, "step": 266610 }, { "epoch": 1.0148215250869728, "grad_norm": 0.1275530457496643, "learning_rate": 0.00033391769108862516, "loss": 2.066, "step": 266620 }, { "epoch": 1.0148595875550954, "grad_norm": 0.11523158103227615, "learning_rate": 0.00033379229861405337, "loss": 2.0584, "step": 266630 }, { "epoch": 1.014897650023218, "grad_norm": 0.1223595067858696, "learning_rate": 0.000333667000668338, "loss": 2.0693, "step": 266640 }, { "epoch": 1.0149357124913407, "grad_norm": 0.12558513879776, "learning_rate": 0.0003335417970380152, "loss": 2.0495, "step": 266650 }, { "epoch": 1.0149737749594634, "grad_norm": 0.14692170917987823, "learning_rate": 0.0003334166875104232, "loss": 2.0554, "step": 266660 }, { "epoch": 1.015011837427586, "grad_norm": 0.12919218838214874, "learning_rate": 0.0003332916718736984, "loss": 2.0621, "step": 266670 }, { "epoch": 1.015049899895709, "grad_norm": 0.14028730988502502, "learning_rate": 0.00033316674991677064, "loss": 2.0557, "step": 266680 }, { "epoch": 1.0150879623638316, "grad_norm": 0.1568068265914917, "learning_rate": 0.00033304192142936, "loss": 2.0638, "step": 266690 }, { "epoch": 1.0151260248319542, "grad_norm": 0.13906286656856537, "learning_rate": 0.00033291718620197155, "loss": 2.0654, "step": 266700 }, { "epoch": 1.015164087300077, "grad_norm": 0.1218264028429985, "learning_rate": 0.00033279254402589177, "loss": 2.0528, "step": 266710 }, { "epoch": 1.0152021497681996, "grad_norm": 0.1341378092765808, "learning_rate": 0.0003326679946931849, "loss": 2.064, "step": 266720 }, { "epoch": 1.0152402122363222, "grad_norm": 0.13618212938308716, "learning_rate": 0.00033254353799668803, "loss": 2.0526, "step": 266730 }, { "epoch": 1.0152782747044449, "grad_norm": 0.1174771636724472, "learning_rate": 0.00033241917373000773, "loss": 2.068, "step": 266740 }, { "epoch": 1.0153163371725675, "grad_norm": 0.12650097906589508, "learning_rate": 0.00033229490168751574, "loss": 2.0551, "step": 266750 }, { "epoch": 1.0153543996406904, "grad_norm": 0.12127122282981873, "learning_rate": 0.00033217072166434527, "loss": 2.0677, "step": 266760 }, { "epoch": 1.015392462108813, "grad_norm": 0.12797364592552185, "learning_rate": 0.0003320466334563867, "loss": 2.0677, "step": 266770 }, { "epoch": 1.0154305245769357, "grad_norm": 0.12370993942022324, "learning_rate": 0.0003319226368602839, "loss": 2.0511, "step": 266780 }, { "epoch": 1.0154685870450584, "grad_norm": 0.1199609562754631, "learning_rate": 0.00033179873167343046, "loss": 2.058, "step": 266790 }, { "epoch": 1.015506649513181, "grad_norm": 0.12218517065048218, "learning_rate": 0.0003316749176939654, "loss": 2.0799, "step": 266800 }, { "epoch": 1.0155447119813037, "grad_norm": 0.12803561985492706, "learning_rate": 0.0003315511947207698, "loss": 2.0366, "step": 266810 }, { "epoch": 1.0155827744494264, "grad_norm": 0.1239086464047432, "learning_rate": 0.00033142756255346293, "loss": 2.0491, "step": 266820 }, { "epoch": 1.015620836917549, "grad_norm": 0.11244252324104309, "learning_rate": 0.00033130402099239785, "loss": 2.0671, "step": 266830 }, { "epoch": 1.0156588993856717, "grad_norm": 0.13067172467708588, "learning_rate": 0.00033118056983865866, "loss": 2.0574, "step": 266840 }, { "epoch": 1.0156969618537945, "grad_norm": 0.1313253790140152, "learning_rate": 0.00033105720889405586, "loss": 2.0725, "step": 266850 }, { "epoch": 1.0157350243219172, "grad_norm": 0.12428645044565201, "learning_rate": 0.00033093393796112326, "loss": 2.0476, "step": 266860 }, { "epoch": 1.0157730867900399, "grad_norm": 0.1148483082652092, "learning_rate": 0.0003308107568431137, "loss": 2.056, "step": 266870 }, { "epoch": 1.0158111492581625, "grad_norm": 0.11680980026721954, "learning_rate": 0.00033068766534399607, "loss": 2.0547, "step": 266880 }, { "epoch": 1.0158492117262852, "grad_norm": 0.13104647397994995, "learning_rate": 0.0003305646632684512, "loss": 2.0578, "step": 266890 }, { "epoch": 1.0158872741944078, "grad_norm": 0.1296451985836029, "learning_rate": 0.0003304417504218683, "loss": 2.0579, "step": 266900 }, { "epoch": 1.0159253366625305, "grad_norm": 0.13351930677890778, "learning_rate": 0.00033031892661034153, "loss": 2.0502, "step": 266910 }, { "epoch": 1.0159633991306531, "grad_norm": 0.12565377354621887, "learning_rate": 0.0003301961916406663, "loss": 2.0472, "step": 266920 }, { "epoch": 1.0160014615987758, "grad_norm": 0.12520720064640045, "learning_rate": 0.0003300735453203357, "loss": 2.0581, "step": 266930 }, { "epoch": 1.0160395240668987, "grad_norm": 0.11860103160142899, "learning_rate": 0.0003299509874575372, "loss": 2.077, "step": 266940 }, { "epoch": 1.0160775865350213, "grad_norm": 0.11802423000335693, "learning_rate": 0.0003298285178611489, "loss": 2.0563, "step": 266950 }, { "epoch": 1.016115649003144, "grad_norm": 0.1367066502571106, "learning_rate": 0.000329706136340736, "loss": 2.0517, "step": 266960 }, { "epoch": 1.0161537114712667, "grad_norm": 0.7919990420341492, "learning_rate": 0.0003295838427065477, "loss": 2.0558, "step": 266970 }, { "epoch": 1.0161917739393893, "grad_norm": 0.12654846906661987, "learning_rate": 0.00032946163676951355, "loss": 2.0564, "step": 266980 }, { "epoch": 1.016229836407512, "grad_norm": 0.12668249011039734, "learning_rate": 0.0003293395183412399, "loss": 2.0684, "step": 266990 }, { "epoch": 1.0162678988756346, "grad_norm": 0.12429069727659225, "learning_rate": 0.0003292174872340067, "loss": 2.0495, "step": 267000 }, { "epoch": 1.0163059613437573, "grad_norm": 0.12904322147369385, "learning_rate": 0.0003290955432607642, "loss": 2.0571, "step": 267010 }, { "epoch": 1.0163440238118802, "grad_norm": 0.11880702525377274, "learning_rate": 0.0003289736862351293, "loss": 2.0493, "step": 267020 }, { "epoch": 1.0163820862800028, "grad_norm": 0.12380634993314743, "learning_rate": 0.00032885191597138263, "loss": 2.0683, "step": 267030 }, { "epoch": 1.0164201487481255, "grad_norm": 0.1211773157119751, "learning_rate": 0.00032873023228446494, "loss": 2.0607, "step": 267040 }, { "epoch": 1.0164582112162481, "grad_norm": 0.12402321398258209, "learning_rate": 0.0003286086349899739, "loss": 2.054, "step": 267050 }, { "epoch": 1.0164962736843708, "grad_norm": 0.13763384521007538, "learning_rate": 0.0003284871239041609, "loss": 2.0655, "step": 267060 }, { "epoch": 1.0165343361524934, "grad_norm": 0.12396842241287231, "learning_rate": 0.00032836569884392766, "loss": 2.0471, "step": 267070 }, { "epoch": 1.016572398620616, "grad_norm": 0.1238761842250824, "learning_rate": 0.0003282443596268233, "loss": 2.0527, "step": 267080 }, { "epoch": 1.0166104610887388, "grad_norm": 0.13287590444087982, "learning_rate": 0.0003281231060710409, "loss": 2.042, "step": 267090 }, { "epoch": 1.0166485235568614, "grad_norm": 0.12432575225830078, "learning_rate": 0.0003280019379954142, "loss": 2.0563, "step": 267100 }, { "epoch": 1.0166865860249843, "grad_norm": 0.12866751849651337, "learning_rate": 0.00032788085521941494, "loss": 2.0383, "step": 267110 }, { "epoch": 1.016724648493107, "grad_norm": 0.1300543248653412, "learning_rate": 0.0003277598575631492, "loss": 2.0679, "step": 267120 }, { "epoch": 1.0167627109612296, "grad_norm": 0.12244408577680588, "learning_rate": 0.00032763894484735443, "loss": 2.0551, "step": 267130 }, { "epoch": 1.0168007734293523, "grad_norm": 0.14718212187290192, "learning_rate": 0.0003275181168933966, "loss": 2.0612, "step": 267140 }, { "epoch": 1.016838835897475, "grad_norm": 0.1293575018644333, "learning_rate": 0.00032739737352326684, "loss": 2.0561, "step": 267150 }, { "epoch": 1.0168768983655976, "grad_norm": 0.11455338448286057, "learning_rate": 0.0003272767145595785, "loss": 2.0612, "step": 267160 }, { "epoch": 1.0169149608337202, "grad_norm": 0.13592086732387543, "learning_rate": 0.000327156139825564, "loss": 2.0513, "step": 267170 }, { "epoch": 1.016953023301843, "grad_norm": 0.12359960377216339, "learning_rate": 0.0003270356491450719, "loss": 2.0394, "step": 267180 }, { "epoch": 1.0169910857699658, "grad_norm": 0.1179828867316246, "learning_rate": 0.0003269152423425641, "loss": 2.0547, "step": 267190 }, { "epoch": 1.0170291482380884, "grad_norm": 0.12695840001106262, "learning_rate": 0.0003267949192431123, "loss": 2.0474, "step": 267200 }, { "epoch": 1.017067210706211, "grad_norm": 0.12281975150108337, "learning_rate": 0.0003266746796723956, "loss": 2.0541, "step": 267210 }, { "epoch": 1.0171052731743337, "grad_norm": 0.1289357990026474, "learning_rate": 0.00032655452345669737, "loss": 2.0818, "step": 267220 }, { "epoch": 1.0171433356424564, "grad_norm": 0.12075571715831757, "learning_rate": 0.0003264344504229022, "loss": 2.0671, "step": 267230 }, { "epoch": 1.017181398110579, "grad_norm": 0.12263869494199753, "learning_rate": 0.000326314460398493, "loss": 2.0484, "step": 267240 }, { "epoch": 1.0172194605787017, "grad_norm": 0.13677935302257538, "learning_rate": 0.0003261945532115482, "loss": 2.0737, "step": 267250 }, { "epoch": 1.0172575230468244, "grad_norm": 0.13722999393939972, "learning_rate": 0.0003260747286907392, "loss": 2.0591, "step": 267260 }, { "epoch": 1.017295585514947, "grad_norm": 0.12648960947990417, "learning_rate": 0.0003259549866653265, "loss": 2.0347, "step": 267270 }, { "epoch": 1.01733364798307, "grad_norm": 0.12108505517244339, "learning_rate": 0.00032583532696515825, "loss": 2.0592, "step": 267280 }, { "epoch": 1.0173717104511926, "grad_norm": 0.13847528398036957, "learning_rate": 0.0003257157494206663, "loss": 2.0518, "step": 267290 }, { "epoch": 1.0174097729193152, "grad_norm": 0.13215801119804382, "learning_rate": 0.0003255962538628637, "loss": 2.0478, "step": 267300 }, { "epoch": 1.0174478353874379, "grad_norm": 0.12544967234134674, "learning_rate": 0.00032547684012334253, "loss": 2.0576, "step": 267310 }, { "epoch": 1.0174858978555605, "grad_norm": 0.1270923614501953, "learning_rate": 0.0003253575080342702, "loss": 2.0461, "step": 267320 }, { "epoch": 1.0175239603236832, "grad_norm": 0.13393916189670563, "learning_rate": 0.0003252382574283872, "loss": 2.0528, "step": 267330 }, { "epoch": 1.0175620227918059, "grad_norm": 0.13391059637069702, "learning_rate": 0.00032511908813900436, "loss": 2.0443, "step": 267340 }, { "epoch": 1.0176000852599285, "grad_norm": 0.11034025996923447, "learning_rate": 0.00032500000000000004, "loss": 2.0617, "step": 267350 }, { "epoch": 1.0176381477280514, "grad_norm": 0.13497096300125122, "learning_rate": 0.0003248809928458174, "loss": 2.0606, "step": 267360 }, { "epoch": 1.017676210196174, "grad_norm": 0.12526626884937286, "learning_rate": 0.0003247620665114618, "loss": 2.0575, "step": 267370 }, { "epoch": 1.0177142726642967, "grad_norm": 0.13385437428951263, "learning_rate": 0.0003246432208324982, "loss": 2.0567, "step": 267380 }, { "epoch": 1.0177523351324194, "grad_norm": 0.1284913271665573, "learning_rate": 0.0003245244556450482, "loss": 2.054, "step": 267390 }, { "epoch": 1.017790397600542, "grad_norm": 0.11587168276309967, "learning_rate": 0.0003244057707857877, "loss": 2.0541, "step": 267400 }, { "epoch": 1.0178284600686647, "grad_norm": 0.11677401512861252, "learning_rate": 0.0003242871660919442, "loss": 2.0539, "step": 267410 }, { "epoch": 1.0178665225367873, "grad_norm": 0.12091653048992157, "learning_rate": 0.0003241686414012942, "loss": 2.0556, "step": 267420 }, { "epoch": 1.01790458500491, "grad_norm": 0.11467602103948593, "learning_rate": 0.00032405019655216055, "loss": 2.0558, "step": 267430 }, { "epoch": 1.0179426474730326, "grad_norm": 0.13561949133872986, "learning_rate": 0.00032393183138340994, "loss": 2.0645, "step": 267440 }, { "epoch": 1.0179807099411555, "grad_norm": 0.14254984259605408, "learning_rate": 0.0003238135457344502, "loss": 2.0544, "step": 267450 }, { "epoch": 1.0180187724092782, "grad_norm": 0.13436760008335114, "learning_rate": 0.0003236953394452281, "loss": 2.0375, "step": 267460 }, { "epoch": 1.0180568348774008, "grad_norm": 0.136689230799675, "learning_rate": 0.0003235772123562265, "loss": 2.0658, "step": 267470 }, { "epoch": 1.0180948973455235, "grad_norm": 0.13175417482852936, "learning_rate": 0.0003234591643084619, "loss": 2.0571, "step": 267480 }, { "epoch": 1.0181329598136462, "grad_norm": 0.13128870725631714, "learning_rate": 0.00032334119514348193, "loss": 2.0514, "step": 267490 }, { "epoch": 1.0181710222817688, "grad_norm": 0.12385053932666779, "learning_rate": 0.00032322330470336316, "loss": 2.0423, "step": 267500 }, { "epoch": 1.0182090847498915, "grad_norm": 0.13810376822948456, "learning_rate": 0.0003231054928307081, "loss": 2.0527, "step": 267510 }, { "epoch": 1.0182471472180141, "grad_norm": 0.18634332716464996, "learning_rate": 0.0003229877593686433, "loss": 2.0546, "step": 267520 }, { "epoch": 1.0182852096861368, "grad_norm": 0.1266355961561203, "learning_rate": 0.00032287010416081653, "loss": 2.0672, "step": 267530 }, { "epoch": 1.0183232721542597, "grad_norm": 0.13006629049777985, "learning_rate": 0.00032275252705139443, "loss": 2.0619, "step": 267540 }, { "epoch": 1.0183613346223823, "grad_norm": 0.13841281831264496, "learning_rate": 0.00032263502788506035, "loss": 2.0476, "step": 267550 }, { "epoch": 1.018399397090505, "grad_norm": 0.13692769408226013, "learning_rate": 0.0003225176065070116, "loss": 2.0531, "step": 267560 }, { "epoch": 1.0184374595586276, "grad_norm": 0.13610826432704926, "learning_rate": 0.00032240026276295715, "loss": 2.0712, "step": 267570 }, { "epoch": 1.0184755220267503, "grad_norm": 0.12330146878957748, "learning_rate": 0.00032228299649911566, "loss": 2.0588, "step": 267580 }, { "epoch": 1.018513584494873, "grad_norm": 0.12359149754047394, "learning_rate": 0.0003221658075622126, "loss": 2.0514, "step": 267590 }, { "epoch": 1.0185516469629956, "grad_norm": 0.12202282249927521, "learning_rate": 0.00032204869579947816, "loss": 2.0551, "step": 267600 }, { "epoch": 1.0185897094311183, "grad_norm": 0.12366171926259995, "learning_rate": 0.0003219316610586449, "loss": 2.0434, "step": 267610 }, { "epoch": 1.0186277718992411, "grad_norm": 0.13653773069381714, "learning_rate": 0.0003218147031879454, "loss": 2.0518, "step": 267620 }, { "epoch": 1.0186658343673638, "grad_norm": 0.12109905481338501, "learning_rate": 0.0003216978220361101, "loss": 2.0403, "step": 267630 }, { "epoch": 1.0187038968354865, "grad_norm": 0.13771206140518188, "learning_rate": 0.0003215810174523649, "loss": 2.056, "step": 267640 }, { "epoch": 1.0187419593036091, "grad_norm": 0.119480662047863, "learning_rate": 0.0003214642892864288, "loss": 2.0594, "step": 267650 }, { "epoch": 1.0187800217717318, "grad_norm": 0.11807338893413544, "learning_rate": 0.0003213476373885118, "loss": 2.0479, "step": 267660 }, { "epoch": 1.0188180842398544, "grad_norm": 0.1199382096529007, "learning_rate": 0.00032123106160931276, "loss": 2.0501, "step": 267670 }, { "epoch": 1.018856146707977, "grad_norm": 0.12022703140974045, "learning_rate": 0.0003211145618000169, "loss": 2.0536, "step": 267680 }, { "epoch": 1.0188942091760997, "grad_norm": 0.12680114805698395, "learning_rate": 0.0003209981378122935, "loss": 2.0466, "step": 267690 }, { "epoch": 1.0189322716442224, "grad_norm": 0.12346377223730087, "learning_rate": 0.0003208817894982944, "loss": 2.0414, "step": 267700 }, { "epoch": 1.0189703341123453, "grad_norm": 0.12502339482307434, "learning_rate": 0.0003207655167106508, "loss": 2.0502, "step": 267710 }, { "epoch": 1.019008396580468, "grad_norm": 0.12736190855503082, "learning_rate": 0.00032064931930247194, "loss": 2.0404, "step": 267720 }, { "epoch": 1.0190464590485906, "grad_norm": 0.11968687921762466, "learning_rate": 0.0003205331971273424, "loss": 2.055, "step": 267730 }, { "epoch": 1.0190845215167132, "grad_norm": 0.11527033895254135, "learning_rate": 0.00032041715003932025, "loss": 2.0296, "step": 267740 }, { "epoch": 1.019122583984836, "grad_norm": 0.128132626414299, "learning_rate": 0.0003203011778929348, "loss": 2.0428, "step": 267750 }, { "epoch": 1.0191606464529586, "grad_norm": 0.1341097503900528, "learning_rate": 0.0003201852805431843, "loss": 2.0479, "step": 267760 }, { "epoch": 1.0191987089210812, "grad_norm": 0.14120911061763763, "learning_rate": 0.0003200694578455342, "loss": 2.056, "step": 267770 }, { "epoch": 1.0192367713892039, "grad_norm": 0.12796425819396973, "learning_rate": 0.0003199537096559147, "loss": 2.0521, "step": 267780 }, { "epoch": 1.0192748338573265, "grad_norm": 0.12128780037164688, "learning_rate": 0.0003198380358307189, "loss": 2.0576, "step": 267790 }, { "epoch": 1.0193128963254494, "grad_norm": 0.12644854187965393, "learning_rate": 0.00031972243622680054, "loss": 2.063, "step": 267800 }, { "epoch": 1.019350958793572, "grad_norm": 0.13088761270046234, "learning_rate": 0.0003196069107014721, "loss": 2.0624, "step": 267810 }, { "epoch": 1.0193890212616947, "grad_norm": 0.13620150089263916, "learning_rate": 0.00031949145911250255, "loss": 2.0609, "step": 267820 }, { "epoch": 1.0194270837298174, "grad_norm": 0.11924911290407181, "learning_rate": 0.00031937608131811556, "loss": 2.0535, "step": 267830 }, { "epoch": 1.01946514619794, "grad_norm": 0.13811926543712616, "learning_rate": 0.0003192607771769872, "loss": 2.0487, "step": 267840 }, { "epoch": 1.0195032086660627, "grad_norm": 0.11828891932964325, "learning_rate": 0.0003191455465482441, "loss": 2.0518, "step": 267850 }, { "epoch": 1.0195412711341854, "grad_norm": 0.11766720563173294, "learning_rate": 0.00031903038929146147, "loss": 2.0627, "step": 267860 }, { "epoch": 1.019579333602308, "grad_norm": 0.12382876127958298, "learning_rate": 0.0003189153052666608, "loss": 2.0566, "step": 267870 }, { "epoch": 1.019617396070431, "grad_norm": 0.1257692575454712, "learning_rate": 0.0003188002943343082, "loss": 2.0413, "step": 267880 }, { "epoch": 1.0196554585385535, "grad_norm": 0.12685583531856537, "learning_rate": 0.00031868535635531253, "loss": 2.0734, "step": 267890 }, { "epoch": 1.0196935210066762, "grad_norm": 0.12163806706666946, "learning_rate": 0.000318570491191023, "loss": 2.0426, "step": 267900 }, { "epoch": 1.0197315834747989, "grad_norm": 0.1382589042186737, "learning_rate": 0.0003184556987032274, "loss": 2.0553, "step": 267910 }, { "epoch": 1.0197696459429215, "grad_norm": 0.13633142411708832, "learning_rate": 0.00031834097875415044, "loss": 2.0386, "step": 267920 }, { "epoch": 1.0198077084110442, "grad_norm": 0.12466693669557571, "learning_rate": 0.0003182263312064515, "loss": 2.0495, "step": 267930 }, { "epoch": 1.0198457708791668, "grad_norm": 0.13681867718696594, "learning_rate": 0.0003181117559232226, "loss": 2.0497, "step": 267940 }, { "epoch": 1.0198838333472895, "grad_norm": 0.1384352594614029, "learning_rate": 0.0003179972527679871, "loss": 2.0599, "step": 267950 }, { "epoch": 1.0199218958154121, "grad_norm": 0.11396428197622299, "learning_rate": 0.0003178828216046969, "loss": 2.0436, "step": 267960 }, { "epoch": 1.019959958283535, "grad_norm": 0.13938863575458527, "learning_rate": 0.0003177684622977314, "loss": 2.0478, "step": 267970 }, { "epoch": 1.0199980207516577, "grad_norm": 0.12432560324668884, "learning_rate": 0.0003176541747118953, "loss": 2.0578, "step": 267980 }, { "epoch": 1.0200360832197803, "grad_norm": 0.12672552466392517, "learning_rate": 0.0003175399587124165, "loss": 2.0483, "step": 267990 }, { "epoch": 1.020074145687903, "grad_norm": 0.1234738752245903, "learning_rate": 0.00031742581416494464, "loss": 2.0588, "step": 268000 }, { "epoch": 1.0201122081560257, "grad_norm": 0.12561601400375366, "learning_rate": 0.00031731174093554894, "loss": 2.0552, "step": 268010 }, { "epoch": 1.0201502706241483, "grad_norm": 0.13383440673351288, "learning_rate": 0.00031719773889071653, "loss": 2.0625, "step": 268020 }, { "epoch": 1.020188333092271, "grad_norm": 0.12872928380966187, "learning_rate": 0.0003170838078973506, "loss": 2.0599, "step": 268030 }, { "epoch": 1.0202263955603936, "grad_norm": 0.17568981647491455, "learning_rate": 0.00031696994782276874, "loss": 2.0451, "step": 268040 }, { "epoch": 1.0202644580285165, "grad_norm": 0.13012796640396118, "learning_rate": 0.00031685615853470074, "loss": 2.0433, "step": 268050 }, { "epoch": 1.0203025204966392, "grad_norm": 0.13532185554504395, "learning_rate": 0.00031674243990128723, "loss": 2.0476, "step": 268060 }, { "epoch": 1.0203405829647618, "grad_norm": 0.1131984293460846, "learning_rate": 0.00031662879179107753, "loss": 2.0518, "step": 268070 }, { "epoch": 1.0203786454328845, "grad_norm": 0.11749204248189926, "learning_rate": 0.0003165152140730282, "loss": 2.0495, "step": 268080 }, { "epoch": 1.0204167079010071, "grad_norm": 0.12241828441619873, "learning_rate": 0.00031640170661650116, "loss": 2.0469, "step": 268090 }, { "epoch": 1.0204547703691298, "grad_norm": 0.14096224308013916, "learning_rate": 0.0003162882692912616, "loss": 2.0591, "step": 268100 }, { "epoch": 1.0204928328372525, "grad_norm": 0.12495024502277374, "learning_rate": 0.0003161749019674771, "loss": 2.0499, "step": 268110 }, { "epoch": 1.020530895305375, "grad_norm": 0.12601830065250397, "learning_rate": 0.00031606160451571474, "loss": 2.0624, "step": 268120 }, { "epoch": 1.0205689577734978, "grad_norm": 0.12339954823255539, "learning_rate": 0.0003159483768069404, "loss": 2.0632, "step": 268130 }, { "epoch": 1.0206070202416206, "grad_norm": 0.13249126076698303, "learning_rate": 0.0003158352187125164, "loss": 2.0613, "step": 268140 }, { "epoch": 1.0206450827097433, "grad_norm": 0.14813245832920074, "learning_rate": 0.00031572213010420014, "loss": 2.0554, "step": 268150 }, { "epoch": 1.020683145177866, "grad_norm": 0.12110047787427902, "learning_rate": 0.0003156091108541422, "loss": 2.0641, "step": 268160 }, { "epoch": 1.0207212076459886, "grad_norm": 0.12983626127243042, "learning_rate": 0.00031549616083488487, "loss": 2.0557, "step": 268170 }, { "epoch": 1.0207592701141113, "grad_norm": 0.12175390124320984, "learning_rate": 0.0003153832799193601, "loss": 2.0587, "step": 268180 }, { "epoch": 1.020797332582234, "grad_norm": 0.12431416660547256, "learning_rate": 0.00031527046798088834, "loss": 2.0439, "step": 268190 }, { "epoch": 1.0208353950503566, "grad_norm": 0.12754175066947937, "learning_rate": 0.0003151577248931764, "loss": 2.0825, "step": 268200 }, { "epoch": 1.0208734575184792, "grad_norm": 0.12287028878927231, "learning_rate": 0.00031504505053031606, "loss": 2.0501, "step": 268210 }, { "epoch": 1.0209115199866021, "grad_norm": 0.14124400913715363, "learning_rate": 0.0003149324447667825, "loss": 2.0659, "step": 268220 }, { "epoch": 1.0209495824547248, "grad_norm": 0.1486729383468628, "learning_rate": 0.0003148199074774325, "loss": 2.06, "step": 268230 }, { "epoch": 1.0209876449228474, "grad_norm": 0.12180312722921371, "learning_rate": 0.0003147074385375027, "loss": 2.0547, "step": 268240 }, { "epoch": 1.02102570739097, "grad_norm": 0.12472087144851685, "learning_rate": 0.00031459503782260847, "loss": 2.06, "step": 268250 }, { "epoch": 1.0210637698590928, "grad_norm": 0.12739062309265137, "learning_rate": 0.0003144827052087416, "loss": 2.0566, "step": 268260 }, { "epoch": 1.0211018323272154, "grad_norm": 0.1196855679154396, "learning_rate": 0.0003143704405722695, "loss": 2.0511, "step": 268270 }, { "epoch": 1.021139894795338, "grad_norm": 0.13298091292381287, "learning_rate": 0.0003142582437899329, "loss": 2.049, "step": 268280 }, { "epoch": 1.0211779572634607, "grad_norm": 0.11433979868888855, "learning_rate": 0.00031414611473884474, "loss": 2.0521, "step": 268290 }, { "epoch": 1.0212160197315834, "grad_norm": 0.15914879739284515, "learning_rate": 0.00031403405329648835, "loss": 2.0452, "step": 268300 }, { "epoch": 1.0212540821997063, "grad_norm": 0.13394173979759216, "learning_rate": 0.00031392205934071605, "loss": 2.0617, "step": 268310 }, { "epoch": 1.021292144667829, "grad_norm": 0.1315068155527115, "learning_rate": 0.0003138101327497475, "loss": 2.0559, "step": 268320 }, { "epoch": 1.0213302071359516, "grad_norm": 0.1253783404827118, "learning_rate": 0.00031369827340216804, "loss": 2.0615, "step": 268330 }, { "epoch": 1.0213682696040742, "grad_norm": 0.12846381962299347, "learning_rate": 0.0003135864811769275, "loss": 2.0415, "step": 268340 }, { "epoch": 1.0214063320721969, "grad_norm": 0.13440275192260742, "learning_rate": 0.0003134747559533384, "loss": 2.0616, "step": 268350 }, { "epoch": 1.0214443945403195, "grad_norm": 0.12881481647491455, "learning_rate": 0.0003133630976110744, "loss": 2.0386, "step": 268360 }, { "epoch": 1.0214824570084422, "grad_norm": 0.12021521478891373, "learning_rate": 0.00031325150603016906, "loss": 2.0555, "step": 268370 }, { "epoch": 1.0215205194765649, "grad_norm": 0.11945555359125137, "learning_rate": 0.0003131399810910138, "loss": 2.0573, "step": 268380 }, { "epoch": 1.0215585819446875, "grad_norm": 0.12708795070648193, "learning_rate": 0.00031302852267435727, "loss": 2.047, "step": 268390 }, { "epoch": 1.0215966444128104, "grad_norm": 0.12525077164173126, "learning_rate": 0.0003129171306613029, "loss": 2.0467, "step": 268400 }, { "epoch": 1.021634706880933, "grad_norm": 0.1318259835243225, "learning_rate": 0.0003128058049333082, "loss": 2.0442, "step": 268410 }, { "epoch": 1.0216727693490557, "grad_norm": 0.1215236485004425, "learning_rate": 0.00031269454537218266, "loss": 2.0592, "step": 268420 }, { "epoch": 1.0217108318171784, "grad_norm": 0.1245192289352417, "learning_rate": 0.0003125833518600869, "loss": 2.0556, "step": 268430 }, { "epoch": 1.021748894285301, "grad_norm": 0.12638451159000397, "learning_rate": 0.0003124722242795306, "loss": 2.0435, "step": 268440 }, { "epoch": 1.0217869567534237, "grad_norm": 0.12292831391096115, "learning_rate": 0.00031236116251337167, "loss": 2.0565, "step": 268450 }, { "epoch": 1.0218250192215463, "grad_norm": 0.12036208808422089, "learning_rate": 0.0003122501664448141, "loss": 2.0577, "step": 268460 }, { "epoch": 1.021863081689669, "grad_norm": 0.12013652175664902, "learning_rate": 0.00031213923595740733, "loss": 2.0472, "step": 268470 }, { "epoch": 1.0219011441577919, "grad_norm": 0.13374583423137665, "learning_rate": 0.00031202837093504424, "loss": 2.0341, "step": 268480 }, { "epoch": 1.0219392066259145, "grad_norm": 0.11965156346559525, "learning_rate": 0.00031191757126195976, "loss": 2.0475, "step": 268490 }, { "epoch": 1.0219772690940372, "grad_norm": 0.12487906217575073, "learning_rate": 0.00031180683682272974, "loss": 2.0673, "step": 268500 }, { "epoch": 1.0220153315621598, "grad_norm": 0.13901683688163757, "learning_rate": 0.0003116961675022695, "loss": 2.0512, "step": 268510 }, { "epoch": 1.0220533940302825, "grad_norm": 0.13149023056030273, "learning_rate": 0.0003115855631858323, "loss": 2.0504, "step": 268520 }, { "epoch": 1.0220914564984052, "grad_norm": 0.12233823537826538, "learning_rate": 0.0003114750237590078, "loss": 2.0701, "step": 268530 }, { "epoch": 1.0221295189665278, "grad_norm": 0.11628257483243942, "learning_rate": 0.00031136454910772116, "loss": 2.0632, "step": 268540 }, { "epoch": 1.0221675814346505, "grad_norm": 0.1403857320547104, "learning_rate": 0.0003112541391182313, "loss": 2.0637, "step": 268550 }, { "epoch": 1.0222056439027731, "grad_norm": 0.11599292606115341, "learning_rate": 0.00031114379367712944, "loss": 2.0597, "step": 268560 }, { "epoch": 1.022243706370896, "grad_norm": 0.1288028508424759, "learning_rate": 0.0003110335126713381, "loss": 2.0502, "step": 268570 }, { "epoch": 1.0222817688390187, "grad_norm": 0.1404590755701065, "learning_rate": 0.0003109232959881096, "loss": 2.0545, "step": 268580 }, { "epoch": 1.0223198313071413, "grad_norm": 0.12103798240423203, "learning_rate": 0.00031081314351502463, "loss": 2.0361, "step": 268590 }, { "epoch": 1.022357893775264, "grad_norm": 0.12138662487268448, "learning_rate": 0.0003107030551399909, "loss": 2.0593, "step": 268600 }, { "epoch": 1.0223959562433866, "grad_norm": 0.12390505522489548, "learning_rate": 0.0003105930307512419, "loss": 2.0392, "step": 268610 }, { "epoch": 1.0224340187115093, "grad_norm": 0.12961971759796143, "learning_rate": 0.0003104830702373356, "loss": 2.0532, "step": 268620 }, { "epoch": 1.022472081179632, "grad_norm": 0.1259758621454239, "learning_rate": 0.0003103731734871531, "loss": 2.0452, "step": 268630 }, { "epoch": 1.0225101436477546, "grad_norm": 0.14060895144939423, "learning_rate": 0.0003102633403898972, "loss": 2.0581, "step": 268640 }, { "epoch": 1.0225482061158773, "grad_norm": 0.1207043007016182, "learning_rate": 0.0003101535708350914, "loss": 2.0508, "step": 268650 }, { "epoch": 1.0225862685840001, "grad_norm": 0.13988932967185974, "learning_rate": 0.00031004386471257804, "loss": 2.0532, "step": 268660 }, { "epoch": 1.0226243310521228, "grad_norm": 0.14313188195228577, "learning_rate": 0.00030993422191251787, "loss": 2.0454, "step": 268670 }, { "epoch": 1.0226623935202455, "grad_norm": 0.12709325551986694, "learning_rate": 0.00030982464232538786, "loss": 2.0561, "step": 268680 }, { "epoch": 1.0227004559883681, "grad_norm": 0.13226522505283356, "learning_rate": 0.00030971512584198046, "loss": 2.057, "step": 268690 }, { "epoch": 1.0227385184564908, "grad_norm": 0.14007730782032013, "learning_rate": 0.0003096056723534023, "loss": 2.0546, "step": 268700 }, { "epoch": 1.0227765809246134, "grad_norm": 0.12856709957122803, "learning_rate": 0.0003094962817510727, "loss": 2.044, "step": 268710 }, { "epoch": 1.022814643392736, "grad_norm": 0.13431373238563538, "learning_rate": 0.00030938695392672264, "loss": 2.0507, "step": 268720 }, { "epoch": 1.0228527058608587, "grad_norm": 0.13312764465808868, "learning_rate": 0.0003092776887723935, "loss": 2.0521, "step": 268730 }, { "epoch": 1.0228907683289816, "grad_norm": 0.1246262639760971, "learning_rate": 0.0003091684861804354, "loss": 2.0738, "step": 268740 }, { "epoch": 1.0229288307971043, "grad_norm": 0.1405113935470581, "learning_rate": 0.00030905934604350666, "loss": 2.0422, "step": 268750 }, { "epoch": 1.022966893265227, "grad_norm": 0.12192755937576294, "learning_rate": 0.000308950268254572, "loss": 2.0503, "step": 268760 }, { "epoch": 1.0230049557333496, "grad_norm": 0.13036781549453735, "learning_rate": 0.00030884125270690154, "loss": 2.0533, "step": 268770 }, { "epoch": 1.0230430182014723, "grad_norm": 0.16155710816383362, "learning_rate": 0.00030873229929406966, "loss": 2.0446, "step": 268780 }, { "epoch": 1.023081080669595, "grad_norm": 0.14020535349845886, "learning_rate": 0.00030862340790995365, "loss": 2.0495, "step": 268790 }, { "epoch": 1.0231191431377176, "grad_norm": 0.11833903193473816, "learning_rate": 0.00030851457844873235, "loss": 2.05, "step": 268800 }, { "epoch": 1.0231572056058402, "grad_norm": 0.1218896210193634, "learning_rate": 0.0003084058108048855, "loss": 2.0512, "step": 268810 }, { "epoch": 1.0231952680739629, "grad_norm": 0.12483804672956467, "learning_rate": 0.00030829710487319186, "loss": 2.0453, "step": 268820 }, { "epoch": 1.0232333305420858, "grad_norm": 0.1135217696428299, "learning_rate": 0.00030818846054872853, "loss": 2.0549, "step": 268830 }, { "epoch": 1.0232713930102084, "grad_norm": 0.12844489514827728, "learning_rate": 0.0003080798777268696, "loss": 2.0584, "step": 268840 }, { "epoch": 1.023309455478331, "grad_norm": 0.11942926049232483, "learning_rate": 0.00030797135630328476, "loss": 2.0411, "step": 268850 }, { "epoch": 1.0233475179464537, "grad_norm": 0.13348643481731415, "learning_rate": 0.0003078628961739387, "loss": 2.0697, "step": 268860 }, { "epoch": 1.0233855804145764, "grad_norm": 0.12040871381759644, "learning_rate": 0.0003077544972350892, "loss": 2.0597, "step": 268870 }, { "epoch": 1.023423642882699, "grad_norm": 0.12042777240276337, "learning_rate": 0.00030764615938328653, "loss": 2.0513, "step": 268880 }, { "epoch": 1.0234617053508217, "grad_norm": 0.11985976994037628, "learning_rate": 0.00030753788251537223, "loss": 2.0532, "step": 268890 }, { "epoch": 1.0234997678189444, "grad_norm": 0.12266091257333755, "learning_rate": 0.0003074296665284776, "loss": 2.0559, "step": 268900 }, { "epoch": 1.0235378302870672, "grad_norm": 0.11272238940000534, "learning_rate": 0.00030732151132002305, "loss": 2.0522, "step": 268910 }, { "epoch": 1.02357589275519, "grad_norm": 0.1326189637184143, "learning_rate": 0.0003072134167877166, "loss": 2.0474, "step": 268920 }, { "epoch": 1.0236139552233126, "grad_norm": 0.1255561113357544, "learning_rate": 0.0003071053828295529, "loss": 2.0353, "step": 268930 }, { "epoch": 1.0236520176914352, "grad_norm": 0.12174849957227707, "learning_rate": 0.00030699740934381214, "loss": 2.0544, "step": 268940 }, { "epoch": 1.0236900801595579, "grad_norm": 0.1409405916929245, "learning_rate": 0.0003068894962290589, "loss": 2.0487, "step": 268950 }, { "epoch": 1.0237281426276805, "grad_norm": 0.1327158361673355, "learning_rate": 0.00030678164338414083, "loss": 2.0471, "step": 268960 }, { "epoch": 1.0237662050958032, "grad_norm": 0.12210732698440552, "learning_rate": 0.00030667385070818796, "loss": 2.0532, "step": 268970 }, { "epoch": 1.0238042675639258, "grad_norm": 0.11677435040473938, "learning_rate": 0.0003065661181006113, "loss": 2.0624, "step": 268980 }, { "epoch": 1.0238423300320485, "grad_norm": 0.13230466842651367, "learning_rate": 0.0003064584454611017, "loss": 2.0562, "step": 268990 }, { "epoch": 1.0238803925001714, "grad_norm": 0.12184811383485794, "learning_rate": 0.00030635083268962916, "loss": 2.0595, "step": 269000 }, { "epoch": 1.023918454968294, "grad_norm": 0.12902513146400452, "learning_rate": 0.00030624327968644115, "loss": 2.0547, "step": 269010 }, { "epoch": 1.0239565174364167, "grad_norm": 0.12603649497032166, "learning_rate": 0.00030613578635206205, "loss": 2.0609, "step": 269020 }, { "epoch": 1.0239945799045393, "grad_norm": 0.1304672509431839, "learning_rate": 0.0003060283525872918, "loss": 2.0491, "step": 269030 }, { "epoch": 1.024032642372662, "grad_norm": 0.11761519312858582, "learning_rate": 0.00030592097829320485, "loss": 2.0537, "step": 269040 }, { "epoch": 1.0240707048407847, "grad_norm": 0.11814765632152557, "learning_rate": 0.00030581366337114924, "loss": 2.0416, "step": 269050 }, { "epoch": 1.0241087673089073, "grad_norm": 0.11363788694143295, "learning_rate": 0.0003057064077227455, "loss": 2.0402, "step": 269060 }, { "epoch": 1.02414682977703, "grad_norm": 0.1189923882484436, "learning_rate": 0.0003055992112498854, "loss": 2.0461, "step": 269070 }, { "epoch": 1.0241848922451529, "grad_norm": 0.12008653581142426, "learning_rate": 0.00030549207385473123, "loss": 2.0411, "step": 269080 }, { "epoch": 1.0242229547132755, "grad_norm": 0.1371253877878189, "learning_rate": 0.0003053849954397144, "loss": 2.0486, "step": 269090 }, { "epoch": 1.0242610171813982, "grad_norm": 0.13268887996673584, "learning_rate": 0.0003052779759075346, "loss": 2.0425, "step": 269100 }, { "epoch": 1.0242990796495208, "grad_norm": 0.11739718168973923, "learning_rate": 0.000305171015161159, "loss": 2.0396, "step": 269110 }, { "epoch": 1.0243371421176435, "grad_norm": 0.13256852328777313, "learning_rate": 0.00030506411310382076, "loss": 2.0447, "step": 269120 }, { "epoch": 1.0243752045857661, "grad_norm": 0.12280917912721634, "learning_rate": 0.00030495726963901816, "loss": 2.045, "step": 269130 }, { "epoch": 1.0244132670538888, "grad_norm": 0.11900188028812408, "learning_rate": 0.0003048504846705139, "loss": 2.0466, "step": 269140 }, { "epoch": 1.0244513295220115, "grad_norm": 0.12554916739463806, "learning_rate": 0.0003047437581023337, "loss": 2.0548, "step": 269150 }, { "epoch": 1.0244893919901341, "grad_norm": 0.1171054095029831, "learning_rate": 0.00030463708983876524, "loss": 2.0488, "step": 269160 }, { "epoch": 1.024527454458257, "grad_norm": 0.11716967821121216, "learning_rate": 0.00030453047978435787, "loss": 2.0453, "step": 269170 }, { "epoch": 1.0245655169263796, "grad_norm": 0.1312137097120285, "learning_rate": 0.0003044239278439205, "loss": 2.0319, "step": 269180 }, { "epoch": 1.0246035793945023, "grad_norm": 0.1252538561820984, "learning_rate": 0.00030431743392252163, "loss": 2.0537, "step": 269190 }, { "epoch": 1.024641641862625, "grad_norm": 0.1490531712770462, "learning_rate": 0.00030421099792548787, "loss": 2.0492, "step": 269200 }, { "epoch": 1.0246797043307476, "grad_norm": 0.14222683012485504, "learning_rate": 0.00030410461975840267, "loss": 2.0459, "step": 269210 }, { "epoch": 1.0247177667988703, "grad_norm": 0.1411609947681427, "learning_rate": 0.0003039982993271062, "loss": 2.0302, "step": 269220 }, { "epoch": 1.024755829266993, "grad_norm": 0.14139287173748016, "learning_rate": 0.00030389203653769355, "loss": 2.0311, "step": 269230 }, { "epoch": 1.0247938917351156, "grad_norm": 0.12095063179731369, "learning_rate": 0.00030378583129651415, "loss": 2.0512, "step": 269240 }, { "epoch": 1.0248319542032382, "grad_norm": 0.14292798936367035, "learning_rate": 0.0003036796835101709, "loss": 2.0592, "step": 269250 }, { "epoch": 1.0248700166713611, "grad_norm": 0.12329915165901184, "learning_rate": 0.00030357359308551886, "loss": 2.0566, "step": 269260 }, { "epoch": 1.0249080791394838, "grad_norm": 0.12383721023797989, "learning_rate": 0.00030346755992966456, "loss": 2.0421, "step": 269270 }, { "epoch": 1.0249461416076064, "grad_norm": 0.12567129731178284, "learning_rate": 0.000303361583949965, "loss": 2.0447, "step": 269280 }, { "epoch": 1.024984204075729, "grad_norm": 0.13148026168346405, "learning_rate": 0.0003032556650540267, "loss": 2.0577, "step": 269290 }, { "epoch": 1.0250222665438518, "grad_norm": 0.14086686074733734, "learning_rate": 0.00030314980314970474, "loss": 2.039, "step": 269300 }, { "epoch": 1.0250603290119744, "grad_norm": 0.12691082060337067, "learning_rate": 0.00030304399814510175, "loss": 2.0465, "step": 269310 }, { "epoch": 1.025098391480097, "grad_norm": 0.11918236315250397, "learning_rate": 0.0003029382499485673, "loss": 2.0489, "step": 269320 }, { "epoch": 1.0251364539482197, "grad_norm": 0.11919201165437698, "learning_rate": 0.00030283255846869653, "loss": 2.0438, "step": 269330 }, { "epoch": 1.0251745164163426, "grad_norm": 0.12480544298887253, "learning_rate": 0.0003027269236143293, "loss": 2.0322, "step": 269340 }, { "epoch": 1.0252125788844653, "grad_norm": 0.12979908287525177, "learning_rate": 0.00030262134529454983, "loss": 2.0557, "step": 269350 }, { "epoch": 1.025250641352588, "grad_norm": 0.13761655986309052, "learning_rate": 0.000302515823418685, "loss": 2.0328, "step": 269360 }, { "epoch": 1.0252887038207106, "grad_norm": 0.13578085601329803, "learning_rate": 0.000302410357896304, "loss": 2.0563, "step": 269370 }, { "epoch": 1.0253267662888332, "grad_norm": 0.12312494218349457, "learning_rate": 0.0003023049486372172, "loss": 2.0461, "step": 269380 }, { "epoch": 1.025364828756956, "grad_norm": 0.11766970902681351, "learning_rate": 0.00030219959555147517, "loss": 2.0526, "step": 269390 }, { "epoch": 1.0254028912250785, "grad_norm": 0.12069400399923325, "learning_rate": 0.00030209429854936805, "loss": 2.0549, "step": 269400 }, { "epoch": 1.0254409536932012, "grad_norm": 0.16372714936733246, "learning_rate": 0.00030198905754142444, "loss": 2.0425, "step": 269410 }, { "epoch": 1.0254790161613239, "grad_norm": 0.1190953329205513, "learning_rate": 0.00030188387243841055, "loss": 2.0513, "step": 269420 }, { "epoch": 1.0255170786294467, "grad_norm": 0.13131879270076752, "learning_rate": 0.00030177874315132937, "loss": 2.0602, "step": 269430 }, { "epoch": 1.0255551410975694, "grad_norm": 0.11880412697792053, "learning_rate": 0.00030167366959141977, "loss": 2.0469, "step": 269440 }, { "epoch": 1.025593203565692, "grad_norm": 0.1298285871744156, "learning_rate": 0.00030156865167015566, "loss": 2.0728, "step": 269450 }, { "epoch": 1.0256312660338147, "grad_norm": 0.13389140367507935, "learning_rate": 0.00030146368929924515, "loss": 2.0458, "step": 269460 }, { "epoch": 1.0256693285019374, "grad_norm": 0.12634499371051788, "learning_rate": 0.00030135878239062944, "loss": 2.0379, "step": 269470 }, { "epoch": 1.02570739097006, "grad_norm": 0.1331862211227417, "learning_rate": 0.00030125393085648214, "loss": 2.0434, "step": 269480 }, { "epoch": 1.0257454534381827, "grad_norm": 0.11369920521974564, "learning_rate": 0.00030114913460920856, "loss": 2.0404, "step": 269490 }, { "epoch": 1.0257835159063053, "grad_norm": 0.14334441721439362, "learning_rate": 0.00030104439356144465, "loss": 2.0642, "step": 269500 }, { "epoch": 1.025821578374428, "grad_norm": 0.1409229189157486, "learning_rate": 0.0003009397076260562, "loss": 2.0557, "step": 269510 }, { "epoch": 1.0258596408425509, "grad_norm": 0.11479076743125916, "learning_rate": 0.0003008350767161379, "loss": 2.0506, "step": 269520 }, { "epoch": 1.0258977033106735, "grad_norm": 0.12338791787624359, "learning_rate": 0.00030073050074501287, "loss": 2.0564, "step": 269530 }, { "epoch": 1.0259357657787962, "grad_norm": 0.12768040597438812, "learning_rate": 0.00030062597962623116, "loss": 2.0389, "step": 269540 }, { "epoch": 1.0259738282469189, "grad_norm": 0.1327030509710312, "learning_rate": 0.0003005215132735696, "loss": 2.0432, "step": 269550 }, { "epoch": 1.0260118907150415, "grad_norm": 0.1372380554676056, "learning_rate": 0.00030041710160103063, "loss": 2.0446, "step": 269560 }, { "epoch": 1.0260499531831642, "grad_norm": 0.12993136048316956, "learning_rate": 0.0003003127445228414, "loss": 2.0497, "step": 269570 }, { "epoch": 1.0260880156512868, "grad_norm": 0.12132129818201065, "learning_rate": 0.0003002084419534532, "loss": 2.047, "step": 269580 }, { "epoch": 1.0261260781194095, "grad_norm": 0.1340913325548172, "learning_rate": 0.00030010419380754053, "loss": 2.0438, "step": 269590 }, { "epoch": 1.0261641405875324, "grad_norm": 0.12921398878097534, "learning_rate": 0.0003, "loss": 2.0546, "step": 269600 }, { "epoch": 1.026202203055655, "grad_norm": 0.13172978162765503, "learning_rate": 0.0002998958604459501, "loss": 2.0324, "step": 269610 }, { "epoch": 1.0262402655237777, "grad_norm": 0.1243109479546547, "learning_rate": 0.00029979177506073, "loss": 2.0618, "step": 269620 }, { "epoch": 1.0262783279919003, "grad_norm": 0.11869452893733978, "learning_rate": 0.0002996877437598987, "loss": 2.0386, "step": 269630 }, { "epoch": 1.026316390460023, "grad_norm": 0.13170160353183746, "learning_rate": 0.0002995837664592345, "loss": 2.0502, "step": 269640 }, { "epoch": 1.0263544529281456, "grad_norm": 0.1317114382982254, "learning_rate": 0.00029947984307473395, "loss": 2.025, "step": 269650 }, { "epoch": 1.0263925153962683, "grad_norm": 0.12583796679973602, "learning_rate": 0.0002993759735226112, "loss": 2.0529, "step": 269660 }, { "epoch": 1.026430577864391, "grad_norm": 0.13131083548069, "learning_rate": 0.00029927215771929726, "loss": 2.0412, "step": 269670 }, { "epoch": 1.0264686403325136, "grad_norm": 0.12807200849056244, "learning_rate": 0.0002991683955814391, "loss": 2.0481, "step": 269680 }, { "epoch": 1.0265067028006365, "grad_norm": 0.14543797075748444, "learning_rate": 0.0002990646870258988, "loss": 2.0281, "step": 269690 }, { "epoch": 1.0265447652687592, "grad_norm": 0.12976425886154175, "learning_rate": 0.00029896103196975306, "loss": 2.0369, "step": 269700 }, { "epoch": 1.0265828277368818, "grad_norm": 0.13158537447452545, "learning_rate": 0.000298857430330292, "loss": 2.0489, "step": 269710 }, { "epoch": 1.0266208902050045, "grad_norm": 0.1256389617919922, "learning_rate": 0.00029875388202501896, "loss": 2.0485, "step": 269720 }, { "epoch": 1.0266589526731271, "grad_norm": 0.1335773915052414, "learning_rate": 0.0002986503869716491, "loss": 2.0394, "step": 269730 }, { "epoch": 1.0266970151412498, "grad_norm": 0.12555572390556335, "learning_rate": 0.0002985469450881091, "loss": 2.0512, "step": 269740 }, { "epoch": 1.0267350776093724, "grad_norm": 0.13962522149085999, "learning_rate": 0.0002984435562925363, "loss": 2.0377, "step": 269750 }, { "epoch": 1.026773140077495, "grad_norm": 0.14196039736270905, "learning_rate": 0.0002983402205032777, "loss": 2.0521, "step": 269760 }, { "epoch": 1.026811202545618, "grad_norm": 0.1314079761505127, "learning_rate": 0.0002982369376388896, "loss": 2.0545, "step": 269770 }, { "epoch": 1.0268492650137406, "grad_norm": 0.12030822783708572, "learning_rate": 0.0002981337076181365, "loss": 2.0389, "step": 269780 }, { "epoch": 1.0268873274818633, "grad_norm": 0.13099026679992676, "learning_rate": 0.0002980305303599906, "loss": 2.0451, "step": 269790 }, { "epoch": 1.026925389949986, "grad_norm": 0.128982275724411, "learning_rate": 0.000297927405783631, "loss": 2.0383, "step": 269800 }, { "epoch": 1.0269634524181086, "grad_norm": 0.142899751663208, "learning_rate": 0.0002978243338084427, "loss": 2.0751, "step": 269810 }, { "epoch": 1.0270015148862313, "grad_norm": 0.1250215768814087, "learning_rate": 0.0002977213143540164, "loss": 2.0571, "step": 269820 }, { "epoch": 1.027039577354354, "grad_norm": 0.1249474361538887, "learning_rate": 0.00029761834734014713, "loss": 2.0465, "step": 269830 }, { "epoch": 1.0270776398224766, "grad_norm": 0.13094857335090637, "learning_rate": 0.0002975154326868341, "loss": 2.0414, "step": 269840 }, { "epoch": 1.0271157022905992, "grad_norm": 0.13219231367111206, "learning_rate": 0.0002974125703142797, "loss": 2.0326, "step": 269850 }, { "epoch": 1.027153764758722, "grad_norm": 0.12404201924800873, "learning_rate": 0.0002973097601428887, "loss": 2.0403, "step": 269860 }, { "epoch": 1.0271918272268448, "grad_norm": 0.1259431391954422, "learning_rate": 0.0002972070020932676, "loss": 2.0463, "step": 269870 }, { "epoch": 1.0272298896949674, "grad_norm": 0.1325017213821411, "learning_rate": 0.000297104296086224, "loss": 2.0418, "step": 269880 }, { "epoch": 1.02726795216309, "grad_norm": 0.13986846804618835, "learning_rate": 0.00029700164204276593, "loss": 2.051, "step": 269890 }, { "epoch": 1.0273060146312127, "grad_norm": 0.13025562465190887, "learning_rate": 0.000296899039884101, "loss": 2.0497, "step": 269900 }, { "epoch": 1.0273440770993354, "grad_norm": 0.11536869406700134, "learning_rate": 0.0002967964895316356, "loss": 2.0686, "step": 269910 }, { "epoch": 1.027382139567458, "grad_norm": 0.12382342666387558, "learning_rate": 0.0002966939909069746, "loss": 2.0553, "step": 269920 }, { "epoch": 1.0274202020355807, "grad_norm": 0.12849363684654236, "learning_rate": 0.00029659154393192014, "loss": 2.0469, "step": 269930 }, { "epoch": 1.0274582645037036, "grad_norm": 0.123623326420784, "learning_rate": 0.0002964891485284712, "loss": 2.0419, "step": 269940 }, { "epoch": 1.0274963269718262, "grad_norm": 0.12088244408369064, "learning_rate": 0.00029638680461882306, "loss": 2.0494, "step": 269950 }, { "epoch": 1.027534389439949, "grad_norm": 0.1202690601348877, "learning_rate": 0.0002962845121253663, "loss": 2.0575, "step": 269960 }, { "epoch": 1.0275724519080716, "grad_norm": 0.1362772434949875, "learning_rate": 0.0002961822709706865, "loss": 2.0621, "step": 269970 }, { "epoch": 1.0276105143761942, "grad_norm": 0.13600513339042664, "learning_rate": 0.0002960800810775629, "loss": 2.0292, "step": 269980 }, { "epoch": 1.0276485768443169, "grad_norm": 0.11714157462120056, "learning_rate": 0.00029597794236896834, "loss": 2.0388, "step": 269990 }, { "epoch": 1.0276866393124395, "grad_norm": 0.11797063052654266, "learning_rate": 0.0002958758547680685, "loss": 2.0347, "step": 270000 }, { "epoch": 1.0277247017805622, "grad_norm": 0.1323215812444687, "learning_rate": 0.0002957738181982209, "loss": 2.0464, "step": 270010 }, { "epoch": 1.0277627642486848, "grad_norm": 0.13525962829589844, "learning_rate": 0.00029567183258297447, "loss": 2.0588, "step": 270020 }, { "epoch": 1.0278008267168077, "grad_norm": 0.13091787695884705, "learning_rate": 0.000295569897846069, "loss": 2.0575, "step": 270030 }, { "epoch": 1.0278388891849304, "grad_norm": 0.13230513036251068, "learning_rate": 0.0002954680139114341, "loss": 2.0416, "step": 270040 }, { "epoch": 1.027876951653053, "grad_norm": 0.12935812771320343, "learning_rate": 0.00029536618070318877, "loss": 2.0577, "step": 270050 }, { "epoch": 1.0279150141211757, "grad_norm": 0.12722337245941162, "learning_rate": 0.00029526439814564086, "loss": 2.0404, "step": 270060 }, { "epoch": 1.0279530765892984, "grad_norm": 0.14798662066459656, "learning_rate": 0.0002951626661632862, "loss": 2.0445, "step": 270070 }, { "epoch": 1.027991139057421, "grad_norm": 0.1255602240562439, "learning_rate": 0.000295060984680808, "loss": 2.0257, "step": 270080 }, { "epoch": 1.0280292015255437, "grad_norm": 0.1310020089149475, "learning_rate": 0.00029495935362307637, "loss": 2.0485, "step": 270090 }, { "epoch": 1.0280672639936663, "grad_norm": 0.12309551239013672, "learning_rate": 0.0002948577729151471, "loss": 2.0537, "step": 270100 }, { "epoch": 1.028105326461789, "grad_norm": 0.13925056159496307, "learning_rate": 0.00029475624248226205, "loss": 2.049, "step": 270110 }, { "epoch": 1.0281433889299119, "grad_norm": 0.13675113022327423, "learning_rate": 0.0002946547622498475, "loss": 2.0443, "step": 270120 }, { "epoch": 1.0281814513980345, "grad_norm": 0.13782602548599243, "learning_rate": 0.0002945533321435139, "loss": 2.042, "step": 270130 }, { "epoch": 1.0282195138661572, "grad_norm": 0.12527993321418762, "learning_rate": 0.0002944519520890553, "loss": 2.0517, "step": 270140 }, { "epoch": 1.0282575763342798, "grad_norm": 0.12390842288732529, "learning_rate": 0.0002943506220124489, "loss": 2.0416, "step": 270150 }, { "epoch": 1.0282956388024025, "grad_norm": 0.12301144003868103, "learning_rate": 0.0002942493418398538, "loss": 2.0467, "step": 270160 }, { "epoch": 1.0283337012705251, "grad_norm": 0.12911370396614075, "learning_rate": 0.000294148111497611, "loss": 2.0395, "step": 270170 }, { "epoch": 1.0283717637386478, "grad_norm": 0.12177471816539764, "learning_rate": 0.0002940469309122424, "loss": 2.0481, "step": 270180 }, { "epoch": 1.0284098262067705, "grad_norm": 0.12141739577054977, "learning_rate": 0.00029394580001045035, "loss": 2.0451, "step": 270190 }, { "epoch": 1.0284478886748933, "grad_norm": 0.1493520736694336, "learning_rate": 0.000293844718719117, "loss": 2.0475, "step": 270200 }, { "epoch": 1.028485951143016, "grad_norm": 0.11416112631559372, "learning_rate": 0.0002937436869653036, "loss": 2.0371, "step": 270210 }, { "epoch": 1.0285240136111387, "grad_norm": 0.1214093267917633, "learning_rate": 0.00029364270467625007, "loss": 2.05, "step": 270220 }, { "epoch": 1.0285620760792613, "grad_norm": 0.1179608702659607, "learning_rate": 0.00029354177177937427, "loss": 2.0268, "step": 270230 }, { "epoch": 1.028600138547384, "grad_norm": 0.13988269865512848, "learning_rate": 0.00029344088820227113, "loss": 2.0496, "step": 270240 }, { "epoch": 1.0286382010155066, "grad_norm": 0.14148426055908203, "learning_rate": 0.00029334005387271256, "loss": 2.0278, "step": 270250 }, { "epoch": 1.0286762634836293, "grad_norm": 0.1297960877418518, "learning_rate": 0.0002932392687186467, "loss": 2.0563, "step": 270260 }, { "epoch": 1.028714325951752, "grad_norm": 0.12837962806224823, "learning_rate": 0.00029313853266819685, "loss": 2.0566, "step": 270270 }, { "epoch": 1.0287523884198746, "grad_norm": 0.11966746300458908, "learning_rate": 0.0002930378456496615, "loss": 2.0314, "step": 270280 }, { "epoch": 1.0287904508879975, "grad_norm": 0.1341601312160492, "learning_rate": 0.00029293720759151343, "loss": 2.0373, "step": 270290 }, { "epoch": 1.0288285133561201, "grad_norm": 0.12876875698566437, "learning_rate": 0.0002928366184223991, "loss": 2.0385, "step": 270300 }, { "epoch": 1.0288665758242428, "grad_norm": 0.1265905350446701, "learning_rate": 0.0002927360780711382, "loss": 2.0351, "step": 270310 }, { "epoch": 1.0289046382923654, "grad_norm": 0.13416585326194763, "learning_rate": 0.00029263558646672286, "loss": 2.0451, "step": 270320 }, { "epoch": 1.028942700760488, "grad_norm": 0.12282083928585052, "learning_rate": 0.00029253514353831715, "loss": 2.0508, "step": 270330 }, { "epoch": 1.0289807632286108, "grad_norm": 0.1517842561006546, "learning_rate": 0.00029243474921525684, "loss": 2.0515, "step": 270340 }, { "epoch": 1.0290188256967334, "grad_norm": 0.16193543374538422, "learning_rate": 0.00029233440342704817, "loss": 2.0535, "step": 270350 }, { "epoch": 1.029056888164856, "grad_norm": 0.12937527894973755, "learning_rate": 0.0002922341061033678, "loss": 2.0328, "step": 270360 }, { "epoch": 1.0290949506329787, "grad_norm": 0.12712572515010834, "learning_rate": 0.00029213385717406185, "loss": 2.0375, "step": 270370 }, { "epoch": 1.0291330131011016, "grad_norm": 0.12684941291809082, "learning_rate": 0.0002920336565691458, "loss": 2.0545, "step": 270380 }, { "epoch": 1.0291710755692243, "grad_norm": 0.13399958610534668, "learning_rate": 0.0002919335042188035, "loss": 2.0469, "step": 270390 }, { "epoch": 1.029209138037347, "grad_norm": 0.13286960124969482, "learning_rate": 0.00029183340005338676, "loss": 2.0511, "step": 270400 }, { "epoch": 1.0292472005054696, "grad_norm": 0.1161641776561737, "learning_rate": 0.00029173334400341475, "loss": 2.0366, "step": 270410 }, { "epoch": 1.0292852629735922, "grad_norm": 0.12700442969799042, "learning_rate": 0.0002916333359995734, "loss": 2.0338, "step": 270420 }, { "epoch": 1.029323325441715, "grad_norm": 0.1438145488500595, "learning_rate": 0.0002915333759727151, "loss": 2.0537, "step": 270430 }, { "epoch": 1.0293613879098376, "grad_norm": 0.13152122497558594, "learning_rate": 0.00029143346385385795, "loss": 2.0535, "step": 270440 }, { "epoch": 1.0293994503779602, "grad_norm": 0.11739667505025864, "learning_rate": 0.00029133359957418475, "loss": 2.0367, "step": 270450 }, { "epoch": 1.029437512846083, "grad_norm": 0.12094622105360031, "learning_rate": 0.0002912337830650434, "loss": 2.0609, "step": 270460 }, { "epoch": 1.0294755753142057, "grad_norm": 0.1305398941040039, "learning_rate": 0.00029113401425794576, "loss": 2.0372, "step": 270470 }, { "epoch": 1.0295136377823284, "grad_norm": 0.13244089484214783, "learning_rate": 0.00029103429308456694, "loss": 2.0342, "step": 270480 }, { "epoch": 1.029551700250451, "grad_norm": 0.13122345507144928, "learning_rate": 0.00029093461947674515, "loss": 2.0369, "step": 270490 }, { "epoch": 1.0295897627185737, "grad_norm": 0.12100960314273834, "learning_rate": 0.0002908349933664811, "loss": 2.0455, "step": 270500 }, { "epoch": 1.0296278251866964, "grad_norm": 0.1490957885980606, "learning_rate": 0.0002907354146859372, "loss": 2.0532, "step": 270510 }, { "epoch": 1.029665887654819, "grad_norm": 0.13087870180606842, "learning_rate": 0.0002906358833674373, "loss": 2.0472, "step": 270520 }, { "epoch": 1.0297039501229417, "grad_norm": 0.13519656658172607, "learning_rate": 0.0002905363993434659, "loss": 2.0577, "step": 270530 }, { "epoch": 1.0297420125910643, "grad_norm": 0.13375785946846008, "learning_rate": 0.00029043696254666795, "loss": 2.036, "step": 270540 }, { "epoch": 1.0297800750591872, "grad_norm": 0.12649527192115784, "learning_rate": 0.000290337572909848, "loss": 2.0623, "step": 270550 }, { "epoch": 1.0298181375273099, "grad_norm": 0.14169186353683472, "learning_rate": 0.0002902382303659697, "loss": 2.0439, "step": 270560 }, { "epoch": 1.0298561999954325, "grad_norm": 0.14085915684700012, "learning_rate": 0.00029013893484815565, "loss": 2.0355, "step": 270570 }, { "epoch": 1.0298942624635552, "grad_norm": 0.13160523772239685, "learning_rate": 0.00029003968628968634, "loss": 2.0297, "step": 270580 }, { "epoch": 1.0299323249316779, "grad_norm": 0.1466887891292572, "learning_rate": 0.000289940484624, "loss": 2.0551, "step": 270590 }, { "epoch": 1.0299703873998005, "grad_norm": 0.15212509036064148, "learning_rate": 0.0002898413297846918, "loss": 2.0437, "step": 270600 }, { "epoch": 1.0300084498679232, "grad_norm": 0.1452605426311493, "learning_rate": 0.00028974222170551376, "loss": 2.056, "step": 270610 }, { "epoch": 1.0300465123360458, "grad_norm": 0.1186077743768692, "learning_rate": 0.00028964316032037375, "loss": 2.066, "step": 270620 }, { "epoch": 1.0300845748041687, "grad_norm": 0.1270798295736313, "learning_rate": 0.0002895441455633351, "loss": 2.0402, "step": 270630 }, { "epoch": 1.0301226372722914, "grad_norm": 0.1419951617717743, "learning_rate": 0.0002894451773686166, "loss": 2.0533, "step": 270640 }, { "epoch": 1.030160699740414, "grad_norm": 0.13650859892368317, "learning_rate": 0.00028934625567059104, "loss": 2.0297, "step": 270650 }, { "epoch": 1.0301987622085367, "grad_norm": 0.11910858750343323, "learning_rate": 0.0002892473804037856, "loss": 2.0631, "step": 270660 }, { "epoch": 1.0302368246766593, "grad_norm": 0.12136068940162659, "learning_rate": 0.0002891485515028808, "loss": 2.0589, "step": 270670 }, { "epoch": 1.030274887144782, "grad_norm": 0.12515807151794434, "learning_rate": 0.00028904976890271016, "loss": 2.0337, "step": 270680 }, { "epoch": 1.0303129496129046, "grad_norm": 0.13482242822647095, "learning_rate": 0.0002889510325382598, "loss": 2.0463, "step": 270690 }, { "epoch": 1.0303510120810273, "grad_norm": 0.14589546620845795, "learning_rate": 0.00028885234234466784, "loss": 2.0644, "step": 270700 }, { "epoch": 1.03038907454915, "grad_norm": 0.1272658407688141, "learning_rate": 0.0002887536982572239, "loss": 2.0505, "step": 270710 }, { "epoch": 1.0304271370172728, "grad_norm": 0.15014109015464783, "learning_rate": 0.0002886551002113685, "loss": 2.0567, "step": 270720 }, { "epoch": 1.0304651994853955, "grad_norm": 0.11785303056240082, "learning_rate": 0.00028855654814269294, "loss": 2.0389, "step": 270730 }, { "epoch": 1.0305032619535182, "grad_norm": 0.12073080986738205, "learning_rate": 0.0002884580419869382, "loss": 2.0508, "step": 270740 }, { "epoch": 1.0305413244216408, "grad_norm": 0.14707377552986145, "learning_rate": 0.0002883595816799951, "loss": 2.0511, "step": 270750 }, { "epoch": 1.0305793868897635, "grad_norm": 0.13029642403125763, "learning_rate": 0.0002882611671579034, "loss": 2.033, "step": 270760 }, { "epoch": 1.0306174493578861, "grad_norm": 0.13907068967819214, "learning_rate": 0.00028816279835685145, "loss": 2.0579, "step": 270770 }, { "epoch": 1.0306555118260088, "grad_norm": 0.14463141560554504, "learning_rate": 0.0002880644752131756, "loss": 2.0451, "step": 270780 }, { "epoch": 1.0306935742941314, "grad_norm": 0.1304018646478653, "learning_rate": 0.00028796619766336, "loss": 2.0389, "step": 270790 }, { "epoch": 1.0307316367622543, "grad_norm": 0.14264269173145294, "learning_rate": 0.00028786796564403573, "loss": 2.0434, "step": 270800 }, { "epoch": 1.030769699230377, "grad_norm": 0.13232994079589844, "learning_rate": 0.0002877697790919807, "loss": 2.0544, "step": 270810 }, { "epoch": 1.0308077616984996, "grad_norm": 0.12084189802408218, "learning_rate": 0.0002876716379441189, "loss": 2.0471, "step": 270820 }, { "epoch": 1.0308458241666223, "grad_norm": 0.13656097650527954, "learning_rate": 0.00028757354213751994, "loss": 2.0594, "step": 270830 }, { "epoch": 1.030883886634745, "grad_norm": 0.12707364559173584, "learning_rate": 0.0002874754916093989, "loss": 2.047, "step": 270840 }, { "epoch": 1.0309219491028676, "grad_norm": 0.13456393778324127, "learning_rate": 0.0002873774862971155, "loss": 2.0666, "step": 270850 }, { "epoch": 1.0309600115709903, "grad_norm": 0.11755143851041794, "learning_rate": 0.00028727952613817354, "loss": 2.0423, "step": 270860 }, { "epoch": 1.030998074039113, "grad_norm": 0.12134380638599396, "learning_rate": 0.000287181611070221, "loss": 2.0474, "step": 270870 }, { "epoch": 1.0310361365072356, "grad_norm": 0.14861641824245453, "learning_rate": 0.0002870837410310492, "loss": 2.0432, "step": 270880 }, { "epoch": 1.0310741989753585, "grad_norm": 0.13673312962055206, "learning_rate": 0.0002869859159585921, "loss": 2.0353, "step": 270890 }, { "epoch": 1.0311122614434811, "grad_norm": 0.12540844082832336, "learning_rate": 0.0002868881357909263, "loss": 2.0423, "step": 270900 }, { "epoch": 1.0311503239116038, "grad_norm": 0.13241389393806458, "learning_rate": 0.0002867904004662705, "loss": 2.0427, "step": 270910 }, { "epoch": 1.0311883863797264, "grad_norm": 0.1488330215215683, "learning_rate": 0.00028669270992298456, "loss": 2.0439, "step": 270920 }, { "epoch": 1.031226448847849, "grad_norm": 0.13117197155952454, "learning_rate": 0.00028659506409956995, "loss": 2.0439, "step": 270930 }, { "epoch": 1.0312645113159717, "grad_norm": 0.12436238676309586, "learning_rate": 0.00028649746293466833, "loss": 2.0582, "step": 270940 }, { "epoch": 1.0313025737840944, "grad_norm": 0.134196937084198, "learning_rate": 0.0002863999063670617, "loss": 2.0517, "step": 270950 }, { "epoch": 1.031340636252217, "grad_norm": 0.12425374239683151, "learning_rate": 0.00028630239433567197, "loss": 2.0472, "step": 270960 }, { "epoch": 1.0313786987203397, "grad_norm": 0.12356572598218918, "learning_rate": 0.00028620492677955997, "loss": 2.0396, "step": 270970 }, { "epoch": 1.0314167611884626, "grad_norm": 0.1313382238149643, "learning_rate": 0.0002861075036379256, "loss": 2.0492, "step": 270980 }, { "epoch": 1.0314548236565853, "grad_norm": 0.13148029148578644, "learning_rate": 0.0002860101248501073, "loss": 2.0416, "step": 270990 }, { "epoch": 1.031492886124708, "grad_norm": 0.12867027521133423, "learning_rate": 0.0002859127903555812, "loss": 2.0288, "step": 271000 }, { "epoch": 1.0315309485928306, "grad_norm": 0.12998327612876892, "learning_rate": 0.00028581550009396106, "loss": 2.0544, "step": 271010 }, { "epoch": 1.0315690110609532, "grad_norm": 0.1239062026143074, "learning_rate": 0.0002857182540049977, "loss": 2.033, "step": 271020 }, { "epoch": 1.0316070735290759, "grad_norm": 0.1187189444899559, "learning_rate": 0.0002856210520285788, "loss": 2.0454, "step": 271030 }, { "epoch": 1.0316451359971985, "grad_norm": 0.14411544799804688, "learning_rate": 0.00028552389410472785, "loss": 2.0359, "step": 271040 }, { "epoch": 1.0316831984653212, "grad_norm": 0.12118139117956161, "learning_rate": 0.00028542678017360446, "loss": 2.0341, "step": 271050 }, { "epoch": 1.031721260933444, "grad_norm": 0.13053321838378906, "learning_rate": 0.00028532971017550344, "loss": 2.0394, "step": 271060 }, { "epoch": 1.0317593234015667, "grad_norm": 0.1759636551141739, "learning_rate": 0.00028523268405085465, "loss": 2.0489, "step": 271070 }, { "epoch": 1.0317973858696894, "grad_norm": 0.14842788875102997, "learning_rate": 0.0002851357017402224, "loss": 2.0419, "step": 271080 }, { "epoch": 1.031835448337812, "grad_norm": 0.12284272164106369, "learning_rate": 0.0002850387631843049, "loss": 2.0399, "step": 271090 }, { "epoch": 1.0318735108059347, "grad_norm": 0.1309538185596466, "learning_rate": 0.00028494186832393433, "loss": 2.0462, "step": 271100 }, { "epoch": 1.0319115732740574, "grad_norm": 0.14089371263980865, "learning_rate": 0.00028484501710007585, "loss": 2.0324, "step": 271110 }, { "epoch": 1.03194963574218, "grad_norm": 0.1265069842338562, "learning_rate": 0.0002847482094538275, "loss": 2.0399, "step": 271120 }, { "epoch": 1.0319876982103027, "grad_norm": 0.1347149908542633, "learning_rate": 0.00028465144532641975, "loss": 2.0483, "step": 271130 }, { "epoch": 1.0320257606784253, "grad_norm": 0.13016733527183533, "learning_rate": 0.00028455472465921497, "loss": 2.0477, "step": 271140 }, { "epoch": 1.0320638231465482, "grad_norm": 0.14013062417507172, "learning_rate": 0.0002844580473937073, "loss": 2.0515, "step": 271150 }, { "epoch": 1.0321018856146709, "grad_norm": 0.1346028596162796, "learning_rate": 0.0002843614134715218, "loss": 2.0354, "step": 271160 }, { "epoch": 1.0321399480827935, "grad_norm": 0.1217736080288887, "learning_rate": 0.0002842648228344142, "loss": 2.0338, "step": 271170 }, { "epoch": 1.0321780105509162, "grad_norm": 0.11644167453050613, "learning_rate": 0.000284168275424271, "loss": 2.0353, "step": 271180 }, { "epoch": 1.0322160730190388, "grad_norm": 0.11918290704488754, "learning_rate": 0.00028407177118310815, "loss": 2.0449, "step": 271190 }, { "epoch": 1.0322541354871615, "grad_norm": 0.12893658876419067, "learning_rate": 0.00028397531005307133, "loss": 2.0265, "step": 271200 }, { "epoch": 1.0322921979552842, "grad_norm": 0.1387089639902115, "learning_rate": 0.0002838788919764353, "loss": 2.0351, "step": 271210 }, { "epoch": 1.0323302604234068, "grad_norm": 0.12826283276081085, "learning_rate": 0.00028378251689560347, "loss": 2.0457, "step": 271220 }, { "epoch": 1.0323683228915295, "grad_norm": 0.11565086990594864, "learning_rate": 0.0002836861847531077, "loss": 2.0441, "step": 271230 }, { "epoch": 1.0324063853596523, "grad_norm": 0.1372179388999939, "learning_rate": 0.00028358989549160757, "loss": 2.051, "step": 271240 }, { "epoch": 1.032444447827775, "grad_norm": 0.16050764918327332, "learning_rate": 0.00028349364905389034, "loss": 2.0405, "step": 271250 }, { "epoch": 1.0324825102958977, "grad_norm": 0.12388958036899567, "learning_rate": 0.00028339744538287024, "loss": 2.0409, "step": 271260 }, { "epoch": 1.0325205727640203, "grad_norm": 0.13892515003681183, "learning_rate": 0.0002833012844215884, "loss": 2.0442, "step": 271270 }, { "epoch": 1.032558635232143, "grad_norm": 0.12876741588115692, "learning_rate": 0.000283205166113212, "loss": 2.0392, "step": 271280 }, { "epoch": 1.0325966977002656, "grad_norm": 0.13448983430862427, "learning_rate": 0.0002831090904010344, "loss": 2.0362, "step": 271290 }, { "epoch": 1.0326347601683883, "grad_norm": 0.13363835215568542, "learning_rate": 0.0002830130572284744, "loss": 2.0282, "step": 271300 }, { "epoch": 1.032672822636511, "grad_norm": 0.1334499567747116, "learning_rate": 0.0002829170665390759, "loss": 2.038, "step": 271310 }, { "epoch": 1.0327108851046338, "grad_norm": 0.1251898854970932, "learning_rate": 0.00028282111827650767, "loss": 2.0388, "step": 271320 }, { "epoch": 1.0327489475727565, "grad_norm": 0.14659687876701355, "learning_rate": 0.00028272521238456285, "loss": 2.0515, "step": 271330 }, { "epoch": 1.0327870100408791, "grad_norm": 0.12558038532733917, "learning_rate": 0.0002826293488071584, "loss": 2.0342, "step": 271340 }, { "epoch": 1.0328250725090018, "grad_norm": 0.1283547431230545, "learning_rate": 0.00028253352748833514, "loss": 2.0362, "step": 271350 }, { "epoch": 1.0328631349771245, "grad_norm": 0.12782080471515656, "learning_rate": 0.0002824377483722571, "loss": 2.0587, "step": 271360 }, { "epoch": 1.032901197445247, "grad_norm": 0.12600749731063843, "learning_rate": 0.00028234201140321083, "loss": 2.041, "step": 271370 }, { "epoch": 1.0329392599133698, "grad_norm": 0.13655886054039001, "learning_rate": 0.0002822463165256058, "loss": 2.0494, "step": 271380 }, { "epoch": 1.0329773223814924, "grad_norm": 0.1363477259874344, "learning_rate": 0.00028215066368397323, "loss": 2.0404, "step": 271390 }, { "epoch": 1.033015384849615, "grad_norm": 0.1418614387512207, "learning_rate": 0.00028205505282296635, "loss": 2.0365, "step": 271400 }, { "epoch": 1.033053447317738, "grad_norm": 0.12292768061161041, "learning_rate": 0.00028195948388735946, "loss": 2.037, "step": 271410 }, { "epoch": 1.0330915097858606, "grad_norm": 0.11791270226240158, "learning_rate": 0.000281863956822048, "loss": 2.0417, "step": 271420 }, { "epoch": 1.0331295722539833, "grad_norm": 0.11475887894630432, "learning_rate": 0.00028176847157204807, "loss": 2.0327, "step": 271430 }, { "epoch": 1.033167634722106, "grad_norm": 0.1630665808916092, "learning_rate": 0.0002816730280824958, "loss": 2.0361, "step": 271440 }, { "epoch": 1.0332056971902286, "grad_norm": 0.13486617803573608, "learning_rate": 0.00028157762629864736, "loss": 2.0385, "step": 271450 }, { "epoch": 1.0332437596583512, "grad_norm": 0.1664140820503235, "learning_rate": 0.00028148226616587845, "loss": 2.0249, "step": 271460 }, { "epoch": 1.033281822126474, "grad_norm": 0.13183072209358215, "learning_rate": 0.00028138694762968366, "loss": 2.0455, "step": 271470 }, { "epoch": 1.0333198845945966, "grad_norm": 0.12325718253850937, "learning_rate": 0.0002812916706356767, "loss": 2.0388, "step": 271480 }, { "epoch": 1.0333579470627194, "grad_norm": 0.1310417354106903, "learning_rate": 0.00028119643512958937, "loss": 2.0444, "step": 271490 }, { "epoch": 1.033396009530842, "grad_norm": 0.12681756913661957, "learning_rate": 0.00028110124105727176, "loss": 2.0375, "step": 271500 }, { "epoch": 1.0334340719989648, "grad_norm": 0.12703047692775726, "learning_rate": 0.0002810060883646914, "loss": 2.0323, "step": 271510 }, { "epoch": 1.0334721344670874, "grad_norm": 0.1270838975906372, "learning_rate": 0.00028091097699793356, "loss": 2.039, "step": 271520 }, { "epoch": 1.03351019693521, "grad_norm": 0.13786223530769348, "learning_rate": 0.0002808159069032, "loss": 2.0454, "step": 271530 }, { "epoch": 1.0335482594033327, "grad_norm": 0.12944789230823517, "learning_rate": 0.00028072087802680955, "loss": 2.0484, "step": 271540 }, { "epoch": 1.0335863218714554, "grad_norm": 0.12100300192832947, "learning_rate": 0.0002806258903151969, "loss": 2.0474, "step": 271550 }, { "epoch": 1.033624384339578, "grad_norm": 0.160098597407341, "learning_rate": 0.00028053094371491305, "loss": 2.0456, "step": 271560 }, { "epoch": 1.0336624468077007, "grad_norm": 0.1333358883857727, "learning_rate": 0.0002804360381726242, "loss": 2.0491, "step": 271570 }, { "epoch": 1.0337005092758236, "grad_norm": 0.1314467042684555, "learning_rate": 0.000280341173635112, "loss": 2.0408, "step": 271580 }, { "epoch": 1.0337385717439462, "grad_norm": 0.13625569641590118, "learning_rate": 0.000280246350049273, "loss": 2.0622, "step": 271590 }, { "epoch": 1.033776634212069, "grad_norm": 0.12897242605686188, "learning_rate": 0.000280151567362118, "loss": 2.0384, "step": 271600 }, { "epoch": 1.0338146966801915, "grad_norm": 0.12388896942138672, "learning_rate": 0.0002800568255207723, "loss": 2.02, "step": 271610 }, { "epoch": 1.0338527591483142, "grad_norm": 0.13515430688858032, "learning_rate": 0.00027996212447247484, "loss": 2.0463, "step": 271620 }, { "epoch": 1.0338908216164369, "grad_norm": 0.13997548818588257, "learning_rate": 0.0002798674641645781, "loss": 2.0293, "step": 271630 }, { "epoch": 1.0339288840845595, "grad_norm": 0.1346750557422638, "learning_rate": 0.0002797728445445476, "loss": 2.0568, "step": 271640 }, { "epoch": 1.0339669465526822, "grad_norm": 0.12908905744552612, "learning_rate": 0.0002796782655599619, "loss": 2.0483, "step": 271650 }, { "epoch": 1.034005009020805, "grad_norm": 0.12039399892091751, "learning_rate": 0.00027958372715851187, "loss": 2.0488, "step": 271660 }, { "epoch": 1.0340430714889277, "grad_norm": 0.13701388239860535, "learning_rate": 0.0002794892292880006, "loss": 2.042, "step": 271670 }, { "epoch": 1.0340811339570504, "grad_norm": 0.1296730488538742, "learning_rate": 0.0002793947718963427, "loss": 2.048, "step": 271680 }, { "epoch": 1.034119196425173, "grad_norm": 0.14297781884670258, "learning_rate": 0.0002793003549315647, "loss": 2.0565, "step": 271690 }, { "epoch": 1.0341572588932957, "grad_norm": 0.1596149206161499, "learning_rate": 0.0002792059783418038, "loss": 2.0322, "step": 271700 }, { "epoch": 1.0341953213614183, "grad_norm": 0.13431543111801147, "learning_rate": 0.0002791116420753084, "loss": 2.0342, "step": 271710 }, { "epoch": 1.034233383829541, "grad_norm": 0.11944739520549774, "learning_rate": 0.00027901734608043704, "loss": 2.0376, "step": 271720 }, { "epoch": 1.0342714462976637, "grad_norm": 0.13216999173164368, "learning_rate": 0.00027892309030565855, "loss": 2.0317, "step": 271730 }, { "epoch": 1.0343095087657863, "grad_norm": 0.13707774877548218, "learning_rate": 0.0002788288746995515, "loss": 2.0322, "step": 271740 }, { "epoch": 1.0343475712339092, "grad_norm": 0.13045634329319, "learning_rate": 0.0002787346992108041, "loss": 2.0405, "step": 271750 }, { "epoch": 1.0343856337020318, "grad_norm": 0.13780523836612701, "learning_rate": 0.00027864056378821346, "loss": 2.0402, "step": 271760 }, { "epoch": 1.0344236961701545, "grad_norm": 0.11843425035476685, "learning_rate": 0.00027854646838068565, "loss": 2.0361, "step": 271770 }, { "epoch": 1.0344617586382772, "grad_norm": 0.1744195818901062, "learning_rate": 0.00027845241293723524, "loss": 2.0439, "step": 271780 }, { "epoch": 1.0344998211063998, "grad_norm": 0.13265101611614227, "learning_rate": 0.000278358397406985, "loss": 2.0341, "step": 271790 }, { "epoch": 1.0345378835745225, "grad_norm": 0.1773340404033661, "learning_rate": 0.00027826442173916547, "loss": 2.0429, "step": 271800 }, { "epoch": 1.0345759460426451, "grad_norm": 0.12701316177845, "learning_rate": 0.0002781704858831149, "loss": 2.0415, "step": 271810 }, { "epoch": 1.0346140085107678, "grad_norm": 0.14962537586688995, "learning_rate": 0.00027807658978827853, "loss": 2.0328, "step": 271820 }, { "epoch": 1.0346520709788904, "grad_norm": 0.12321054190397263, "learning_rate": 0.0002779827334042087, "loss": 2.0429, "step": 271830 }, { "epoch": 1.0346901334470133, "grad_norm": 0.4225773215293884, "learning_rate": 0.00027788891668056425, "loss": 2.045, "step": 271840 }, { "epoch": 1.034728195915136, "grad_norm": 0.14254769682884216, "learning_rate": 0.00027779513956711026, "loss": 2.0565, "step": 271850 }, { "epoch": 1.0347662583832586, "grad_norm": 0.13923725485801697, "learning_rate": 0.00027770140201371793, "loss": 2.0555, "step": 271860 }, { "epoch": 1.0348043208513813, "grad_norm": 0.13378120958805084, "learning_rate": 0.00027760770397036385, "loss": 2.0425, "step": 271870 }, { "epoch": 1.034842383319504, "grad_norm": 0.14077191054821014, "learning_rate": 0.0002775140453871301, "loss": 2.0569, "step": 271880 }, { "epoch": 1.0348804457876266, "grad_norm": 0.13247641921043396, "learning_rate": 0.00027742042621420386, "loss": 2.0413, "step": 271890 }, { "epoch": 1.0349185082557493, "grad_norm": 0.1335470974445343, "learning_rate": 0.0002773268464018769, "loss": 2.0605, "step": 271900 }, { "epoch": 1.034956570723872, "grad_norm": 0.12615036964416504, "learning_rate": 0.00027723330590054534, "loss": 2.041, "step": 271910 }, { "epoch": 1.0349946331919948, "grad_norm": 0.12654559314250946, "learning_rate": 0.0002771398046607096, "loss": 2.0372, "step": 271920 }, { "epoch": 1.0350326956601175, "grad_norm": 0.14396308362483978, "learning_rate": 0.0002770463426329738, "loss": 2.0384, "step": 271930 }, { "epoch": 1.0350707581282401, "grad_norm": 0.15201355516910553, "learning_rate": 0.0002769529197680454, "loss": 2.034, "step": 271940 }, { "epoch": 1.0351088205963628, "grad_norm": 0.1299097239971161, "learning_rate": 0.0002768595360167353, "loss": 2.0445, "step": 271950 }, { "epoch": 1.0351468830644854, "grad_norm": 0.12313937395811081, "learning_rate": 0.00027676619132995704, "loss": 2.0524, "step": 271960 }, { "epoch": 1.035184945532608, "grad_norm": 0.14119631052017212, "learning_rate": 0.00027667288565872707, "loss": 2.0346, "step": 271970 }, { "epoch": 1.0352230080007307, "grad_norm": 0.13589946925640106, "learning_rate": 0.00027657961895416374, "loss": 2.0463, "step": 271980 }, { "epoch": 1.0352610704688534, "grad_norm": 0.1256546825170517, "learning_rate": 0.00027648639116748766, "loss": 2.0517, "step": 271990 }, { "epoch": 1.035299132936976, "grad_norm": 0.13876435160636902, "learning_rate": 0.0002763932022500211, "loss": 2.0387, "step": 272000 }, { "epoch": 1.035337195405099, "grad_norm": 0.13768640160560608, "learning_rate": 0.0002763000521531874, "loss": 2.0265, "step": 272010 }, { "epoch": 1.0353752578732216, "grad_norm": 0.13261787593364716, "learning_rate": 0.00027620694082851154, "loss": 2.0189, "step": 272020 }, { "epoch": 1.0354133203413443, "grad_norm": 0.11902239173650742, "learning_rate": 0.00027611386822761885, "loss": 2.036, "step": 272030 }, { "epoch": 1.035451382809467, "grad_norm": 0.1425875723361969, "learning_rate": 0.00027602083430223546, "loss": 2.0414, "step": 272040 }, { "epoch": 1.0354894452775896, "grad_norm": 0.14108528196811676, "learning_rate": 0.0002759278390041875, "loss": 2.0214, "step": 272050 }, { "epoch": 1.0355275077457122, "grad_norm": 0.17860999703407288, "learning_rate": 0.00027583488228540107, "loss": 2.0408, "step": 272060 }, { "epoch": 1.0355655702138349, "grad_norm": 0.14166538417339325, "learning_rate": 0.000275741964097902, "loss": 2.0608, "step": 272070 }, { "epoch": 1.0356036326819575, "grad_norm": 0.13897563517093658, "learning_rate": 0.0002756490843938155, "loss": 2.0373, "step": 272080 }, { "epoch": 1.0356416951500802, "grad_norm": 0.12821650505065918, "learning_rate": 0.0002755562431253656, "loss": 2.0291, "step": 272090 }, { "epoch": 1.035679757618203, "grad_norm": 0.1173110380768776, "learning_rate": 0.0002754634402448754, "loss": 2.0345, "step": 272100 }, { "epoch": 1.0357178200863257, "grad_norm": 0.1263628900051117, "learning_rate": 0.00027537067570476616, "loss": 2.0261, "step": 272110 }, { "epoch": 1.0357558825544484, "grad_norm": 0.13653993606567383, "learning_rate": 0.0002752779494575577, "loss": 2.0366, "step": 272120 }, { "epoch": 1.035793945022571, "grad_norm": 0.12434863299131393, "learning_rate": 0.0002751852614558675, "loss": 2.0579, "step": 272130 }, { "epoch": 1.0358320074906937, "grad_norm": 0.12659211456775665, "learning_rate": 0.00027509261165241074, "loss": 2.0289, "step": 272140 }, { "epoch": 1.0358700699588164, "grad_norm": 0.12357845157384872, "learning_rate": 0.000275, "loss": 2.0485, "step": 272150 }, { "epoch": 1.035908132426939, "grad_norm": 0.12654438614845276, "learning_rate": 0.0002749074264515449, "loss": 2.045, "step": 272160 }, { "epoch": 1.0359461948950617, "grad_norm": 0.11802220344543457, "learning_rate": 0.00027481489096005184, "loss": 2.0446, "step": 272170 }, { "epoch": 1.0359842573631846, "grad_norm": 0.14644622802734375, "learning_rate": 0.00027472239347862385, "loss": 2.0423, "step": 272180 }, { "epoch": 1.0360223198313072, "grad_norm": 0.13627606630325317, "learning_rate": 0.0002746299339604599, "loss": 2.0408, "step": 272190 }, { "epoch": 1.0360603822994299, "grad_norm": 0.14931724965572357, "learning_rate": 0.00027453751235885526, "loss": 2.0406, "step": 272200 }, { "epoch": 1.0360984447675525, "grad_norm": 0.17233909666538239, "learning_rate": 0.00027444512862720086, "loss": 2.0531, "step": 272210 }, { "epoch": 1.0361365072356752, "grad_norm": 0.13387896120548248, "learning_rate": 0.0002743527827189826, "loss": 2.0429, "step": 272220 }, { "epoch": 1.0361745697037978, "grad_norm": 0.14065547287464142, "learning_rate": 0.0002742604745877822, "loss": 2.0369, "step": 272230 }, { "epoch": 1.0362126321719205, "grad_norm": 0.1187807098031044, "learning_rate": 0.00027416820418727574, "loss": 2.0478, "step": 272240 }, { "epoch": 1.0362506946400432, "grad_norm": 0.1296232044696808, "learning_rate": 0.00027407597147123406, "loss": 2.0303, "step": 272250 }, { "epoch": 1.0362887571081658, "grad_norm": 0.12152990698814392, "learning_rate": 0.0002739837763935223, "loss": 2.0536, "step": 272260 }, { "epoch": 1.0363268195762887, "grad_norm": 0.14488112926483154, "learning_rate": 0.00027389161890809975, "loss": 2.0202, "step": 272270 }, { "epoch": 1.0363648820444114, "grad_norm": 0.13850657641887665, "learning_rate": 0.00027379949896901934, "loss": 2.0382, "step": 272280 }, { "epoch": 1.036402944512534, "grad_norm": 0.1515287607908249, "learning_rate": 0.00027370741653042776, "loss": 2.0255, "step": 272290 }, { "epoch": 1.0364410069806567, "grad_norm": 0.13836178183555603, "learning_rate": 0.00027361537154656457, "loss": 2.0439, "step": 272300 }, { "epoch": 1.0364790694487793, "grad_norm": 0.13903303444385529, "learning_rate": 0.0002735233639717627, "loss": 2.0438, "step": 272310 }, { "epoch": 1.036517131916902, "grad_norm": 0.1336706131696701, "learning_rate": 0.0002734313937604476, "loss": 2.04, "step": 272320 }, { "epoch": 1.0365551943850246, "grad_norm": 0.12238036096096039, "learning_rate": 0.00027333946086713725, "loss": 2.0453, "step": 272330 }, { "epoch": 1.0365932568531473, "grad_norm": 0.12669286131858826, "learning_rate": 0.0002732475652464418, "loss": 2.0288, "step": 272340 }, { "epoch": 1.0366313193212702, "grad_norm": 0.15388906002044678, "learning_rate": 0.00027315570685306336, "loss": 2.0514, "step": 272350 }, { "epoch": 1.0366693817893928, "grad_norm": 0.12682406604290009, "learning_rate": 0.0002730638856417956, "loss": 2.0594, "step": 272360 }, { "epoch": 1.0367074442575155, "grad_norm": 0.12043902277946472, "learning_rate": 0.00027297210156752394, "loss": 2.026, "step": 272370 }, { "epoch": 1.0367455067256381, "grad_norm": 0.12616755068302155, "learning_rate": 0.00027288035458522455, "loss": 2.0319, "step": 272380 }, { "epoch": 1.0367835691937608, "grad_norm": 0.13175073266029358, "learning_rate": 0.0002727886446499648, "loss": 2.0323, "step": 272390 }, { "epoch": 1.0368216316618835, "grad_norm": 0.1313619464635849, "learning_rate": 0.0002726969717169024, "loss": 2.0324, "step": 272400 }, { "epoch": 1.0368596941300061, "grad_norm": 0.15082502365112305, "learning_rate": 0.0002726053357412859, "loss": 2.0228, "step": 272410 }, { "epoch": 1.0368977565981288, "grad_norm": 0.12719838321208954, "learning_rate": 0.0002725137366784535, "loss": 2.0329, "step": 272420 }, { "epoch": 1.0369358190662514, "grad_norm": 0.11831434071063995, "learning_rate": 0.00027242217448383366, "loss": 2.0315, "step": 272430 }, { "epoch": 1.0369738815343743, "grad_norm": 0.15007321536540985, "learning_rate": 0.0002723306491129442, "loss": 2.0404, "step": 272440 }, { "epoch": 1.037011944002497, "grad_norm": 0.1263130009174347, "learning_rate": 0.00027223916052139253, "loss": 2.0303, "step": 272450 }, { "epoch": 1.0370500064706196, "grad_norm": 0.14173053205013275, "learning_rate": 0.00027214770866487503, "loss": 2.0309, "step": 272460 }, { "epoch": 1.0370880689387423, "grad_norm": 0.13739918172359467, "learning_rate": 0.00027205629349917707, "loss": 2.0275, "step": 272470 }, { "epoch": 1.037126131406865, "grad_norm": 0.1491842418909073, "learning_rate": 0.0002719649149801724, "loss": 2.0511, "step": 272480 }, { "epoch": 1.0371641938749876, "grad_norm": 0.13553743064403534, "learning_rate": 0.00027187357306382356, "loss": 2.0328, "step": 272490 }, { "epoch": 1.0372022563431103, "grad_norm": 0.13617704808712006, "learning_rate": 0.00027178226770618077, "loss": 2.0338, "step": 272500 }, { "epoch": 1.037240318811233, "grad_norm": 0.13658751547336578, "learning_rate": 0.00027169099886338257, "loss": 2.0559, "step": 272510 }, { "epoch": 1.0372783812793558, "grad_norm": 0.122580386698246, "learning_rate": 0.0002715997664916547, "loss": 2.0365, "step": 272520 }, { "epoch": 1.0373164437474784, "grad_norm": 0.13053767383098602, "learning_rate": 0.00027150857054731057, "loss": 2.0461, "step": 272530 }, { "epoch": 1.037354506215601, "grad_norm": 0.15087807178497314, "learning_rate": 0.00027141741098675074, "loss": 2.0341, "step": 272540 }, { "epoch": 1.0373925686837238, "grad_norm": 0.14107906818389893, "learning_rate": 0.00027132628776646263, "loss": 2.0347, "step": 272550 }, { "epoch": 1.0374306311518464, "grad_norm": 0.1257706731557846, "learning_rate": 0.0002712352008430201, "loss": 2.0359, "step": 272560 }, { "epoch": 1.037468693619969, "grad_norm": 0.14777903258800507, "learning_rate": 0.00027114415017308384, "loss": 2.0242, "step": 272570 }, { "epoch": 1.0375067560880917, "grad_norm": 0.1389368772506714, "learning_rate": 0.0002710531357134004, "loss": 2.0403, "step": 272580 }, { "epoch": 1.0375448185562144, "grad_norm": 0.13209329545497894, "learning_rate": 0.00027096215742080233, "loss": 2.0348, "step": 272590 }, { "epoch": 1.037582881024337, "grad_norm": 0.1406853199005127, "learning_rate": 0.00027087121525220804, "loss": 2.0433, "step": 272600 }, { "epoch": 1.03762094349246, "grad_norm": 0.1262650340795517, "learning_rate": 0.00027078030916462113, "loss": 2.0485, "step": 272610 }, { "epoch": 1.0376590059605826, "grad_norm": 0.14530049264431, "learning_rate": 0.00027068943911513077, "loss": 2.0375, "step": 272620 }, { "epoch": 1.0376970684287052, "grad_norm": 0.14594286680221558, "learning_rate": 0.00027059860506091076, "loss": 2.0299, "step": 272630 }, { "epoch": 1.037735130896828, "grad_norm": 0.14979077875614166, "learning_rate": 0.00027050780695921995, "loss": 2.0371, "step": 272640 }, { "epoch": 1.0377731933649506, "grad_norm": 0.1400003582239151, "learning_rate": 0.0002704170447674015, "loss": 2.0479, "step": 272650 }, { "epoch": 1.0378112558330732, "grad_norm": 0.13226519525051117, "learning_rate": 0.00027032631844288294, "loss": 2.0343, "step": 272660 }, { "epoch": 1.0378493183011959, "grad_norm": 0.1601780354976654, "learning_rate": 0.000270235627943176, "loss": 2.0328, "step": 272670 }, { "epoch": 1.0378873807693185, "grad_norm": 0.12874449789524078, "learning_rate": 0.00027014497322587586, "loss": 2.0652, "step": 272680 }, { "epoch": 1.0379254432374414, "grad_norm": 0.15249601006507874, "learning_rate": 0.00027005435424866164, "loss": 2.0322, "step": 272690 }, { "epoch": 1.037963505705564, "grad_norm": 0.13022339344024658, "learning_rate": 0.00026996377096929564, "loss": 2.0343, "step": 272700 }, { "epoch": 1.0380015681736867, "grad_norm": 0.12486381083726883, "learning_rate": 0.00026987322334562336, "loss": 2.0522, "step": 272710 }, { "epoch": 1.0380396306418094, "grad_norm": 0.12613098323345184, "learning_rate": 0.0002697827113355733, "loss": 2.0299, "step": 272720 }, { "epoch": 1.038077693109932, "grad_norm": 0.14147455990314484, "learning_rate": 0.00026969223489715623, "loss": 2.0421, "step": 272730 }, { "epoch": 1.0381157555780547, "grad_norm": 0.14256823062896729, "learning_rate": 0.00026960179398846586, "loss": 2.0428, "step": 272740 }, { "epoch": 1.0381538180461773, "grad_norm": 0.15050435066223145, "learning_rate": 0.0002695113885676778, "loss": 2.036, "step": 272750 }, { "epoch": 1.0381918805143, "grad_norm": 0.1313105970621109, "learning_rate": 0.00026942101859304984, "loss": 2.0243, "step": 272760 }, { "epoch": 1.0382299429824227, "grad_norm": 0.1230410784482956, "learning_rate": 0.0002693306840229214, "loss": 2.039, "step": 272770 }, { "epoch": 1.0382680054505455, "grad_norm": 0.13287854194641113, "learning_rate": 0.00026924038481571344, "loss": 2.0566, "step": 272780 }, { "epoch": 1.0383060679186682, "grad_norm": 0.13118687272071838, "learning_rate": 0.0002691501209299284, "loss": 2.0441, "step": 272790 }, { "epoch": 1.0383441303867909, "grad_norm": 0.14854682981967926, "learning_rate": 0.00026905989232414967, "loss": 2.0348, "step": 272800 }, { "epoch": 1.0383821928549135, "grad_norm": 0.12359170615673065, "learning_rate": 0.0002689696989570416, "loss": 2.0315, "step": 272810 }, { "epoch": 1.0384202553230362, "grad_norm": 0.13356876373291016, "learning_rate": 0.000268879540787349, "loss": 2.0433, "step": 272820 }, { "epoch": 1.0384583177911588, "grad_norm": 0.1332310438156128, "learning_rate": 0.0002687894177738974, "loss": 2.0274, "step": 272830 }, { "epoch": 1.0384963802592815, "grad_norm": 0.1265527904033661, "learning_rate": 0.0002686993298755924, "loss": 2.0395, "step": 272840 }, { "epoch": 1.0385344427274041, "grad_norm": 0.14267970621585846, "learning_rate": 0.0002686092770514197, "loss": 2.024, "step": 272850 }, { "epoch": 1.0385725051955268, "grad_norm": 0.14452096819877625, "learning_rate": 0.00026851925926044443, "loss": 2.0316, "step": 272860 }, { "epoch": 1.0386105676636497, "grad_norm": 0.15038394927978516, "learning_rate": 0.0002684292764618118, "loss": 2.0601, "step": 272870 }, { "epoch": 1.0386486301317723, "grad_norm": 0.16037602722644806, "learning_rate": 0.00026833932861474596, "loss": 2.0338, "step": 272880 }, { "epoch": 1.038686692599895, "grad_norm": 0.13514462113380432, "learning_rate": 0.00026824941567855036, "loss": 2.0381, "step": 272890 }, { "epoch": 1.0387247550680176, "grad_norm": 0.12547416985034943, "learning_rate": 0.0002681595376126074, "loss": 2.0307, "step": 272900 }, { "epoch": 1.0387628175361403, "grad_norm": 0.14363206923007965, "learning_rate": 0.0002680696943763781, "loss": 2.0256, "step": 272910 }, { "epoch": 1.038800880004263, "grad_norm": 0.1408882886171341, "learning_rate": 0.00026797988592940195, "loss": 2.0426, "step": 272920 }, { "epoch": 1.0388389424723856, "grad_norm": 0.13903652131557465, "learning_rate": 0.0002678901122312968, "loss": 2.0399, "step": 272930 }, { "epoch": 1.0388770049405083, "grad_norm": 0.17271898686885834, "learning_rate": 0.00026780037324175847, "loss": 2.0265, "step": 272940 }, { "epoch": 1.038915067408631, "grad_norm": 0.1400110274553299, "learning_rate": 0.0002677106689205608, "loss": 2.0324, "step": 272950 }, { "epoch": 1.0389531298767538, "grad_norm": 0.13623112440109253, "learning_rate": 0.000267620999227555, "loss": 2.0388, "step": 272960 }, { "epoch": 1.0389911923448765, "grad_norm": 0.1342500001192093, "learning_rate": 0.00026753136412267, "loss": 2.0245, "step": 272970 }, { "epoch": 1.0390292548129991, "grad_norm": 0.13029596209526062, "learning_rate": 0.00026744176356591166, "loss": 2.0379, "step": 272980 }, { "epoch": 1.0390673172811218, "grad_norm": 0.13370917737483978, "learning_rate": 0.0002673521975173632, "loss": 2.0379, "step": 272990 }, { "epoch": 1.0391053797492444, "grad_norm": 0.12633267045021057, "learning_rate": 0.0002672626659371843, "loss": 2.0312, "step": 273000 }, { "epoch": 1.039143442217367, "grad_norm": 0.140619158744812, "learning_rate": 0.0002671731687856115, "loss": 2.0352, "step": 273010 }, { "epoch": 1.0391815046854898, "grad_norm": 0.13824288547039032, "learning_rate": 0.00026708370602295775, "loss": 2.0447, "step": 273020 }, { "epoch": 1.0392195671536124, "grad_norm": 0.13469131290912628, "learning_rate": 0.00026699427760961184, "loss": 2.0596, "step": 273030 }, { "epoch": 1.0392576296217353, "grad_norm": 0.12450501322746277, "learning_rate": 0.00026690488350603883, "loss": 2.0334, "step": 273040 }, { "epoch": 1.039295692089858, "grad_norm": 0.13858427107334137, "learning_rate": 0.0002668155236727796, "loss": 2.0428, "step": 273050 }, { "epoch": 1.0393337545579806, "grad_norm": 0.15618272125720978, "learning_rate": 0.00026672619807045056, "loss": 2.0179, "step": 273060 }, { "epoch": 1.0393718170261033, "grad_norm": 0.14906159043312073, "learning_rate": 0.0002666369066597434, "loss": 2.0539, "step": 273070 }, { "epoch": 1.039409879494226, "grad_norm": 0.147879496216774, "learning_rate": 0.00026654764940142495, "loss": 2.0518, "step": 273080 }, { "epoch": 1.0394479419623486, "grad_norm": 0.13153234124183655, "learning_rate": 0.00026645842625633726, "loss": 2.0236, "step": 273090 }, { "epoch": 1.0394860044304712, "grad_norm": 0.13339228928089142, "learning_rate": 0.0002663692371853969, "loss": 2.0288, "step": 273100 }, { "epoch": 1.039524066898594, "grad_norm": 0.14011885225772858, "learning_rate": 0.00026628008214959516, "loss": 2.0262, "step": 273110 }, { "epoch": 1.0395621293667165, "grad_norm": 0.12014271318912506, "learning_rate": 0.0002661909611099976, "loss": 2.0378, "step": 273120 }, { "epoch": 1.0396001918348394, "grad_norm": 0.12502440810203552, "learning_rate": 0.000266101874027744, "loss": 2.038, "step": 273130 }, { "epoch": 1.039638254302962, "grad_norm": 0.13825981318950653, "learning_rate": 0.0002660128208640482, "loss": 2.0349, "step": 273140 }, { "epoch": 1.0396763167710847, "grad_norm": 0.13164453208446503, "learning_rate": 0.00026592380158019767, "loss": 2.0223, "step": 273150 }, { "epoch": 1.0397143792392074, "grad_norm": 0.133603036403656, "learning_rate": 0.00026583481613755354, "loss": 2.0272, "step": 273160 }, { "epoch": 1.03975244170733, "grad_norm": 0.16143639385700226, "learning_rate": 0.00026574586449755043, "loss": 2.0446, "step": 273170 }, { "epoch": 1.0397905041754527, "grad_norm": 0.1411830633878708, "learning_rate": 0.0002656569466216959, "loss": 2.0311, "step": 273180 }, { "epoch": 1.0398285666435754, "grad_norm": 0.1281070113182068, "learning_rate": 0.0002655680624715708, "loss": 2.0435, "step": 273190 }, { "epoch": 1.039866629111698, "grad_norm": 0.13342636823654175, "learning_rate": 0.0002654792120088285, "loss": 2.0375, "step": 273200 }, { "epoch": 1.039904691579821, "grad_norm": 0.1617501974105835, "learning_rate": 0.00026539039519519526, "loss": 2.0263, "step": 273210 }, { "epoch": 1.0399427540479436, "grad_norm": 0.1403946727514267, "learning_rate": 0.0002653016119924695, "loss": 2.0484, "step": 273220 }, { "epoch": 1.0399808165160662, "grad_norm": 0.1406700313091278, "learning_rate": 0.00026521286236252207, "loss": 2.0434, "step": 273230 }, { "epoch": 1.0400188789841889, "grad_norm": 0.14084210991859436, "learning_rate": 0.00026512414626729574, "loss": 2.0388, "step": 273240 }, { "epoch": 1.0400569414523115, "grad_norm": 0.1333620548248291, "learning_rate": 0.0002650354636688053, "loss": 2.0467, "step": 273250 }, { "epoch": 1.0400950039204342, "grad_norm": 0.13422276079654694, "learning_rate": 0.0002649468145291368, "loss": 2.0377, "step": 273260 }, { "epoch": 1.0401330663885568, "grad_norm": 0.23148223757743835, "learning_rate": 0.0002648581988104483, "loss": 2.0431, "step": 273270 }, { "epoch": 1.0401711288566795, "grad_norm": 0.1293201595544815, "learning_rate": 0.00026476961647496866, "loss": 2.0378, "step": 273280 }, { "epoch": 1.0402091913248022, "grad_norm": 0.13805337250232697, "learning_rate": 0.0002646810674849982, "loss": 2.0464, "step": 273290 }, { "epoch": 1.040247253792925, "grad_norm": 0.12398272007703781, "learning_rate": 0.00026459255180290775, "loss": 2.0379, "step": 273300 }, { "epoch": 1.0402853162610477, "grad_norm": 0.14543074369430542, "learning_rate": 0.0002645040693911393, "loss": 2.0313, "step": 273310 }, { "epoch": 1.0403233787291704, "grad_norm": 0.13031961023807526, "learning_rate": 0.0002644156202122051, "loss": 2.0453, "step": 273320 }, { "epoch": 1.040361441197293, "grad_norm": 0.13371948897838593, "learning_rate": 0.00026432720422868775, "loss": 2.0357, "step": 273330 }, { "epoch": 1.0403995036654157, "grad_norm": 0.13025528192520142, "learning_rate": 0.00026423882140324005, "loss": 2.0281, "step": 273340 }, { "epoch": 1.0404375661335383, "grad_norm": 0.14672107994556427, "learning_rate": 0.0002641504716985849, "loss": 2.0361, "step": 273350 }, { "epoch": 1.040475628601661, "grad_norm": 0.13892264664173126, "learning_rate": 0.00026406215507751487, "loss": 2.0315, "step": 273360 }, { "epoch": 1.0405136910697836, "grad_norm": 0.12774324417114258, "learning_rate": 0.00026397387150289196, "loss": 2.0449, "step": 273370 }, { "epoch": 1.0405517535379065, "grad_norm": 0.13852806389331818, "learning_rate": 0.00026388562093764814, "loss": 2.033, "step": 273380 }, { "epoch": 1.0405898160060292, "grad_norm": 0.13382737338542938, "learning_rate": 0.00026379740334478397, "loss": 2.0342, "step": 273390 }, { "epoch": 1.0406278784741518, "grad_norm": 0.1346932351589203, "learning_rate": 0.0002637092186873696, "loss": 2.0376, "step": 273400 }, { "epoch": 1.0406659409422745, "grad_norm": 0.1198713481426239, "learning_rate": 0.0002636210669285437, "loss": 2.0221, "step": 273410 }, { "epoch": 1.0407040034103971, "grad_norm": 0.13439087569713593, "learning_rate": 0.00026353294803151394, "loss": 2.0116, "step": 273420 }, { "epoch": 1.0407420658785198, "grad_norm": 0.1452450007200241, "learning_rate": 0.0002634448619595564, "loss": 2.0341, "step": 273430 }, { "epoch": 1.0407801283466425, "grad_norm": 0.1251915544271469, "learning_rate": 0.0002633568086760154, "loss": 2.0497, "step": 273440 }, { "epoch": 1.0408181908147651, "grad_norm": 0.14364951848983765, "learning_rate": 0.0002632687881443035, "loss": 2.0435, "step": 273450 }, { "epoch": 1.0408562532828878, "grad_norm": 0.13844889402389526, "learning_rate": 0.00026318080032790135, "loss": 2.0302, "step": 273460 }, { "epoch": 1.0408943157510107, "grad_norm": 0.14765089750289917, "learning_rate": 0.00026309284519035735, "loss": 2.0338, "step": 273470 }, { "epoch": 1.0409323782191333, "grad_norm": 0.13139985501766205, "learning_rate": 0.0002630049226952875, "loss": 2.0472, "step": 273480 }, { "epoch": 1.040970440687256, "grad_norm": 0.14244161546230316, "learning_rate": 0.0002629170328063753, "loss": 2.0378, "step": 273490 }, { "epoch": 1.0410085031553786, "grad_norm": 0.13524176180362701, "learning_rate": 0.00026282917548737156, "loss": 2.0261, "step": 273500 }, { "epoch": 1.0410465656235013, "grad_norm": 0.12593677639961243, "learning_rate": 0.00026274135070209414, "loss": 2.0361, "step": 273510 }, { "epoch": 1.041084628091624, "grad_norm": 0.12570716440677643, "learning_rate": 0.00026265355841442804, "loss": 2.0419, "step": 273520 }, { "epoch": 1.0411226905597466, "grad_norm": 0.1178411990404129, "learning_rate": 0.0002625657985883247, "loss": 2.0258, "step": 273530 }, { "epoch": 1.0411607530278693, "grad_norm": 0.12717683613300323, "learning_rate": 0.0002624780711878024, "loss": 2.0434, "step": 273540 }, { "epoch": 1.0411988154959921, "grad_norm": 0.14389148354530334, "learning_rate": 0.0002623903761769458, "loss": 2.0383, "step": 273550 }, { "epoch": 1.0412368779641148, "grad_norm": 0.11844097077846527, "learning_rate": 0.00026230271351990577, "loss": 2.03, "step": 273560 }, { "epoch": 1.0412749404322375, "grad_norm": 0.13370178639888763, "learning_rate": 0.0002622150831808993, "loss": 2.0407, "step": 273570 }, { "epoch": 1.04131300290036, "grad_norm": 0.13318881392478943, "learning_rate": 0.00026212748512420916, "loss": 2.0337, "step": 273580 }, { "epoch": 1.0413510653684828, "grad_norm": 0.152360200881958, "learning_rate": 0.0002620399193141841, "loss": 2.0212, "step": 273590 }, { "epoch": 1.0413891278366054, "grad_norm": 0.14211878180503845, "learning_rate": 0.00026195238571523837, "loss": 2.0308, "step": 273600 }, { "epoch": 1.041427190304728, "grad_norm": 0.14196383953094482, "learning_rate": 0.0002618648842918514, "loss": 2.0319, "step": 273610 }, { "epoch": 1.0414652527728507, "grad_norm": 0.12660767138004303, "learning_rate": 0.00026177741500856813, "loss": 2.0417, "step": 273620 }, { "epoch": 1.0415033152409734, "grad_norm": 0.13070152699947357, "learning_rate": 0.00026168997782999835, "loss": 2.0413, "step": 273630 }, { "epoch": 1.0415413777090963, "grad_norm": 0.13289307057857513, "learning_rate": 0.000261602572720817, "loss": 2.0326, "step": 273640 }, { "epoch": 1.041579440177219, "grad_norm": 0.1299850195646286, "learning_rate": 0.0002615151996457636, "loss": 2.037, "step": 273650 }, { "epoch": 1.0416175026453416, "grad_norm": 0.1389370858669281, "learning_rate": 0.00026142785856964214, "loss": 2.032, "step": 273660 }, { "epoch": 1.0416555651134642, "grad_norm": 0.1402798444032669, "learning_rate": 0.0002613405494573212, "loss": 2.0354, "step": 273670 }, { "epoch": 1.041693627581587, "grad_norm": 0.12128802388906479, "learning_rate": 0.00026125327227373353, "loss": 2.0256, "step": 273680 }, { "epoch": 1.0417316900497096, "grad_norm": 0.13357381522655487, "learning_rate": 0.000261166026983876, "loss": 2.0463, "step": 273690 }, { "epoch": 1.0417697525178322, "grad_norm": 0.1297796219587326, "learning_rate": 0.00026107881355280914, "loss": 2.0438, "step": 273700 }, { "epoch": 1.0418078149859549, "grad_norm": 0.1287699043750763, "learning_rate": 0.0002609916319456576, "loss": 2.033, "step": 273710 }, { "epoch": 1.0418458774540775, "grad_norm": 0.14555631577968597, "learning_rate": 0.00026090448212760944, "loss": 2.0287, "step": 273720 }, { "epoch": 1.0418839399222004, "grad_norm": 0.12862415611743927, "learning_rate": 0.00026081736406391594, "loss": 2.0335, "step": 273730 }, { "epoch": 1.041922002390323, "grad_norm": 0.12859775125980377, "learning_rate": 0.000260730277719892, "loss": 2.0239, "step": 273740 }, { "epoch": 1.0419600648584457, "grad_norm": 0.12264101952314377, "learning_rate": 0.00026064322306091546, "loss": 2.0345, "step": 273750 }, { "epoch": 1.0419981273265684, "grad_norm": 0.13000193238258362, "learning_rate": 0.0002605562000524271, "loss": 2.025, "step": 273760 }, { "epoch": 1.042036189794691, "grad_norm": 0.1287056803703308, "learning_rate": 0.0002604692086599303, "loss": 2.0306, "step": 273770 }, { "epoch": 1.0420742522628137, "grad_norm": 0.14237751066684723, "learning_rate": 0.0002603822488489914, "loss": 2.0388, "step": 273780 }, { "epoch": 1.0421123147309364, "grad_norm": 0.1337154060602188, "learning_rate": 0.00026029532058523903, "loss": 2.0257, "step": 273790 }, { "epoch": 1.042150377199059, "grad_norm": 0.11729374527931213, "learning_rate": 0.00026020842383436403, "loss": 2.0373, "step": 273800 }, { "epoch": 1.0421884396671817, "grad_norm": 0.1402532309293747, "learning_rate": 0.0002601215585621195, "loss": 2.0214, "step": 273810 }, { "epoch": 1.0422265021353045, "grad_norm": 0.1256987452507019, "learning_rate": 0.0002600347247343205, "loss": 2.0273, "step": 273820 }, { "epoch": 1.0422645646034272, "grad_norm": 0.13618886470794678, "learning_rate": 0.00025994792231684395, "loss": 2.0352, "step": 273830 }, { "epoch": 1.0423026270715499, "grad_norm": 0.1377623826265335, "learning_rate": 0.0002598611512756283, "loss": 2.0506, "step": 273840 }, { "epoch": 1.0423406895396725, "grad_norm": 0.122389055788517, "learning_rate": 0.0002597744115766737, "loss": 2.0467, "step": 273850 }, { "epoch": 1.0423787520077952, "grad_norm": 0.12941765785217285, "learning_rate": 0.00025968770318604166, "loss": 2.0364, "step": 273860 }, { "epoch": 1.0424168144759178, "grad_norm": 0.13198088109493256, "learning_rate": 0.0002596010260698547, "loss": 2.0416, "step": 273870 }, { "epoch": 1.0424548769440405, "grad_norm": 0.1301337331533432, "learning_rate": 0.0002595143801942966, "loss": 2.0274, "step": 273880 }, { "epoch": 1.0424929394121631, "grad_norm": 0.12306160479784012, "learning_rate": 0.0002594277655256119, "loss": 2.033, "step": 273890 }, { "epoch": 1.042531001880286, "grad_norm": 0.13080520927906036, "learning_rate": 0.00025934118203010584, "loss": 2.0339, "step": 273900 }, { "epoch": 1.0425690643484087, "grad_norm": 0.13939636945724487, "learning_rate": 0.00025925462967414447, "loss": 2.0369, "step": 273910 }, { "epoch": 1.0426071268165313, "grad_norm": 0.12812650203704834, "learning_rate": 0.00025916810842415404, "loss": 2.0381, "step": 273920 }, { "epoch": 1.042645189284654, "grad_norm": 0.13757014274597168, "learning_rate": 0.0002590816182466214, "loss": 2.0457, "step": 273930 }, { "epoch": 1.0426832517527767, "grad_norm": 0.11760999262332916, "learning_rate": 0.0002589951591080931, "loss": 2.0266, "step": 273940 }, { "epoch": 1.0427213142208993, "grad_norm": 0.125470831990242, "learning_rate": 0.0002589087309751761, "loss": 2.0354, "step": 273950 }, { "epoch": 1.042759376689022, "grad_norm": 0.15457892417907715, "learning_rate": 0.0002588223338145369, "loss": 2.0321, "step": 273960 }, { "epoch": 1.0427974391571446, "grad_norm": 0.12411262840032578, "learning_rate": 0.0002587359675929018, "loss": 2.0329, "step": 273970 }, { "epoch": 1.0428355016252673, "grad_norm": 0.12908662855625153, "learning_rate": 0.0002586496322770566, "loss": 2.037, "step": 273980 }, { "epoch": 1.0428735640933902, "grad_norm": 0.15328560769557953, "learning_rate": 0.0002585633278338465, "loss": 2.0478, "step": 273990 }, { "epoch": 1.0429116265615128, "grad_norm": 0.1366276890039444, "learning_rate": 0.000258477054230176, "loss": 2.0388, "step": 274000 }, { "epoch": 1.0429496890296355, "grad_norm": 0.1253184974193573, "learning_rate": 0.00025839081143300856, "loss": 2.0429, "step": 274010 }, { "epoch": 1.0429877514977581, "grad_norm": 0.12240269780158997, "learning_rate": 0.0002583045994093668, "loss": 2.0464, "step": 274020 }, { "epoch": 1.0430258139658808, "grad_norm": 0.17729179561138153, "learning_rate": 0.00025821841812633174, "loss": 2.0296, "step": 274030 }, { "epoch": 1.0430638764340034, "grad_norm": 0.14219191670417786, "learning_rate": 0.0002581322675510435, "loss": 2.0277, "step": 274040 }, { "epoch": 1.043101938902126, "grad_norm": 0.1508656144142151, "learning_rate": 0.0002580461476507004, "loss": 2.0166, "step": 274050 }, { "epoch": 1.0431400013702488, "grad_norm": 0.13650383055210114, "learning_rate": 0.0002579600583925592, "loss": 2.0246, "step": 274060 }, { "epoch": 1.0431780638383716, "grad_norm": 0.12071377784013748, "learning_rate": 0.000257873999743935, "loss": 2.0229, "step": 274070 }, { "epoch": 1.0432161263064943, "grad_norm": 0.13448010385036469, "learning_rate": 0.0002577879716722007, "loss": 2.0491, "step": 274080 }, { "epoch": 1.043254188774617, "grad_norm": 0.14792564511299133, "learning_rate": 0.00025770197414478723, "loss": 2.0428, "step": 274090 }, { "epoch": 1.0432922512427396, "grad_norm": 0.13969039916992188, "learning_rate": 0.00025761600712918354, "loss": 2.0426, "step": 274100 }, { "epoch": 1.0433303137108623, "grad_norm": 0.1547282338142395, "learning_rate": 0.00025753007059293586, "loss": 2.0579, "step": 274110 }, { "epoch": 1.043368376178985, "grad_norm": 0.1221390813589096, "learning_rate": 0.0002574441645036481, "loss": 2.0368, "step": 274120 }, { "epoch": 1.0434064386471076, "grad_norm": 0.13443832099437714, "learning_rate": 0.0002573582888289814, "loss": 2.0221, "step": 274130 }, { "epoch": 1.0434445011152302, "grad_norm": 0.12936048209667206, "learning_rate": 0.00025727244353665436, "loss": 2.0312, "step": 274140 }, { "epoch": 1.043482563583353, "grad_norm": 0.1321532279253006, "learning_rate": 0.00025718662859444225, "loss": 2.0479, "step": 274150 }, { "epoch": 1.0435206260514758, "grad_norm": 0.12923943996429443, "learning_rate": 0.0002571008439701776, "loss": 2.0438, "step": 274160 }, { "epoch": 1.0435586885195984, "grad_norm": 0.1430782824754715, "learning_rate": 0.00025701508963174965, "loss": 2.0335, "step": 274170 }, { "epoch": 1.043596750987721, "grad_norm": 0.13641083240509033, "learning_rate": 0.0002569293655471041, "loss": 2.0469, "step": 274180 }, { "epoch": 1.0436348134558437, "grad_norm": 0.14894016087055206, "learning_rate": 0.0002568436716842434, "loss": 2.0388, "step": 274190 }, { "epoch": 1.0436728759239664, "grad_norm": 0.1385067105293274, "learning_rate": 0.0002567580080112263, "loss": 2.031, "step": 274200 }, { "epoch": 1.043710938392089, "grad_norm": 0.14582498371601105, "learning_rate": 0.00025667237449616753, "loss": 2.0342, "step": 274210 }, { "epoch": 1.0437490008602117, "grad_norm": 0.1438489854335785, "learning_rate": 0.0002565867711072383, "loss": 2.0439, "step": 274220 }, { "epoch": 1.0437870633283344, "grad_norm": 0.13223670423030853, "learning_rate": 0.00025650119781266546, "loss": 2.0338, "step": 274230 }, { "epoch": 1.0438251257964573, "grad_norm": 0.14658309519290924, "learning_rate": 0.0002564156545807319, "loss": 2.0453, "step": 274240 }, { "epoch": 1.04386318826458, "grad_norm": 0.13508301973342896, "learning_rate": 0.000256330141379776, "loss": 2.0511, "step": 274250 }, { "epoch": 1.0439012507327026, "grad_norm": 0.13011658191680908, "learning_rate": 0.0002562446581781916, "loss": 2.032, "step": 274260 }, { "epoch": 1.0439393132008252, "grad_norm": 0.13241124153137207, "learning_rate": 0.00025615920494442823, "loss": 2.0309, "step": 274270 }, { "epoch": 1.0439773756689479, "grad_norm": 0.13608020544052124, "learning_rate": 0.0002560737816469907, "loss": 2.0371, "step": 274280 }, { "epoch": 1.0440154381370705, "grad_norm": 0.32768458127975464, "learning_rate": 0.0002559883882544385, "loss": 2.0252, "step": 274290 }, { "epoch": 1.0440535006051932, "grad_norm": 0.12332502007484436, "learning_rate": 0.0002559030247353865, "loss": 2.0292, "step": 274300 }, { "epoch": 1.0440915630733159, "grad_norm": 0.14246225357055664, "learning_rate": 0.0002558176910585044, "loss": 2.0181, "step": 274310 }, { "epoch": 1.0441296255414385, "grad_norm": 0.1291736364364624, "learning_rate": 0.0002557323871925165, "loss": 2.0446, "step": 274320 }, { "epoch": 1.0441676880095614, "grad_norm": 0.1337514966726303, "learning_rate": 0.00025564711310620183, "loss": 2.0262, "step": 274330 }, { "epoch": 1.044205750477684, "grad_norm": 0.15136483311653137, "learning_rate": 0.0002555618687683937, "loss": 2.0272, "step": 274340 }, { "epoch": 1.0442438129458067, "grad_norm": 0.1356852501630783, "learning_rate": 0.00025547665414797985, "loss": 2.0434, "step": 274350 }, { "epoch": 1.0442818754139294, "grad_norm": 0.14751608669757843, "learning_rate": 0.0002553914692139022, "loss": 2.0278, "step": 274360 }, { "epoch": 1.044319937882052, "grad_norm": 0.12296520173549652, "learning_rate": 0.0002553063139351569, "loss": 2.0389, "step": 274370 }, { "epoch": 1.0443580003501747, "grad_norm": 0.15096762776374817, "learning_rate": 0.00025522118828079365, "loss": 2.0292, "step": 274380 }, { "epoch": 1.0443960628182973, "grad_norm": 0.14363795518875122, "learning_rate": 0.0002551360922199163, "loss": 2.0455, "step": 274390 }, { "epoch": 1.04443412528642, "grad_norm": 0.15220364928245544, "learning_rate": 0.0002550510257216822, "loss": 2.0414, "step": 274400 }, { "epoch": 1.0444721877545429, "grad_norm": 0.1270347386598587, "learning_rate": 0.00025496598875530225, "loss": 2.0379, "step": 274410 }, { "epoch": 1.0445102502226655, "grad_norm": 0.14191339910030365, "learning_rate": 0.0002548809812900408, "loss": 2.0224, "step": 274420 }, { "epoch": 1.0445483126907882, "grad_norm": 0.13303406536579132, "learning_rate": 0.00025479600329521545, "loss": 2.0361, "step": 274430 }, { "epoch": 1.0445863751589108, "grad_norm": 0.1290275901556015, "learning_rate": 0.00025471105474019693, "loss": 2.0192, "step": 274440 }, { "epoch": 1.0446244376270335, "grad_norm": 0.1343204528093338, "learning_rate": 0.00025462613559440906, "loss": 2.0309, "step": 274450 }, { "epoch": 1.0446625000951562, "grad_norm": 0.13578853011131287, "learning_rate": 0.00025454124582732844, "loss": 2.048, "step": 274460 }, { "epoch": 1.0447005625632788, "grad_norm": 0.13064728677272797, "learning_rate": 0.0002544563854084846, "loss": 2.0372, "step": 274470 }, { "epoch": 1.0447386250314015, "grad_norm": 0.15233071148395538, "learning_rate": 0.0002543715543074595, "loss": 2.032, "step": 274480 }, { "epoch": 1.0447766874995241, "grad_norm": 0.14285831153392792, "learning_rate": 0.00025428675249388773, "loss": 2.0326, "step": 274490 }, { "epoch": 1.044814749967647, "grad_norm": 0.1405845433473587, "learning_rate": 0.00025420197993745623, "loss": 2.0417, "step": 274500 }, { "epoch": 1.0448528124357697, "grad_norm": 0.12189560383558273, "learning_rate": 0.00025411723660790427, "loss": 2.0389, "step": 274510 }, { "epoch": 1.0448908749038923, "grad_norm": 0.12811321020126343, "learning_rate": 0.00025403252247502316, "loss": 2.023, "step": 274520 }, { "epoch": 1.044928937372015, "grad_norm": 0.16220244765281677, "learning_rate": 0.0002539478375086562, "loss": 2.037, "step": 274530 }, { "epoch": 1.0449669998401376, "grad_norm": 0.13246352970600128, "learning_rate": 0.0002538631816786986, "loss": 2.0438, "step": 274540 }, { "epoch": 1.0450050623082603, "grad_norm": 0.12363100796937943, "learning_rate": 0.00025377855495509736, "loss": 2.0369, "step": 274550 }, { "epoch": 1.045043124776383, "grad_norm": 0.13489417731761932, "learning_rate": 0.00025369395730785114, "loss": 2.0226, "step": 274560 }, { "epoch": 1.0450811872445056, "grad_norm": 0.13529403507709503, "learning_rate": 0.0002536093887070099, "loss": 2.0421, "step": 274570 }, { "epoch": 1.0451192497126283, "grad_norm": 0.13258054852485657, "learning_rate": 0.00025352484912267526, "loss": 2.0218, "step": 274580 }, { "epoch": 1.0451573121807511, "grad_norm": 0.14793935418128967, "learning_rate": 0.0002534403385249999, "loss": 2.0355, "step": 274590 }, { "epoch": 1.0451953746488738, "grad_norm": 0.1330820918083191, "learning_rate": 0.0002533558568841876, "loss": 2.0364, "step": 274600 }, { "epoch": 1.0452334371169965, "grad_norm": 0.13027338683605194, "learning_rate": 0.0002532714041704934, "loss": 2.0392, "step": 274610 }, { "epoch": 1.0452714995851191, "grad_norm": 0.13190694153308868, "learning_rate": 0.00025318698035422307, "loss": 2.0415, "step": 274620 }, { "epoch": 1.0453095620532418, "grad_norm": 0.1548970639705658, "learning_rate": 0.00025310258540573306, "loss": 2.0412, "step": 274630 }, { "epoch": 1.0453476245213644, "grad_norm": 0.16039757430553436, "learning_rate": 0.0002530182192954306, "loss": 2.041, "step": 274640 }, { "epoch": 1.045385686989487, "grad_norm": 0.13856640458106995, "learning_rate": 0.0002529338819937734, "loss": 2.0256, "step": 274650 }, { "epoch": 1.0454237494576097, "grad_norm": 0.13383691012859344, "learning_rate": 0.0002528495734712697, "loss": 2.0324, "step": 274660 }, { "epoch": 1.0454618119257324, "grad_norm": 0.12692676484584808, "learning_rate": 0.00025276529369847766, "loss": 2.0259, "step": 274670 }, { "epoch": 1.0454998743938553, "grad_norm": 0.1338866949081421, "learning_rate": 0.0002526810426460061, "loss": 2.0271, "step": 274680 }, { "epoch": 1.045537936861978, "grad_norm": 0.14236962795257568, "learning_rate": 0.0002525968202845135, "loss": 2.0362, "step": 274690 }, { "epoch": 1.0455759993301006, "grad_norm": 0.14252808690071106, "learning_rate": 0.00025251262658470836, "loss": 2.0381, "step": 274700 }, { "epoch": 1.0456140617982232, "grad_norm": 0.14136750996112823, "learning_rate": 0.00025242846151734915, "loss": 2.031, "step": 274710 }, { "epoch": 1.045652124266346, "grad_norm": 0.12346253544092178, "learning_rate": 0.0002523443250532439, "loss": 2.0303, "step": 274720 }, { "epoch": 1.0456901867344686, "grad_norm": 0.17243121564388275, "learning_rate": 0.0002522602171632501, "loss": 2.0304, "step": 274730 }, { "epoch": 1.0457282492025912, "grad_norm": 0.14957967400550842, "learning_rate": 0.0002521761378182749, "loss": 2.0196, "step": 274740 }, { "epoch": 1.0457663116707139, "grad_norm": 0.1367514431476593, "learning_rate": 0.00025209208698927476, "loss": 2.0339, "step": 274750 }, { "epoch": 1.0458043741388368, "grad_norm": 0.13444387912750244, "learning_rate": 0.0002520080646472551, "loss": 2.0379, "step": 274760 }, { "epoch": 1.0458424366069594, "grad_norm": 0.12551210820674896, "learning_rate": 0.00025192407076327084, "loss": 2.0408, "step": 274770 }, { "epoch": 1.045880499075082, "grad_norm": 0.13665346801280975, "learning_rate": 0.00025184010530842553, "loss": 2.0374, "step": 274780 }, { "epoch": 1.0459185615432047, "grad_norm": 0.13541540503501892, "learning_rate": 0.00025175616825387183, "loss": 2.0345, "step": 274790 }, { "epoch": 1.0459566240113274, "grad_norm": 0.12491797655820847, "learning_rate": 0.00025167225957081096, "loss": 2.0243, "step": 274800 }, { "epoch": 1.04599468647945, "grad_norm": 0.13685432076454163, "learning_rate": 0.00025158837923049305, "loss": 2.0278, "step": 274810 }, { "epoch": 1.0460327489475727, "grad_norm": 0.14867368340492249, "learning_rate": 0.0002515045272042165, "loss": 2.0173, "step": 274820 }, { "epoch": 1.0460708114156954, "grad_norm": 0.15146611630916595, "learning_rate": 0.00025142070346332815, "loss": 2.0342, "step": 274830 }, { "epoch": 1.046108873883818, "grad_norm": 0.1487848460674286, "learning_rate": 0.0002513369079792232, "loss": 2.0382, "step": 274840 }, { "epoch": 1.046146936351941, "grad_norm": 0.13378088176250458, "learning_rate": 0.00025125314072334503, "loss": 2.0527, "step": 274850 }, { "epoch": 1.0461849988200635, "grad_norm": 0.14431817829608917, "learning_rate": 0.0002511694016671851, "loss": 2.0422, "step": 274860 }, { "epoch": 1.0462230612881862, "grad_norm": 0.1392163783311844, "learning_rate": 0.00025108569078228284, "loss": 2.0454, "step": 274870 }, { "epoch": 1.0462611237563089, "grad_norm": 0.15907195210456848, "learning_rate": 0.00025100200804022534, "loss": 2.0303, "step": 274880 }, { "epoch": 1.0462991862244315, "grad_norm": 0.14727604389190674, "learning_rate": 0.00025091835341264766, "loss": 2.0254, "step": 274890 }, { "epoch": 1.0463372486925542, "grad_norm": 0.14289073646068573, "learning_rate": 0.00025083472687123244, "loss": 2.04, "step": 274900 }, { "epoch": 1.0463753111606768, "grad_norm": 0.12701305747032166, "learning_rate": 0.00025075112838770966, "loss": 2.03, "step": 274910 }, { "epoch": 1.0464133736287995, "grad_norm": 0.1313587874174118, "learning_rate": 0.00025066755793385677, "loss": 2.0391, "step": 274920 }, { "epoch": 1.0464514360969224, "grad_norm": 0.16116932034492493, "learning_rate": 0.0002505840154814986, "loss": 2.033, "step": 274930 }, { "epoch": 1.046489498565045, "grad_norm": 0.14866022765636444, "learning_rate": 0.00025050050100250697, "loss": 2.047, "step": 274940 }, { "epoch": 1.0465275610331677, "grad_norm": 0.13143713772296906, "learning_rate": 0.00025041701446880106, "loss": 2.017, "step": 274950 }, { "epoch": 1.0465656235012903, "grad_norm": 0.13434137403964996, "learning_rate": 0.0002503335558523466, "loss": 2.025, "step": 274960 }, { "epoch": 1.046603685969413, "grad_norm": 0.13187500834465027, "learning_rate": 0.0002502501251251564, "loss": 2.017, "step": 274970 }, { "epoch": 1.0466417484375357, "grad_norm": 0.15085522830486298, "learning_rate": 0.00025016672225929015, "loss": 2.029, "step": 274980 }, { "epoch": 1.0466798109056583, "grad_norm": 0.1359572410583496, "learning_rate": 0.0002500833472268538, "loss": 2.0563, "step": 274990 }, { "epoch": 1.046717873373781, "grad_norm": 0.13730421662330627, "learning_rate": 0.00025, "loss": 2.0221, "step": 275000 }, { "epoch": 1.0467559358419036, "grad_norm": 0.15225379168987274, "learning_rate": 0.0002499166805509279, "loss": 2.0296, "step": 275010 }, { "epoch": 1.0467939983100265, "grad_norm": 0.1194249764084816, "learning_rate": 0.0002498333888518827, "loss": 2.0216, "step": 275020 }, { "epoch": 1.0468320607781492, "grad_norm": 0.1379227489233017, "learning_rate": 0.00024975012487515603, "loss": 2.0371, "step": 275030 }, { "epoch": 1.0468701232462718, "grad_norm": 0.27223625779151917, "learning_rate": 0.0002496668885930855, "loss": 2.0251, "step": 275040 }, { "epoch": 1.0469081857143945, "grad_norm": 0.1424230933189392, "learning_rate": 0.0002495836799780547, "loss": 2.0391, "step": 275050 }, { "epoch": 1.0469462481825171, "grad_norm": 0.13158190250396729, "learning_rate": 0.0002495004990024931, "loss": 2.0478, "step": 275060 }, { "epoch": 1.0469843106506398, "grad_norm": 0.1202026754617691, "learning_rate": 0.00024941734563887574, "loss": 2.0332, "step": 275070 }, { "epoch": 1.0470223731187625, "grad_norm": 0.12430751323699951, "learning_rate": 0.0002493342198597237, "loss": 2.0147, "step": 275080 }, { "epoch": 1.047060435586885, "grad_norm": 0.13746048510074615, "learning_rate": 0.00024925112163760334, "loss": 2.0338, "step": 275090 }, { "epoch": 1.047098498055008, "grad_norm": 0.12649886310100555, "learning_rate": 0.0002491680509451265, "loss": 2.0366, "step": 275100 }, { "epoch": 1.0471365605231306, "grad_norm": 0.14034858345985413, "learning_rate": 0.00024908500775495035, "loss": 2.034, "step": 275110 }, { "epoch": 1.0471746229912533, "grad_norm": 0.14112162590026855, "learning_rate": 0.00024900199203977736, "loss": 2.0321, "step": 275120 }, { "epoch": 1.047212685459376, "grad_norm": 0.1548360139131546, "learning_rate": 0.00024891900377235505, "loss": 2.0299, "step": 275130 }, { "epoch": 1.0472507479274986, "grad_norm": 0.13118277490139008, "learning_rate": 0.000248836042925476, "loss": 2.0324, "step": 275140 }, { "epoch": 1.0472888103956213, "grad_norm": 0.14753767848014832, "learning_rate": 0.00024875310947197776, "loss": 2.029, "step": 275150 }, { "epoch": 1.047326872863744, "grad_norm": 0.14345189929008484, "learning_rate": 0.0002486702033847426, "loss": 2.0378, "step": 275160 }, { "epoch": 1.0473649353318666, "grad_norm": 0.12945662438869476, "learning_rate": 0.0002485873246366975, "loss": 2.0304, "step": 275170 }, { "epoch": 1.0474029977999892, "grad_norm": 0.1253436952829361, "learning_rate": 0.0002485044732008142, "loss": 2.0398, "step": 275180 }, { "epoch": 1.0474410602681121, "grad_norm": 0.13015608489513397, "learning_rate": 0.0002484216490501087, "loss": 2.0305, "step": 275190 }, { "epoch": 1.0474791227362348, "grad_norm": 0.14374291896820068, "learning_rate": 0.00024833885215764167, "loss": 2.0542, "step": 275200 }, { "epoch": 1.0475171852043574, "grad_norm": 0.12700749933719635, "learning_rate": 0.00024825608249651795, "loss": 2.0357, "step": 275210 }, { "epoch": 1.04755524767248, "grad_norm": 0.1461777687072754, "learning_rate": 0.00024817334003988646, "loss": 2.0085, "step": 275220 }, { "epoch": 1.0475933101406028, "grad_norm": 0.14648723602294922, "learning_rate": 0.00024809062476094043, "loss": 2.0371, "step": 275230 }, { "epoch": 1.0476313726087254, "grad_norm": 0.1811038702726364, "learning_rate": 0.0002480079366329169, "loss": 2.0408, "step": 275240 }, { "epoch": 1.047669435076848, "grad_norm": 0.14561040699481964, "learning_rate": 0.0002479252756290971, "loss": 2.0397, "step": 275250 }, { "epoch": 1.0477074975449707, "grad_norm": 0.15173044800758362, "learning_rate": 0.00024784264172280566, "loss": 2.0314, "step": 275260 }, { "epoch": 1.0477455600130936, "grad_norm": 0.13430675864219666, "learning_rate": 0.00024776003488741124, "loss": 2.037, "step": 275270 }, { "epoch": 1.0477836224812163, "grad_norm": 0.13322807848453522, "learning_rate": 0.0002476774550963258, "loss": 2.0252, "step": 275280 }, { "epoch": 1.047821684949339, "grad_norm": 0.13812817633152008, "learning_rate": 0.00024759490232300507, "loss": 2.047, "step": 275290 }, { "epoch": 1.0478597474174616, "grad_norm": 0.12764814496040344, "learning_rate": 0.000247512376540948, "loss": 2.043, "step": 275300 }, { "epoch": 1.0478978098855842, "grad_norm": 0.1589909940958023, "learning_rate": 0.00024742987772369697, "loss": 2.0366, "step": 275310 }, { "epoch": 1.0479358723537069, "grad_norm": 0.14342698454856873, "learning_rate": 0.0002473474058448373, "loss": 2.014, "step": 275320 }, { "epoch": 1.0479739348218295, "grad_norm": 0.14049477875232697, "learning_rate": 0.00024726496087799774, "loss": 2.0314, "step": 275330 }, { "epoch": 1.0480119972899522, "grad_norm": 0.14000898599624634, "learning_rate": 0.0002471825427968498, "loss": 2.0284, "step": 275340 }, { "epoch": 1.0480500597580749, "grad_norm": 0.12767021358013153, "learning_rate": 0.00024710015157510803, "loss": 2.0357, "step": 275350 }, { "epoch": 1.0480881222261977, "grad_norm": 0.13420067727565765, "learning_rate": 0.00024701778718652965, "loss": 2.028, "step": 275360 }, { "epoch": 1.0481261846943204, "grad_norm": 0.13943490386009216, "learning_rate": 0.0002469354496049147, "loss": 2.0407, "step": 275370 }, { "epoch": 1.048164247162443, "grad_norm": 0.1490197330713272, "learning_rate": 0.00024685313880410574, "loss": 2.0161, "step": 275380 }, { "epoch": 1.0482023096305657, "grad_norm": 0.14449959993362427, "learning_rate": 0.000246770854757988, "loss": 2.026, "step": 275390 }, { "epoch": 1.0482403720986884, "grad_norm": 0.13023877143859863, "learning_rate": 0.00024668859744048896, "loss": 2.0419, "step": 275400 }, { "epoch": 1.048278434566811, "grad_norm": 0.1355513632297516, "learning_rate": 0.0002466063668255784, "loss": 2.0382, "step": 275410 }, { "epoch": 1.0483164970349337, "grad_norm": 0.12848785519599915, "learning_rate": 0.0002465241628872685, "loss": 2.0306, "step": 275420 }, { "epoch": 1.0483545595030563, "grad_norm": 0.14631952345371246, "learning_rate": 0.0002464419855996134, "loss": 2.0328, "step": 275430 }, { "epoch": 1.048392621971179, "grad_norm": 0.13640548288822174, "learning_rate": 0.0002463598349367093, "loss": 2.0235, "step": 275440 }, { "epoch": 1.0484306844393019, "grad_norm": 0.13725143671035767, "learning_rate": 0.0002462777108726945, "loss": 2.0416, "step": 275450 }, { "epoch": 1.0484687469074245, "grad_norm": 0.14505118131637573, "learning_rate": 0.0002461956133817489, "loss": 2.0331, "step": 275460 }, { "epoch": 1.0485068093755472, "grad_norm": 0.1433112770318985, "learning_rate": 0.0002461135424380943, "loss": 2.039, "step": 275470 }, { "epoch": 1.0485448718436698, "grad_norm": 0.13896414637565613, "learning_rate": 0.00024603149801599413, "loss": 2.0261, "step": 275480 }, { "epoch": 1.0485829343117925, "grad_norm": 0.15077711641788483, "learning_rate": 0.00024594948008975324, "loss": 2.0315, "step": 275490 }, { "epoch": 1.0486209967799152, "grad_norm": 0.11837725341320038, "learning_rate": 0.0002458674886337182, "loss": 2.0328, "step": 275500 }, { "epoch": 1.0486590592480378, "grad_norm": 0.14062966406345367, "learning_rate": 0.0002457855236222768, "loss": 2.0226, "step": 275510 }, { "epoch": 1.0486971217161605, "grad_norm": 0.16681206226348877, "learning_rate": 0.000245703585029858, "loss": 2.0255, "step": 275520 }, { "epoch": 1.0487351841842831, "grad_norm": 0.12906672060489655, "learning_rate": 0.0002456216728309321, "loss": 2.0141, "step": 275530 }, { "epoch": 1.048773246652406, "grad_norm": 0.13691098988056183, "learning_rate": 0.00024553978700001054, "loss": 2.0251, "step": 275540 }, { "epoch": 1.0488113091205287, "grad_norm": 0.13565468788146973, "learning_rate": 0.0002454579275116456, "loss": 2.0432, "step": 275550 }, { "epoch": 1.0488493715886513, "grad_norm": 0.13613511621952057, "learning_rate": 0.0002453760943404305, "loss": 2.0194, "step": 275560 }, { "epoch": 1.048887434056774, "grad_norm": 0.16795943677425385, "learning_rate": 0.0002452942874609994, "loss": 2.0425, "step": 275570 }, { "epoch": 1.0489254965248966, "grad_norm": 0.14237631857395172, "learning_rate": 0.00024521250684802706, "loss": 2.0397, "step": 275580 }, { "epoch": 1.0489635589930193, "grad_norm": 0.14784595370292664, "learning_rate": 0.0002451307524762288, "loss": 2.0284, "step": 275590 }, { "epoch": 1.049001621461142, "grad_norm": 0.1270940750837326, "learning_rate": 0.0002450490243203607, "loss": 2.0278, "step": 275600 }, { "epoch": 1.0490396839292646, "grad_norm": 0.12831611931324005, "learning_rate": 0.00024496732235521925, "loss": 2.0134, "step": 275610 }, { "epoch": 1.0490777463973875, "grad_norm": 0.1486426740884781, "learning_rate": 0.000244885646555641, "loss": 2.0376, "step": 275620 }, { "epoch": 1.0491158088655101, "grad_norm": 0.12868672609329224, "learning_rate": 0.0002448039968965031, "loss": 2.0269, "step": 275630 }, { "epoch": 1.0491538713336328, "grad_norm": 0.1494320034980774, "learning_rate": 0.00024472237335272277, "loss": 2.0179, "step": 275640 }, { "epoch": 1.0491919338017555, "grad_norm": 0.14237873256206512, "learning_rate": 0.0002446407758992573, "loss": 2.0128, "step": 275650 }, { "epoch": 1.0492299962698781, "grad_norm": 0.14436769485473633, "learning_rate": 0.00024455920451110395, "loss": 2.0428, "step": 275660 }, { "epoch": 1.0492680587380008, "grad_norm": 0.13775093853473663, "learning_rate": 0.00024447765916330006, "loss": 2.0403, "step": 275670 }, { "epoch": 1.0493061212061234, "grad_norm": 0.12932050228118896, "learning_rate": 0.0002443961398309225, "loss": 2.0173, "step": 275680 }, { "epoch": 1.049344183674246, "grad_norm": 0.12811337411403656, "learning_rate": 0.0002443146464890881, "loss": 2.0213, "step": 275690 }, { "epoch": 1.0493822461423687, "grad_norm": 0.13555051386356354, "learning_rate": 0.0002442331791129532, "loss": 2.0079, "step": 275700 }, { "epoch": 1.0494203086104916, "grad_norm": 0.1272488683462143, "learning_rate": 0.00024415173767771385, "loss": 2.0245, "step": 275710 }, { "epoch": 1.0494583710786143, "grad_norm": 0.14617857336997986, "learning_rate": 0.00024407032215860547, "loss": 2.0303, "step": 275720 }, { "epoch": 1.049496433546737, "grad_norm": 0.13590756058692932, "learning_rate": 0.00024398893253090275, "loss": 2.035, "step": 275730 }, { "epoch": 1.0495344960148596, "grad_norm": 0.1549926996231079, "learning_rate": 0.00024390756876991988, "loss": 2.0305, "step": 275740 }, { "epoch": 1.0495725584829823, "grad_norm": 0.13491860032081604, "learning_rate": 0.00024382623085101, "loss": 2.034, "step": 275750 }, { "epoch": 1.049610620951105, "grad_norm": 0.14095671474933624, "learning_rate": 0.00024374491874956571, "loss": 2.0196, "step": 275760 }, { "epoch": 1.0496486834192276, "grad_norm": 0.15967071056365967, "learning_rate": 0.00024366363244101837, "loss": 2.0319, "step": 275770 }, { "epoch": 1.0496867458873502, "grad_norm": 0.16383443772792816, "learning_rate": 0.0002435823719008383, "loss": 2.027, "step": 275780 }, { "epoch": 1.049724808355473, "grad_norm": 0.1587381362915039, "learning_rate": 0.0002435011371045348, "loss": 2.0313, "step": 275790 }, { "epoch": 1.0497628708235958, "grad_norm": 0.13336656987667084, "learning_rate": 0.00024341992802765584, "loss": 2.023, "step": 275800 }, { "epoch": 1.0498009332917184, "grad_norm": 0.14063481986522675, "learning_rate": 0.00024333874464578804, "loss": 2.0407, "step": 275810 }, { "epoch": 1.049838995759841, "grad_norm": 0.134406179189682, "learning_rate": 0.0002432575869345568, "loss": 2.0231, "step": 275820 }, { "epoch": 1.0498770582279637, "grad_norm": 0.12986144423484802, "learning_rate": 0.00024317645486962586, "loss": 2.0294, "step": 275830 }, { "epoch": 1.0499151206960864, "grad_norm": 0.13074316084384918, "learning_rate": 0.00024309534842669738, "loss": 2.0335, "step": 275840 }, { "epoch": 1.049953183164209, "grad_norm": 0.11960646510124207, "learning_rate": 0.00024301426758151212, "loss": 2.0371, "step": 275850 }, { "epoch": 1.0499912456323317, "grad_norm": 0.15164650976657867, "learning_rate": 0.0002429332123098487, "loss": 2.04, "step": 275860 }, { "epoch": 1.0500293081004544, "grad_norm": 0.14507368206977844, "learning_rate": 0.00024285218258752417, "loss": 2.0258, "step": 275870 }, { "epoch": 1.0500673705685772, "grad_norm": 0.13732139766216278, "learning_rate": 0.0002427711783903937, "loss": 2.0295, "step": 275880 }, { "epoch": 1.0501054330367, "grad_norm": 0.13564445078372955, "learning_rate": 0.0002426901996943503, "loss": 2.0415, "step": 275890 }, { "epoch": 1.0501434955048226, "grad_norm": 0.14474515616893768, "learning_rate": 0.000242609246475325, "loss": 2.0391, "step": 275900 }, { "epoch": 1.0501815579729452, "grad_norm": 0.176593616604805, "learning_rate": 0.00024252831870928666, "loss": 2.0354, "step": 275910 }, { "epoch": 1.0502196204410679, "grad_norm": 0.1409319043159485, "learning_rate": 0.00024244741637224193, "loss": 2.0167, "step": 275920 }, { "epoch": 1.0502576829091905, "grad_norm": 0.14716550707817078, "learning_rate": 0.00024236653944023496, "loss": 2.0202, "step": 275930 }, { "epoch": 1.0502957453773132, "grad_norm": 0.1268341839313507, "learning_rate": 0.0002422856878893478, "loss": 2.0114, "step": 275940 }, { "epoch": 1.0503338078454358, "grad_norm": 0.12487328052520752, "learning_rate": 0.00024220486169569967, "loss": 2.0437, "step": 275950 }, { "epoch": 1.0503718703135587, "grad_norm": 0.13905835151672363, "learning_rate": 0.00024212406083544747, "loss": 2.0203, "step": 275960 }, { "epoch": 1.0504099327816814, "grad_norm": 0.13599219918251038, "learning_rate": 0.0002420432852847853, "loss": 2.021, "step": 275970 }, { "epoch": 1.050447995249804, "grad_norm": 0.13935434818267822, "learning_rate": 0.00024196253501994458, "loss": 2.028, "step": 275980 }, { "epoch": 1.0504860577179267, "grad_norm": 0.15049731731414795, "learning_rate": 0.0002418818100171939, "loss": 2.0315, "step": 275990 }, { "epoch": 1.0505241201860493, "grad_norm": 0.13765917718410492, "learning_rate": 0.0002418011102528389, "loss": 2.0183, "step": 276000 }, { "epoch": 1.050562182654172, "grad_norm": 0.13514918088912964, "learning_rate": 0.00024172043570322226, "loss": 2.0324, "step": 276010 }, { "epoch": 1.0506002451222947, "grad_norm": 0.12284824997186661, "learning_rate": 0.00024163978634472372, "loss": 2.037, "step": 276020 }, { "epoch": 1.0506383075904173, "grad_norm": 0.13643833994865417, "learning_rate": 0.00024155916215375973, "loss": 2.0215, "step": 276030 }, { "epoch": 1.05067637005854, "grad_norm": 0.1364985704421997, "learning_rate": 0.0002414785631067835, "loss": 2.0274, "step": 276040 }, { "epoch": 1.0507144325266629, "grad_norm": 0.13563884794712067, "learning_rate": 0.00024139798918028498, "loss": 2.0269, "step": 276050 }, { "epoch": 1.0507524949947855, "grad_norm": 0.15985845029354095, "learning_rate": 0.00024131744035079083, "loss": 2.0319, "step": 276060 }, { "epoch": 1.0507905574629082, "grad_norm": 0.13042674958705902, "learning_rate": 0.0002412369165948641, "loss": 2.0382, "step": 276070 }, { "epoch": 1.0508286199310308, "grad_norm": 0.13331478834152222, "learning_rate": 0.0002411564178891043, "loss": 2.0234, "step": 276080 }, { "epoch": 1.0508666823991535, "grad_norm": 0.13373886048793793, "learning_rate": 0.00024107594421014745, "loss": 2.0215, "step": 276090 }, { "epoch": 1.0509047448672761, "grad_norm": 0.1378992199897766, "learning_rate": 0.00024099549553466582, "loss": 2.0323, "step": 276100 }, { "epoch": 1.0509428073353988, "grad_norm": 0.14647714793682098, "learning_rate": 0.00024091507183936767, "loss": 2.0383, "step": 276110 }, { "epoch": 1.0509808698035215, "grad_norm": 0.1651303470134735, "learning_rate": 0.00024083467310099784, "loss": 2.0249, "step": 276120 }, { "epoch": 1.0510189322716443, "grad_norm": 0.12446706742048264, "learning_rate": 0.0002407542992963368, "loss": 2.0287, "step": 276130 }, { "epoch": 1.051056994739767, "grad_norm": 0.1194441169500351, "learning_rate": 0.0002406739504022012, "loss": 2.0217, "step": 276140 }, { "epoch": 1.0510950572078896, "grad_norm": 0.132504403591156, "learning_rate": 0.0002405936263954437, "loss": 2.0266, "step": 276150 }, { "epoch": 1.0511331196760123, "grad_norm": 0.11552822589874268, "learning_rate": 0.0002405133272529525, "loss": 2.0301, "step": 276160 }, { "epoch": 1.051171182144135, "grad_norm": 0.13374902307987213, "learning_rate": 0.0002404330529516518, "loss": 2.0188, "step": 276170 }, { "epoch": 1.0512092446122576, "grad_norm": 0.1289547234773636, "learning_rate": 0.0002403528034685014, "loss": 2.0263, "step": 276180 }, { "epoch": 1.0512473070803803, "grad_norm": 0.1595178246498108, "learning_rate": 0.00024027257878049663, "loss": 2.0196, "step": 276190 }, { "epoch": 1.051285369548503, "grad_norm": 0.13148796558380127, "learning_rate": 0.0002401923788646684, "loss": 2.0193, "step": 276200 }, { "epoch": 1.0513234320166256, "grad_norm": 0.1301811933517456, "learning_rate": 0.00024011220369808307, "loss": 2.0175, "step": 276210 }, { "epoch": 1.0513614944847485, "grad_norm": 0.17643029987812042, "learning_rate": 0.00024003205325784233, "loss": 2.0483, "step": 276220 }, { "epoch": 1.0513995569528711, "grad_norm": 0.14449796080589294, "learning_rate": 0.00023995192752108313, "loss": 2.0255, "step": 276230 }, { "epoch": 1.0514376194209938, "grad_norm": 0.1294335275888443, "learning_rate": 0.00023987182646497773, "loss": 2.0206, "step": 276240 }, { "epoch": 1.0514756818891164, "grad_norm": 0.1255398392677307, "learning_rate": 0.00023979175006673344, "loss": 2.0332, "step": 276250 }, { "epoch": 1.051513744357239, "grad_norm": 0.12768849730491638, "learning_rate": 0.00023971169830359259, "loss": 2.0397, "step": 276260 }, { "epoch": 1.0515518068253618, "grad_norm": 0.13748249411582947, "learning_rate": 0.0002396316711528327, "loss": 2.0474, "step": 276270 }, { "epoch": 1.0515898692934844, "grad_norm": 0.13064044713974, "learning_rate": 0.00023955166859176592, "loss": 2.0314, "step": 276280 }, { "epoch": 1.051627931761607, "grad_norm": 0.1798001527786255, "learning_rate": 0.00023947169059773942, "loss": 2.0249, "step": 276290 }, { "epoch": 1.0516659942297297, "grad_norm": 0.14949892461299896, "learning_rate": 0.00023939173714813516, "loss": 2.027, "step": 276300 }, { "epoch": 1.0517040566978526, "grad_norm": 0.13493582606315613, "learning_rate": 0.0002393118082203697, "loss": 2.0383, "step": 276310 }, { "epoch": 1.0517421191659753, "grad_norm": 0.1302517056465149, "learning_rate": 0.000239231903791894, "loss": 2.0331, "step": 276320 }, { "epoch": 1.051780181634098, "grad_norm": 0.12950988113880157, "learning_rate": 0.00023915202384019413, "loss": 2.0274, "step": 276330 }, { "epoch": 1.0518182441022206, "grad_norm": 0.1417592167854309, "learning_rate": 0.00023907216834279, "loss": 2.032, "step": 276340 }, { "epoch": 1.0518563065703432, "grad_norm": 0.17708420753479004, "learning_rate": 0.00023899233727723624, "loss": 2.0362, "step": 276350 }, { "epoch": 1.051894369038466, "grad_norm": 0.15068641304969788, "learning_rate": 0.00023891253062112178, "loss": 2.0568, "step": 276360 }, { "epoch": 1.0519324315065886, "grad_norm": 0.14147871732711792, "learning_rate": 0.00023883274835206976, "loss": 2.0339, "step": 276370 }, { "epoch": 1.0519704939747112, "grad_norm": 0.14862096309661865, "learning_rate": 0.00023875299044773734, "loss": 2.0263, "step": 276380 }, { "epoch": 1.052008556442834, "grad_norm": 0.15561573207378387, "learning_rate": 0.0002386732568858161, "loss": 2.0253, "step": 276390 }, { "epoch": 1.0520466189109567, "grad_norm": 0.14745838940143585, "learning_rate": 0.0002385935476440313, "loss": 2.0103, "step": 276400 }, { "epoch": 1.0520846813790794, "grad_norm": 0.1444399207830429, "learning_rate": 0.00023851386270014235, "loss": 2.0236, "step": 276410 }, { "epoch": 1.052122743847202, "grad_norm": 0.156083881855011, "learning_rate": 0.0002384342020319425, "loss": 2.0307, "step": 276420 }, { "epoch": 1.0521608063153247, "grad_norm": 0.13538765907287598, "learning_rate": 0.00023835456561725882, "loss": 2.0299, "step": 276430 }, { "epoch": 1.0521988687834474, "grad_norm": 0.13774365186691284, "learning_rate": 0.000238274953433952, "loss": 2.0312, "step": 276440 }, { "epoch": 1.05223693125157, "grad_norm": 0.17226563394069672, "learning_rate": 0.00023819536545991655, "loss": 2.0228, "step": 276450 }, { "epoch": 1.0522749937196927, "grad_norm": 0.14496228098869324, "learning_rate": 0.00023811580167308044, "loss": 2.0265, "step": 276460 }, { "epoch": 1.0523130561878153, "grad_norm": 0.13886311650276184, "learning_rate": 0.00023803626205140527, "loss": 2.0403, "step": 276470 }, { "epoch": 1.0523511186559382, "grad_norm": 0.13826139271259308, "learning_rate": 0.00023795674657288608, "loss": 2.0228, "step": 276480 }, { "epoch": 1.0523891811240609, "grad_norm": 0.12250851094722748, "learning_rate": 0.00023787725521555108, "loss": 2.037, "step": 276490 }, { "epoch": 1.0524272435921835, "grad_norm": 0.1252516806125641, "learning_rate": 0.0002377977879574621, "loss": 2.0248, "step": 276500 }, { "epoch": 1.0524653060603062, "grad_norm": 0.1385105699300766, "learning_rate": 0.000237718344776714, "loss": 2.0175, "step": 276510 }, { "epoch": 1.0525033685284289, "grad_norm": 0.15018312633037567, "learning_rate": 0.00023763892565143485, "loss": 2.0349, "step": 276520 }, { "epoch": 1.0525414309965515, "grad_norm": 0.12454996258020401, "learning_rate": 0.0002375595305597858, "loss": 2.0264, "step": 276530 }, { "epoch": 1.0525794934646742, "grad_norm": 0.13958518207073212, "learning_rate": 0.0002374801594799611, "loss": 2.0332, "step": 276540 }, { "epoch": 1.0526175559327968, "grad_norm": 0.16262899339199066, "learning_rate": 0.00023740081239018786, "loss": 2.0252, "step": 276550 }, { "epoch": 1.0526556184009195, "grad_norm": 0.14138750731945038, "learning_rate": 0.00023732148926872604, "loss": 2.0178, "step": 276560 }, { "epoch": 1.0526936808690424, "grad_norm": 0.1345914602279663, "learning_rate": 0.00023724219009386865, "loss": 2.0228, "step": 276570 }, { "epoch": 1.052731743337165, "grad_norm": 0.12766501307487488, "learning_rate": 0.00023716291484394115, "loss": 2.0277, "step": 276580 }, { "epoch": 1.0527698058052877, "grad_norm": 0.12269017845392227, "learning_rate": 0.00023708366349730188, "loss": 2.0324, "step": 276590 }, { "epoch": 1.0528078682734103, "grad_norm": 0.14488330483436584, "learning_rate": 0.00023700443603234169, "loss": 2.0332, "step": 276600 }, { "epoch": 1.052845930741533, "grad_norm": 0.15273307263851166, "learning_rate": 0.000236925232427484, "loss": 2.0324, "step": 276610 }, { "epoch": 1.0528839932096556, "grad_norm": 0.14215412735939026, "learning_rate": 0.00023684605266118463, "loss": 2.0301, "step": 276620 }, { "epoch": 1.0529220556777783, "grad_norm": 0.1337355077266693, "learning_rate": 0.00023676689671193202, "loss": 2.0284, "step": 276630 }, { "epoch": 1.052960118145901, "grad_norm": 0.14778606593608856, "learning_rate": 0.0002366877645582467, "loss": 2.0232, "step": 276640 }, { "epoch": 1.0529981806140238, "grad_norm": 0.14975306391716003, "learning_rate": 0.00023660865617868154, "loss": 2.0143, "step": 276650 }, { "epoch": 1.0530362430821465, "grad_norm": 0.12948152422904968, "learning_rate": 0.00023652957155182163, "loss": 2.0306, "step": 276660 }, { "epoch": 1.0530743055502692, "grad_norm": 0.1450575739145279, "learning_rate": 0.00023645051065628427, "loss": 2.0324, "step": 276670 }, { "epoch": 1.0531123680183918, "grad_norm": 0.14224261045455933, "learning_rate": 0.00023637147347071864, "loss": 2.0158, "step": 276680 }, { "epoch": 1.0531504304865145, "grad_norm": 0.12313112616539001, "learning_rate": 0.0002362924599738061, "loss": 2.026, "step": 276690 }, { "epoch": 1.0531884929546371, "grad_norm": 0.13612818717956543, "learning_rate": 0.00023621347014425977, "loss": 2.0447, "step": 276700 }, { "epoch": 1.0532265554227598, "grad_norm": 0.12820477783679962, "learning_rate": 0.00023613450396082474, "loss": 2.0325, "step": 276710 }, { "epoch": 1.0532646178908824, "grad_norm": 0.1335642784833908, "learning_rate": 0.00023605556140227795, "loss": 2.0345, "step": 276720 }, { "epoch": 1.053302680359005, "grad_norm": 0.1384914219379425, "learning_rate": 0.0002359766424474279, "loss": 2.0255, "step": 276730 }, { "epoch": 1.053340742827128, "grad_norm": 0.12948639690876007, "learning_rate": 0.00023589774707511485, "loss": 2.015, "step": 276740 }, { "epoch": 1.0533788052952506, "grad_norm": 0.16162152588367462, "learning_rate": 0.0002358188752642107, "loss": 2.021, "step": 276750 }, { "epoch": 1.0534168677633733, "grad_norm": 0.12111581861972809, "learning_rate": 0.00023574002699361878, "loss": 2.0229, "step": 276760 }, { "epoch": 1.053454930231496, "grad_norm": 0.1492808610200882, "learning_rate": 0.00023566120224227395, "loss": 2.0413, "step": 276770 }, { "epoch": 1.0534929926996186, "grad_norm": 0.14243555068969727, "learning_rate": 0.0002355824009891424, "loss": 2.016, "step": 276780 }, { "epoch": 1.0535310551677413, "grad_norm": 0.13206037878990173, "learning_rate": 0.00023550362321322183, "loss": 2.0301, "step": 276790 }, { "epoch": 1.053569117635864, "grad_norm": 0.133035346865654, "learning_rate": 0.0002354248688935409, "loss": 2.0395, "step": 276800 }, { "epoch": 1.0536071801039866, "grad_norm": 0.14485926926136017, "learning_rate": 0.00023534613800915984, "loss": 2.0231, "step": 276810 }, { "epoch": 1.0536452425721095, "grad_norm": 0.12455225735902786, "learning_rate": 0.00023526743053916971, "loss": 2.0061, "step": 276820 }, { "epoch": 1.053683305040232, "grad_norm": 0.15578719973564148, "learning_rate": 0.0002351887464626928, "loss": 2.0313, "step": 276830 }, { "epoch": 1.0537213675083548, "grad_norm": 0.14091117680072784, "learning_rate": 0.00023511008575888233, "loss": 2.0345, "step": 276840 }, { "epoch": 1.0537594299764774, "grad_norm": 0.14255601167678833, "learning_rate": 0.0002350314484069226, "loss": 2.0358, "step": 276850 }, { "epoch": 1.0537974924446, "grad_norm": 0.13675963878631592, "learning_rate": 0.00023495283438602855, "loss": 2.0202, "step": 276860 }, { "epoch": 1.0538355549127227, "grad_norm": 0.1342182606458664, "learning_rate": 0.00023487424367544624, "loss": 2.0126, "step": 276870 }, { "epoch": 1.0538736173808454, "grad_norm": 0.12606917321681976, "learning_rate": 0.0002347956762544522, "loss": 2.037, "step": 276880 }, { "epoch": 1.053911679848968, "grad_norm": 0.13598474860191345, "learning_rate": 0.00023471713210235386, "loss": 2.02, "step": 276890 }, { "epoch": 1.0539497423170907, "grad_norm": 0.13164277374744415, "learning_rate": 0.00023463861119848905, "loss": 2.0234, "step": 276900 }, { "epoch": 1.0539878047852136, "grad_norm": 0.1325596272945404, "learning_rate": 0.0002345601135222264, "loss": 2.036, "step": 276910 }, { "epoch": 1.0540258672533362, "grad_norm": 0.1430538147687912, "learning_rate": 0.00023448163905296493, "loss": 2.0429, "step": 276920 }, { "epoch": 1.054063929721459, "grad_norm": 0.1567510962486267, "learning_rate": 0.00023440318777013408, "loss": 2.0223, "step": 276930 }, { "epoch": 1.0541019921895816, "grad_norm": 0.16093851625919342, "learning_rate": 0.0002343247596531936, "loss": 2.0205, "step": 276940 }, { "epoch": 1.0541400546577042, "grad_norm": 0.13100646436214447, "learning_rate": 0.0002342463546816338, "loss": 2.027, "step": 276950 }, { "epoch": 1.0541781171258269, "grad_norm": 0.14721788465976715, "learning_rate": 0.00023416797283497487, "loss": 2.0404, "step": 276960 }, { "epoch": 1.0542161795939495, "grad_norm": 0.13433021306991577, "learning_rate": 0.00023408961409276746, "loss": 2.033, "step": 276970 }, { "epoch": 1.0542542420620722, "grad_norm": 0.13327881693840027, "learning_rate": 0.0002340112784345923, "loss": 2.0492, "step": 276980 }, { "epoch": 1.054292304530195, "grad_norm": 0.13001398742198944, "learning_rate": 0.00023393296584006, "loss": 2.0264, "step": 276990 }, { "epoch": 1.0543303669983177, "grad_norm": 0.15639451146125793, "learning_rate": 0.00023385467628881147, "loss": 2.036, "step": 277000 }, { "epoch": 1.0543684294664404, "grad_norm": 0.1349909007549286, "learning_rate": 0.00023377640976051728, "loss": 2.0342, "step": 277010 }, { "epoch": 1.054406491934563, "grad_norm": 0.13593465089797974, "learning_rate": 0.000233698166234878, "loss": 2.0304, "step": 277020 }, { "epoch": 1.0544445544026857, "grad_norm": 0.13852593302726746, "learning_rate": 0.00023361994569162404, "loss": 2.0327, "step": 277030 }, { "epoch": 1.0544826168708084, "grad_norm": 0.1467350423336029, "learning_rate": 0.0002335417481105155, "loss": 2.0256, "step": 277040 }, { "epoch": 1.054520679338931, "grad_norm": 0.1416371613740921, "learning_rate": 0.00023346357347134205, "loss": 2.0358, "step": 277050 }, { "epoch": 1.0545587418070537, "grad_norm": 0.14200444519519806, "learning_rate": 0.00023338542175392336, "loss": 2.0335, "step": 277060 }, { "epoch": 1.0545968042751763, "grad_norm": 0.13185830414295197, "learning_rate": 0.00023330729293810827, "loss": 2.0144, "step": 277070 }, { "epoch": 1.0546348667432992, "grad_norm": 0.1465560793876648, "learning_rate": 0.0002332291870037753, "loss": 2.0302, "step": 277080 }, { "epoch": 1.0546729292114219, "grad_norm": 0.12864398956298828, "learning_rate": 0.00023315110393083255, "loss": 2.0304, "step": 277090 }, { "epoch": 1.0547109916795445, "grad_norm": 0.14548641443252563, "learning_rate": 0.00023307304369921721, "loss": 2.0303, "step": 277100 }, { "epoch": 1.0547490541476672, "grad_norm": 0.14383628964424133, "learning_rate": 0.00023299500628889603, "loss": 2.0268, "step": 277110 }, { "epoch": 1.0547871166157898, "grad_norm": 0.15194641053676605, "learning_rate": 0.00023291699167986497, "loss": 2.0306, "step": 277120 }, { "epoch": 1.0548251790839125, "grad_norm": 0.13964703679084778, "learning_rate": 0.00023283899985214907, "loss": 2.0204, "step": 277130 }, { "epoch": 1.0548632415520351, "grad_norm": 0.135025292634964, "learning_rate": 0.00023276103078580273, "loss": 2.0271, "step": 277140 }, { "epoch": 1.0549013040201578, "grad_norm": 0.14040318131446838, "learning_rate": 0.00023268308446090934, "loss": 2.0271, "step": 277150 }, { "epoch": 1.0549393664882805, "grad_norm": 0.7150856256484985, "learning_rate": 0.00023260516085758126, "loss": 2.0461, "step": 277160 }, { "epoch": 1.0549774289564033, "grad_norm": 0.1341201215982437, "learning_rate": 0.00023252725995595987, "loss": 2.0262, "step": 277170 }, { "epoch": 1.055015491424526, "grad_norm": 0.1424829214811325, "learning_rate": 0.0002324493817362155, "loss": 2.0297, "step": 277180 }, { "epoch": 1.0550535538926487, "grad_norm": 0.16039422154426575, "learning_rate": 0.00023237152617854728, "loss": 2.0216, "step": 277190 }, { "epoch": 1.0550916163607713, "grad_norm": 0.1337854564189911, "learning_rate": 0.00023229369326318317, "loss": 2.0255, "step": 277200 }, { "epoch": 1.055129678828894, "grad_norm": 0.1322571486234665, "learning_rate": 0.00023221588297037983, "loss": 2.0318, "step": 277210 }, { "epoch": 1.0551677412970166, "grad_norm": 0.13549922406673431, "learning_rate": 0.00023213809528042255, "loss": 2.0272, "step": 277220 }, { "epoch": 1.0552058037651393, "grad_norm": 0.13208764791488647, "learning_rate": 0.00023206033017362534, "loss": 2.0349, "step": 277230 }, { "epoch": 1.055243866233262, "grad_norm": 0.1279682070016861, "learning_rate": 0.00023198258763033075, "loss": 2.0268, "step": 277240 }, { "epoch": 1.0552819287013848, "grad_norm": 0.13463786244392395, "learning_rate": 0.00023190486763090978, "loss": 2.0357, "step": 277250 }, { "epoch": 1.0553199911695075, "grad_norm": 0.13447913527488708, "learning_rate": 0.00023182717015576195, "loss": 2.0129, "step": 277260 }, { "epoch": 1.0553580536376301, "grad_norm": 0.13115116953849792, "learning_rate": 0.00023174949518531501, "loss": 2.04, "step": 277270 }, { "epoch": 1.0553961161057528, "grad_norm": 0.12983772158622742, "learning_rate": 0.00023167184270002524, "loss": 2.0406, "step": 277280 }, { "epoch": 1.0554341785738754, "grad_norm": 0.15062974393367767, "learning_rate": 0.00023159421268037706, "loss": 2.0226, "step": 277290 }, { "epoch": 1.055472241041998, "grad_norm": 0.15623483061790466, "learning_rate": 0.00023151660510688316, "loss": 2.023, "step": 277300 }, { "epoch": 1.0555103035101208, "grad_norm": 0.1440696120262146, "learning_rate": 0.00023143901996008432, "loss": 2.0077, "step": 277310 }, { "epoch": 1.0555483659782434, "grad_norm": 0.15217626094818115, "learning_rate": 0.00023136145722054948, "loss": 2.0199, "step": 277320 }, { "epoch": 1.055586428446366, "grad_norm": 0.12807799875736237, "learning_rate": 0.00023128391686887568, "loss": 2.0236, "step": 277330 }, { "epoch": 1.055624490914489, "grad_norm": 0.1437712162733078, "learning_rate": 0.0002312063988856878, "loss": 2.0185, "step": 277340 }, { "epoch": 1.0556625533826116, "grad_norm": 0.13763098418712616, "learning_rate": 0.0002311289032516387, "loss": 2.015, "step": 277350 }, { "epoch": 1.0557006158507343, "grad_norm": 0.13203462958335876, "learning_rate": 0.00023105142994740924, "loss": 2.0194, "step": 277360 }, { "epoch": 1.055738678318857, "grad_norm": 0.1410224586725235, "learning_rate": 0.00023097397895370793, "loss": 2.0335, "step": 277370 }, { "epoch": 1.0557767407869796, "grad_norm": 0.1614682972431183, "learning_rate": 0.0002308965502512711, "loss": 2.008, "step": 277380 }, { "epoch": 1.0558148032551022, "grad_norm": 0.13247433304786682, "learning_rate": 0.00023081914382086283, "loss": 2.0175, "step": 277390 }, { "epoch": 1.055852865723225, "grad_norm": 0.14368799328804016, "learning_rate": 0.00023074175964327482, "loss": 2.0175, "step": 277400 }, { "epoch": 1.0558909281913476, "grad_norm": 0.14326095581054688, "learning_rate": 0.0002306643976993263, "loss": 2.0171, "step": 277410 }, { "epoch": 1.0559289906594702, "grad_norm": 0.16751118004322052, "learning_rate": 0.00023058705796986419, "loss": 2.0364, "step": 277420 }, { "epoch": 1.055967053127593, "grad_norm": 0.13226917386054993, "learning_rate": 0.00023050974043576272, "loss": 2.0149, "step": 277430 }, { "epoch": 1.0560051155957157, "grad_norm": 0.13986603915691376, "learning_rate": 0.0002304324450779236, "loss": 2.0374, "step": 277440 }, { "epoch": 1.0560431780638384, "grad_norm": 0.14492225646972656, "learning_rate": 0.00023035517187727607, "loss": 2.0419, "step": 277450 }, { "epoch": 1.056081240531961, "grad_norm": 0.13461005687713623, "learning_rate": 0.00023027792081477643, "loss": 2.0392, "step": 277460 }, { "epoch": 1.0561193030000837, "grad_norm": 0.17020630836486816, "learning_rate": 0.00023020069187140846, "loss": 2.02, "step": 277470 }, { "epoch": 1.0561573654682064, "grad_norm": 0.13969464600086212, "learning_rate": 0.00023012348502818308, "loss": 2.0317, "step": 277480 }, { "epoch": 1.056195427936329, "grad_norm": 0.1441717892885208, "learning_rate": 0.00023004630026613826, "loss": 2.0206, "step": 277490 }, { "epoch": 1.0562334904044517, "grad_norm": 0.12981465458869934, "learning_rate": 0.00022996913756633913, "loss": 2.0178, "step": 277500 }, { "epoch": 1.0562715528725746, "grad_norm": 0.16938796639442444, "learning_rate": 0.00022989199690987805, "loss": 2.0176, "step": 277510 }, { "epoch": 1.0563096153406972, "grad_norm": 0.15160812437534332, "learning_rate": 0.00022981487827787413, "loss": 2.0262, "step": 277520 }, { "epoch": 1.0563476778088199, "grad_norm": 0.12253037095069885, "learning_rate": 0.0002297377816514734, "loss": 2.0199, "step": 277530 }, { "epoch": 1.0563857402769425, "grad_norm": 0.1367233842611313, "learning_rate": 0.00022966070701184904, "loss": 2.0395, "step": 277540 }, { "epoch": 1.0564238027450652, "grad_norm": 0.13288183510303497, "learning_rate": 0.00022958365434020084, "loss": 2.0296, "step": 277550 }, { "epoch": 1.0564618652131879, "grad_norm": 0.11826697736978531, "learning_rate": 0.00022950662361775537, "loss": 2.0217, "step": 277560 }, { "epoch": 1.0564999276813105, "grad_norm": 0.12652520835399628, "learning_rate": 0.00022942961482576602, "loss": 2.0218, "step": 277570 }, { "epoch": 1.0565379901494332, "grad_norm": 0.1527426689863205, "learning_rate": 0.00022935262794551286, "loss": 2.0275, "step": 277580 }, { "epoch": 1.0565760526175558, "grad_norm": 0.13205711543560028, "learning_rate": 0.00022927566295830243, "loss": 2.0192, "step": 277590 }, { "epoch": 1.0566141150856787, "grad_norm": 0.13617442548274994, "learning_rate": 0.00022919871984546797, "loss": 2.038, "step": 277600 }, { "epoch": 1.0566521775538014, "grad_norm": 0.13269749283790588, "learning_rate": 0.00022912179858836922, "loss": 2.0294, "step": 277610 }, { "epoch": 1.056690240021924, "grad_norm": 0.1346275359392166, "learning_rate": 0.00022904489916839233, "loss": 2.0185, "step": 277620 }, { "epoch": 1.0567283024900467, "grad_norm": 0.14745649695396423, "learning_rate": 0.0002289680215669499, "loss": 2.0402, "step": 277630 }, { "epoch": 1.0567663649581693, "grad_norm": 0.1354425996541977, "learning_rate": 0.00022889116576548085, "loss": 2.0331, "step": 277640 }, { "epoch": 1.056804427426292, "grad_norm": 0.13665327429771423, "learning_rate": 0.00022881433174545035, "loss": 2.0087, "step": 277650 }, { "epoch": 1.0568424898944146, "grad_norm": 0.1539125293493271, "learning_rate": 0.00022873751948834992, "loss": 2.0321, "step": 277660 }, { "epoch": 1.0568805523625373, "grad_norm": 0.13223835825920105, "learning_rate": 0.0002286607289756973, "loss": 2.0371, "step": 277670 }, { "epoch": 1.0569186148306602, "grad_norm": 0.1493256539106369, "learning_rate": 0.00022858396018903622, "loss": 2.023, "step": 277680 }, { "epoch": 1.0569566772987828, "grad_norm": 0.13801084458827972, "learning_rate": 0.0002285072131099367, "loss": 2.0171, "step": 277690 }, { "epoch": 1.0569947397669055, "grad_norm": 0.14287050068378448, "learning_rate": 0.00022843048771999463, "loss": 2.0375, "step": 277700 }, { "epoch": 1.0570328022350282, "grad_norm": 0.15522553026676178, "learning_rate": 0.00022835378400083196, "loss": 2.015, "step": 277710 }, { "epoch": 1.0570708647031508, "grad_norm": 0.13880471885204315, "learning_rate": 0.00022827710193409662, "loss": 2.0132, "step": 277720 }, { "epoch": 1.0571089271712735, "grad_norm": 0.13956932723522186, "learning_rate": 0.00022820044150146236, "loss": 2.0146, "step": 277730 }, { "epoch": 1.0571469896393961, "grad_norm": 0.12712182104587555, "learning_rate": 0.00022812380268462878, "loss": 2.0262, "step": 277740 }, { "epoch": 1.0571850521075188, "grad_norm": 0.18996763229370117, "learning_rate": 0.00022804718546532132, "loss": 2.027, "step": 277750 }, { "epoch": 1.0572231145756414, "grad_norm": 0.16294504702091217, "learning_rate": 0.00022797058982529112, "loss": 2.0087, "step": 277760 }, { "epoch": 1.0572611770437643, "grad_norm": 0.12692804634571075, "learning_rate": 0.00022789401574631503, "loss": 2.0337, "step": 277770 }, { "epoch": 1.057299239511887, "grad_norm": 0.1276542842388153, "learning_rate": 0.0002278174632101954, "loss": 2.0268, "step": 277780 }, { "epoch": 1.0573373019800096, "grad_norm": 0.14247260987758636, "learning_rate": 0.00022774093219876036, "loss": 2.0021, "step": 277790 }, { "epoch": 1.0573753644481323, "grad_norm": 0.13470324873924255, "learning_rate": 0.00022766442269386345, "loss": 2.0288, "step": 277800 }, { "epoch": 1.057413426916255, "grad_norm": 0.13499245047569275, "learning_rate": 0.00022758793467738376, "loss": 2.0175, "step": 277810 }, { "epoch": 1.0574514893843776, "grad_norm": 0.13271304965019226, "learning_rate": 0.00022751146813122574, "loss": 2.032, "step": 277820 }, { "epoch": 1.0574895518525003, "grad_norm": 0.12108422815799713, "learning_rate": 0.00022743502303731927, "loss": 2.0218, "step": 277830 }, { "epoch": 1.057527614320623, "grad_norm": 0.15185122191905975, "learning_rate": 0.0002273585993776196, "loss": 2.0304, "step": 277840 }, { "epoch": 1.0575656767887458, "grad_norm": 0.13970841467380524, "learning_rate": 0.00022728219713410714, "loss": 2.0184, "step": 277850 }, { "epoch": 1.0576037392568685, "grad_norm": 0.15076260268688202, "learning_rate": 0.00022720581628878767, "loss": 2.0196, "step": 277860 }, { "epoch": 1.0576418017249911, "grad_norm": 0.14045163989067078, "learning_rate": 0.00022712945682369206, "loss": 2.0299, "step": 277870 }, { "epoch": 1.0576798641931138, "grad_norm": 0.14571724832057953, "learning_rate": 0.00022705311872087643, "loss": 2.0213, "step": 277880 }, { "epoch": 1.0577179266612364, "grad_norm": 0.14794188737869263, "learning_rate": 0.00022697680196242175, "loss": 2.0362, "step": 277890 }, { "epoch": 1.057755989129359, "grad_norm": 0.13847602903842926, "learning_rate": 0.00022690050653043436, "loss": 2.0325, "step": 277900 }, { "epoch": 1.0577940515974817, "grad_norm": 0.13841918110847473, "learning_rate": 0.0002268242324070453, "loss": 2.034, "step": 277910 }, { "epoch": 1.0578321140656044, "grad_norm": 0.1277417689561844, "learning_rate": 0.00022674797957441067, "loss": 2.0145, "step": 277920 }, { "epoch": 1.057870176533727, "grad_norm": 0.1296491026878357, "learning_rate": 0.00022667174801471157, "loss": 2.0274, "step": 277930 }, { "epoch": 1.05790823900185, "grad_norm": 0.15428663790225983, "learning_rate": 0.00022659553771015367, "loss": 2.0266, "step": 277940 }, { "epoch": 1.0579463014699726, "grad_norm": 0.15810316801071167, "learning_rate": 0.0002265193486429677, "loss": 2.0248, "step": 277950 }, { "epoch": 1.0579843639380953, "grad_norm": 0.12902303040027618, "learning_rate": 0.00022644318079540894, "loss": 2.0204, "step": 277960 }, { "epoch": 1.058022426406218, "grad_norm": 0.13099555671215057, "learning_rate": 0.00022636703414975745, "loss": 2.0227, "step": 277970 }, { "epoch": 1.0580604888743406, "grad_norm": 0.14858795702457428, "learning_rate": 0.00022629090868831804, "loss": 2.0268, "step": 277980 }, { "epoch": 1.0580985513424632, "grad_norm": 0.13814105093479156, "learning_rate": 0.0002262148043934199, "loss": 2.0149, "step": 277990 }, { "epoch": 1.0581366138105859, "grad_norm": 0.13135714828968048, "learning_rate": 0.00022613872124741696, "loss": 2.0325, "step": 278000 }, { "epoch": 1.0581746762787085, "grad_norm": 0.11998411267995834, "learning_rate": 0.00022606265923268753, "loss": 2.0323, "step": 278010 }, { "epoch": 1.0582127387468312, "grad_norm": 0.13203677535057068, "learning_rate": 0.00022598661833163453, "loss": 2.0301, "step": 278020 }, { "epoch": 1.058250801214954, "grad_norm": 0.15732504427433014, "learning_rate": 0.00022591059852668512, "loss": 2.0235, "step": 278030 }, { "epoch": 1.0582888636830767, "grad_norm": 0.13207469880580902, "learning_rate": 0.00022583459980029088, "loss": 2.027, "step": 278040 }, { "epoch": 1.0583269261511994, "grad_norm": 0.16607141494750977, "learning_rate": 0.00022575862213492775, "loss": 2.0105, "step": 278050 }, { "epoch": 1.058364988619322, "grad_norm": 0.13722389936447144, "learning_rate": 0.00022568266551309597, "loss": 2.0212, "step": 278060 }, { "epoch": 1.0584030510874447, "grad_norm": 0.13708582520484924, "learning_rate": 0.00022560672991731983, "loss": 2.0176, "step": 278070 }, { "epoch": 1.0584411135555674, "grad_norm": 0.12867718935012817, "learning_rate": 0.00022553081533014798, "loss": 2.024, "step": 278080 }, { "epoch": 1.05847917602369, "grad_norm": 0.12874118983745575, "learning_rate": 0.0002254549217341531, "loss": 2.0304, "step": 278090 }, { "epoch": 1.0585172384918127, "grad_norm": 0.14795273542404175, "learning_rate": 0.00022537904911193197, "loss": 2.0174, "step": 278100 }, { "epoch": 1.0585553009599356, "grad_norm": 0.1385265588760376, "learning_rate": 0.00022530319744610544, "loss": 2.0182, "step": 278110 }, { "epoch": 1.0585933634280582, "grad_norm": 0.1351693719625473, "learning_rate": 0.00022522736671931826, "loss": 2.0282, "step": 278120 }, { "epoch": 1.0586314258961809, "grad_norm": 0.1506417840719223, "learning_rate": 0.00022515155691423927, "loss": 2.0175, "step": 278130 }, { "epoch": 1.0586694883643035, "grad_norm": 0.15787288546562195, "learning_rate": 0.000225075768013561, "loss": 2.017, "step": 278140 }, { "epoch": 1.0587075508324262, "grad_norm": 0.16656358540058136, "learning_rate": 0.000225, "loss": 2.0143, "step": 278150 }, { "epoch": 1.0587456133005488, "grad_norm": 0.14569364488124847, "learning_rate": 0.00022492425285629657, "loss": 2.027, "step": 278160 }, { "epoch": 1.0587836757686715, "grad_norm": 0.15189801156520844, "learning_rate": 0.00022484852656521476, "loss": 2.0172, "step": 278170 }, { "epoch": 1.0588217382367942, "grad_norm": 0.13380587100982666, "learning_rate": 0.00022477282110954235, "loss": 2.0282, "step": 278180 }, { "epoch": 1.0588598007049168, "grad_norm": 0.13768541812896729, "learning_rate": 0.00022469713647209067, "loss": 2.0224, "step": 278190 }, { "epoch": 1.0588978631730397, "grad_norm": 0.1390744000673294, "learning_rate": 0.00022462147263569487, "loss": 2.0329, "step": 278200 }, { "epoch": 1.0589359256411623, "grad_norm": 0.16329453885555267, "learning_rate": 0.00022454582958321363, "loss": 2.0321, "step": 278210 }, { "epoch": 1.058973988109285, "grad_norm": 0.16884244978427887, "learning_rate": 0.000224470207297529, "loss": 2.0298, "step": 278220 }, { "epoch": 1.0590120505774077, "grad_norm": 0.12581200897693634, "learning_rate": 0.00022439460576154658, "loss": 2.0398, "step": 278230 }, { "epoch": 1.0590501130455303, "grad_norm": 1.0800994634628296, "learning_rate": 0.00022431902495819556, "loss": 2.02, "step": 278240 }, { "epoch": 1.059088175513653, "grad_norm": 0.13833831250667572, "learning_rate": 0.0002242434648704284, "loss": 2.0356, "step": 278250 }, { "epoch": 1.0591262379817756, "grad_norm": 0.1597747802734375, "learning_rate": 0.00022416792548122084, "loss": 2.0125, "step": 278260 }, { "epoch": 1.0591643004498983, "grad_norm": 0.16858337819576263, "learning_rate": 0.0002240924067735721, "loss": 2.027, "step": 278270 }, { "epoch": 1.059202362918021, "grad_norm": 0.1446356326341629, "learning_rate": 0.00022401690873050456, "loss": 2.0293, "step": 278280 }, { "epoch": 1.0592404253861438, "grad_norm": 0.14237236976623535, "learning_rate": 0.00022394143133506374, "loss": 2.0253, "step": 278290 }, { "epoch": 1.0592784878542665, "grad_norm": 0.14626847207546234, "learning_rate": 0.00022386597457031847, "loss": 2.0134, "step": 278300 }, { "epoch": 1.0593165503223891, "grad_norm": 0.12890249490737915, "learning_rate": 0.0002237905384193607, "loss": 2.0212, "step": 278310 }, { "epoch": 1.0593546127905118, "grad_norm": 0.14841724932193756, "learning_rate": 0.00022371512286530531, "loss": 2.0129, "step": 278320 }, { "epoch": 1.0593926752586345, "grad_norm": 0.145298570394516, "learning_rate": 0.00022363972789129043, "loss": 2.0183, "step": 278330 }, { "epoch": 1.059430737726757, "grad_norm": 0.15068456530570984, "learning_rate": 0.00022356435348047706, "loss": 2.0285, "step": 278340 }, { "epoch": 1.0594688001948798, "grad_norm": 0.1366945505142212, "learning_rate": 0.00022348899961604902, "loss": 2.0372, "step": 278350 }, { "epoch": 1.0595068626630024, "grad_norm": 0.13482408225536346, "learning_rate": 0.0002234136662812134, "loss": 2.0077, "step": 278360 }, { "epoch": 1.0595449251311253, "grad_norm": 0.13060268759727478, "learning_rate": 0.00022333835345919977, "loss": 2.0194, "step": 278370 }, { "epoch": 1.059582987599248, "grad_norm": 0.16523779928684235, "learning_rate": 0.00022326306113326083, "loss": 2.0315, "step": 278380 }, { "epoch": 1.0596210500673706, "grad_norm": 0.13515925407409668, "learning_rate": 0.00022318778928667182, "loss": 2.0185, "step": 278390 }, { "epoch": 1.0596591125354933, "grad_norm": 0.13461363315582275, "learning_rate": 0.00022311253790273085, "loss": 2.0273, "step": 278400 }, { "epoch": 1.059697175003616, "grad_norm": 0.12945568561553955, "learning_rate": 0.00022303730696475866, "loss": 2.0309, "step": 278410 }, { "epoch": 1.0597352374717386, "grad_norm": 0.13417372107505798, "learning_rate": 0.00022296209645609867, "loss": 2.0159, "step": 278420 }, { "epoch": 1.0597732999398612, "grad_norm": 0.14353212714195251, "learning_rate": 0.00022288690636011684, "loss": 2.0281, "step": 278430 }, { "epoch": 1.059811362407984, "grad_norm": 0.1466580629348755, "learning_rate": 0.00022281173666020176, "loss": 2.0147, "step": 278440 }, { "epoch": 1.0598494248761066, "grad_norm": 0.15297137200832367, "learning_rate": 0.0002227365873397646, "loss": 2.031, "step": 278450 }, { "epoch": 1.0598874873442294, "grad_norm": 0.1253899484872818, "learning_rate": 0.00022266145838223877, "loss": 2.0232, "step": 278460 }, { "epoch": 1.059925549812352, "grad_norm": 0.14415845274925232, "learning_rate": 0.00022258634977108044, "loss": 2.0253, "step": 278470 }, { "epoch": 1.0599636122804748, "grad_norm": 0.14856302738189697, "learning_rate": 0.00022251126148976785, "loss": 2.0118, "step": 278480 }, { "epoch": 1.0600016747485974, "grad_norm": 0.13965021073818207, "learning_rate": 0.00022243619352180178, "loss": 2.0216, "step": 278490 }, { "epoch": 1.06003973721672, "grad_norm": 0.13246068358421326, "learning_rate": 0.00022236114585070531, "loss": 2.0192, "step": 278500 }, { "epoch": 1.0600777996848427, "grad_norm": 0.16952328383922577, "learning_rate": 0.00022228611846002367, "loss": 2.0101, "step": 278510 }, { "epoch": 1.0601158621529654, "grad_norm": 0.1641233265399933, "learning_rate": 0.00022221111133332444, "loss": 2.0194, "step": 278520 }, { "epoch": 1.060153924621088, "grad_norm": 0.14600826799869537, "learning_rate": 0.00022213612445419728, "loss": 2.0246, "step": 278530 }, { "epoch": 1.060191987089211, "grad_norm": 0.1428350955247879, "learning_rate": 0.00022206115780625408, "loss": 2.0211, "step": 278540 }, { "epoch": 1.0602300495573336, "grad_norm": 0.13691508769989014, "learning_rate": 0.00022198621137312874, "loss": 2.0261, "step": 278550 }, { "epoch": 1.0602681120254562, "grad_norm": 0.17456191778182983, "learning_rate": 0.00022191128513847718, "loss": 2.0111, "step": 278560 }, { "epoch": 1.060306174493579, "grad_norm": 0.15665015578269958, "learning_rate": 0.00022183637908597753, "loss": 2.0345, "step": 278570 }, { "epoch": 1.0603442369617015, "grad_norm": 0.14208164811134338, "learning_rate": 0.0002217614931993297, "loss": 2.0317, "step": 278580 }, { "epoch": 1.0603822994298242, "grad_norm": 0.15451577305793762, "learning_rate": 0.00022168662746225554, "loss": 2.0137, "step": 278590 }, { "epoch": 1.0604203618979469, "grad_norm": 0.1741928905248642, "learning_rate": 0.0002216117818584989, "loss": 2.0278, "step": 278600 }, { "epoch": 1.0604584243660695, "grad_norm": 0.16310615837574005, "learning_rate": 0.00022153695637182542, "loss": 2.0301, "step": 278610 }, { "epoch": 1.0604964868341922, "grad_norm": 0.13577622175216675, "learning_rate": 0.00022146215098602245, "loss": 2.0235, "step": 278620 }, { "epoch": 1.060534549302315, "grad_norm": 0.13898788392543793, "learning_rate": 0.00022138736568489935, "loss": 2.0281, "step": 278630 }, { "epoch": 1.0605726117704377, "grad_norm": 0.12679430842399597, "learning_rate": 0.00022131260045228697, "loss": 2.0204, "step": 278640 }, { "epoch": 1.0606106742385604, "grad_norm": 0.14395207166671753, "learning_rate": 0.0002212378552720378, "loss": 2.0266, "step": 278650 }, { "epoch": 1.060648736706683, "grad_norm": 0.13999640941619873, "learning_rate": 0.0002211631301280263, "loss": 2.0246, "step": 278660 }, { "epoch": 1.0606867991748057, "grad_norm": 0.19303567707538605, "learning_rate": 0.00022108842500414823, "loss": 2.0222, "step": 278670 }, { "epoch": 1.0607248616429283, "grad_norm": 0.13559174537658691, "learning_rate": 0.00022101373988432093, "loss": 2.0236, "step": 278680 }, { "epoch": 1.060762924111051, "grad_norm": 0.13610365986824036, "learning_rate": 0.0002209390747524835, "loss": 2.0226, "step": 278690 }, { "epoch": 1.0608009865791737, "grad_norm": 0.12796823680400848, "learning_rate": 0.00022086442959259623, "loss": 2.0216, "step": 278700 }, { "epoch": 1.0608390490472965, "grad_norm": 0.14899617433547974, "learning_rate": 0.00022078980438864105, "loss": 2.0291, "step": 278710 }, { "epoch": 1.0608771115154192, "grad_norm": 0.13698174059391022, "learning_rate": 0.0002207151991246212, "loss": 2.0272, "step": 278720 }, { "epoch": 1.0609151739835418, "grad_norm": 0.14914453029632568, "learning_rate": 0.0002206406137845613, "loss": 2.0174, "step": 278730 }, { "epoch": 1.0609532364516645, "grad_norm": 0.14008817076683044, "learning_rate": 0.00022056604835250722, "loss": 2.0165, "step": 278740 }, { "epoch": 1.0609912989197872, "grad_norm": 0.12607118487358093, "learning_rate": 0.00022049150281252628, "loss": 2.0137, "step": 278750 }, { "epoch": 1.0610293613879098, "grad_norm": 0.1694176197052002, "learning_rate": 0.00022041697714870695, "loss": 2.0284, "step": 278760 }, { "epoch": 1.0610674238560325, "grad_norm": 0.17194348573684692, "learning_rate": 0.00022034247134515878, "loss": 2.0313, "step": 278770 }, { "epoch": 1.0611054863241551, "grad_norm": 0.14293989539146423, "learning_rate": 0.00022026798538601273, "loss": 2.0262, "step": 278780 }, { "epoch": 1.0611435487922778, "grad_norm": 0.16035741567611694, "learning_rate": 0.00022019351925542063, "loss": 2.0306, "step": 278790 }, { "epoch": 1.0611816112604007, "grad_norm": 0.1362650990486145, "learning_rate": 0.00022011907293755556, "loss": 2.0145, "step": 278800 }, { "epoch": 1.0612196737285233, "grad_norm": 0.14739075303077698, "learning_rate": 0.00022004464641661164, "loss": 2.024, "step": 278810 }, { "epoch": 1.061257736196646, "grad_norm": 0.13756489753723145, "learning_rate": 0.0002199702396768039, "loss": 2.0093, "step": 278820 }, { "epoch": 1.0612957986647686, "grad_norm": 0.1316682994365692, "learning_rate": 0.00021989585270236833, "loss": 2.0122, "step": 278830 }, { "epoch": 1.0613338611328913, "grad_norm": 0.15358810126781464, "learning_rate": 0.00021982148547756202, "loss": 2.0323, "step": 278840 }, { "epoch": 1.061371923601014, "grad_norm": 0.12710179388523102, "learning_rate": 0.00021974713798666274, "loss": 2.0295, "step": 278850 }, { "epoch": 1.0614099860691366, "grad_norm": 0.12713980674743652, "learning_rate": 0.00021967281021396918, "loss": 2.0232, "step": 278860 }, { "epoch": 1.0614480485372593, "grad_norm": 0.16448360681533813, "learning_rate": 0.00021959850214380095, "loss": 2.0301, "step": 278870 }, { "epoch": 1.061486111005382, "grad_norm": 0.13466374576091766, "learning_rate": 0.0002195242137604983, "loss": 2.0257, "step": 278880 }, { "epoch": 1.0615241734735048, "grad_norm": 0.13751991093158722, "learning_rate": 0.0002194499450484222, "loss": 2.008, "step": 278890 }, { "epoch": 1.0615622359416275, "grad_norm": 0.13438063859939575, "learning_rate": 0.0002193756959919544, "loss": 2.0221, "step": 278900 }, { "epoch": 1.0616002984097501, "grad_norm": 0.13424231112003326, "learning_rate": 0.00021930146657549732, "loss": 2.0294, "step": 278910 }, { "epoch": 1.0616383608778728, "grad_norm": 0.13700388371944427, "learning_rate": 0.00021922725678347383, "loss": 2.0323, "step": 278920 }, { "epoch": 1.0616764233459954, "grad_norm": 0.13141176104545593, "learning_rate": 0.00021915306660032764, "loss": 2.0182, "step": 278930 }, { "epoch": 1.061714485814118, "grad_norm": 0.1494986116886139, "learning_rate": 0.00021907889601052282, "loss": 2.0313, "step": 278940 }, { "epoch": 1.0617525482822407, "grad_norm": 0.1353265792131424, "learning_rate": 0.00021900474499854389, "loss": 2.0058, "step": 278950 }, { "epoch": 1.0617906107503634, "grad_norm": 0.1495896428823471, "learning_rate": 0.0002189306135488961, "loss": 2.0244, "step": 278960 }, { "epoch": 1.0618286732184863, "grad_norm": 0.12826865911483765, "learning_rate": 0.00021885650164610483, "loss": 2.0158, "step": 278970 }, { "epoch": 1.061866735686609, "grad_norm": 0.13081242144107819, "learning_rate": 0.000218782409274716, "loss": 2.0146, "step": 278980 }, { "epoch": 1.0619047981547316, "grad_norm": 0.16037730872631073, "learning_rate": 0.00021870833641929595, "loss": 2.0065, "step": 278990 }, { "epoch": 1.0619428606228543, "grad_norm": 0.16274122893810272, "learning_rate": 0.00021863428306443113, "loss": 2.0105, "step": 279000 }, { "epoch": 1.061980923090977, "grad_norm": 0.1254984587430954, "learning_rate": 0.00021856024919472843, "loss": 2.0208, "step": 279010 }, { "epoch": 1.0620189855590996, "grad_norm": 0.13238771259784698, "learning_rate": 0.00021848623479481504, "loss": 2.0118, "step": 279020 }, { "epoch": 1.0620570480272222, "grad_norm": 0.12015458941459656, "learning_rate": 0.00021841223984933817, "loss": 2.023, "step": 279030 }, { "epoch": 1.0620951104953449, "grad_norm": 0.1459268033504486, "learning_rate": 0.0002183382643429652, "loss": 2.0236, "step": 279040 }, { "epoch": 1.0621331729634675, "grad_norm": 0.13737080991268158, "learning_rate": 0.0002182643082603839, "loss": 2.0123, "step": 279050 }, { "epoch": 1.0621712354315904, "grad_norm": 0.14364588260650635, "learning_rate": 0.0002181903715863018, "loss": 2.0331, "step": 279060 }, { "epoch": 1.062209297899713, "grad_norm": 0.133913055062294, "learning_rate": 0.0002181164543054467, "loss": 2.016, "step": 279070 }, { "epoch": 1.0622473603678357, "grad_norm": 0.14843854308128357, "learning_rate": 0.00021804255640256632, "loss": 2.0346, "step": 279080 }, { "epoch": 1.0622854228359584, "grad_norm": 0.12832611799240112, "learning_rate": 0.00021796867786242846, "loss": 2.0163, "step": 279090 }, { "epoch": 1.062323485304081, "grad_norm": 0.13493861258029938, "learning_rate": 0.00021789481866982068, "loss": 2.0199, "step": 279100 }, { "epoch": 1.0623615477722037, "grad_norm": 0.1524580419063568, "learning_rate": 0.00021782097880955077, "loss": 2.0283, "step": 279110 }, { "epoch": 1.0623996102403264, "grad_norm": 0.13833408057689667, "learning_rate": 0.00021774715826644602, "loss": 2.0288, "step": 279120 }, { "epoch": 1.062437672708449, "grad_norm": 0.14383922517299652, "learning_rate": 0.0002176733570253538, "loss": 2.0212, "step": 279130 }, { "epoch": 1.0624757351765717, "grad_norm": 0.13017255067825317, "learning_rate": 0.00021759957507114125, "loss": 2.0184, "step": 279140 }, { "epoch": 1.0625137976446946, "grad_norm": 0.1488547921180725, "learning_rate": 0.00021752581238869512, "loss": 2.026, "step": 279150 }, { "epoch": 1.0625518601128172, "grad_norm": 0.1583673655986786, "learning_rate": 0.00021745206896292207, "loss": 2.0161, "step": 279160 }, { "epoch": 1.0625899225809399, "grad_norm": 0.13907520473003387, "learning_rate": 0.00021737834477874842, "loss": 2.0199, "step": 279170 }, { "epoch": 1.0626279850490625, "grad_norm": 0.1313203126192093, "learning_rate": 0.00021730463982112003, "loss": 2.0093, "step": 279180 }, { "epoch": 1.0626660475171852, "grad_norm": 0.18097400665283203, "learning_rate": 0.00021723095407500255, "loss": 2.0272, "step": 279190 }, { "epoch": 1.0627041099853078, "grad_norm": 0.16231577098369598, "learning_rate": 0.000217157287525381, "loss": 2.0285, "step": 279200 }, { "epoch": 1.0627421724534305, "grad_norm": 0.1830163151025772, "learning_rate": 0.00021708364015726018, "loss": 2.0193, "step": 279210 }, { "epoch": 1.0627802349215532, "grad_norm": 0.14548464119434357, "learning_rate": 0.00021701001195566417, "loss": 2.0151, "step": 279220 }, { "epoch": 1.062818297389676, "grad_norm": 0.14360500872135162, "learning_rate": 0.00021693640290563676, "loss": 2.0163, "step": 279230 }, { "epoch": 1.0628563598577987, "grad_norm": 0.1566280722618103, "learning_rate": 0.00021686281299224103, "loss": 2.0032, "step": 279240 }, { "epoch": 1.0628944223259214, "grad_norm": 0.13956668972969055, "learning_rate": 0.00021678924220055952, "loss": 2.0294, "step": 279250 }, { "epoch": 1.062932484794044, "grad_norm": 0.13794146478176117, "learning_rate": 0.00021671569051569413, "loss": 2.0223, "step": 279260 }, { "epoch": 1.0629705472621667, "grad_norm": 0.1345396786928177, "learning_rate": 0.00021664215792276603, "loss": 2.0239, "step": 279270 }, { "epoch": 1.0630086097302893, "grad_norm": 0.13856376707553864, "learning_rate": 0.0002165686444069158, "loss": 2.0214, "step": 279280 }, { "epoch": 1.063046672198412, "grad_norm": 0.13800005614757538, "learning_rate": 0.00021649514995330328, "loss": 2.0078, "step": 279290 }, { "epoch": 1.0630847346665346, "grad_norm": 0.14292772114276886, "learning_rate": 0.00021642167454710744, "loss": 2.0175, "step": 279300 }, { "epoch": 1.0631227971346573, "grad_norm": 0.14993226528167725, "learning_rate": 0.00021634821817352647, "loss": 2.0193, "step": 279310 }, { "epoch": 1.0631608596027802, "grad_norm": 0.14130374789237976, "learning_rate": 0.00021627478081777786, "loss": 2.0215, "step": 279320 }, { "epoch": 1.0631989220709028, "grad_norm": 0.15642014145851135, "learning_rate": 0.00021620136246509801, "loss": 2.0208, "step": 279330 }, { "epoch": 1.0632369845390255, "grad_norm": 0.14840982854366302, "learning_rate": 0.00021612796310074262, "loss": 2.0143, "step": 279340 }, { "epoch": 1.0632750470071481, "grad_norm": 0.12628307938575745, "learning_rate": 0.00021605458270998636, "loss": 2.0154, "step": 279350 }, { "epoch": 1.0633131094752708, "grad_norm": 0.16346469521522522, "learning_rate": 0.00021598122127812274, "loss": 2.0042, "step": 279360 }, { "epoch": 1.0633511719433935, "grad_norm": 0.13518983125686646, "learning_rate": 0.00021590787879046465, "loss": 2.0242, "step": 279370 }, { "epoch": 1.0633892344115161, "grad_norm": 0.14704202115535736, "learning_rate": 0.0002158345552323436, "loss": 2.0159, "step": 279380 }, { "epoch": 1.0634272968796388, "grad_norm": 0.15335479378700256, "learning_rate": 0.00021576125058911011, "loss": 2.0225, "step": 279390 }, { "epoch": 1.0634653593477617, "grad_norm": 0.14868423342704773, "learning_rate": 0.00021568796484613368, "loss": 2.0191, "step": 279400 }, { "epoch": 1.0635034218158843, "grad_norm": 0.13510699570178986, "learning_rate": 0.0002156146979888025, "loss": 2.0252, "step": 279410 }, { "epoch": 1.063541484284007, "grad_norm": 0.14692704379558563, "learning_rate": 0.0002155414500025238, "loss": 2.0288, "step": 279420 }, { "epoch": 1.0635795467521296, "grad_norm": 0.16837383806705475, "learning_rate": 0.0002154682208727234, "loss": 1.9996, "step": 279430 }, { "epoch": 1.0636176092202523, "grad_norm": 0.15059910714626312, "learning_rate": 0.00021539501058484583, "loss": 2.0368, "step": 279440 }, { "epoch": 1.063655671688375, "grad_norm": 0.16362057626247406, "learning_rate": 0.00021532181912435462, "loss": 2.02, "step": 279450 }, { "epoch": 1.0636937341564976, "grad_norm": 0.1484278440475464, "learning_rate": 0.0002152486464767317, "loss": 2.0244, "step": 279460 }, { "epoch": 1.0637317966246203, "grad_norm": 0.17187005281448364, "learning_rate": 0.0002151754926274777, "loss": 2.015, "step": 279470 }, { "epoch": 1.063769859092743, "grad_norm": 0.14839616417884827, "learning_rate": 0.00021510235756211205, "loss": 2.0169, "step": 279480 }, { "epoch": 1.0638079215608658, "grad_norm": 0.16944752633571625, "learning_rate": 0.0002150292412661725, "loss": 2.0368, "step": 279490 }, { "epoch": 1.0638459840289884, "grad_norm": 0.1417141556739807, "learning_rate": 0.0002149561437252155, "loss": 2.0282, "step": 279500 }, { "epoch": 1.063884046497111, "grad_norm": 0.15089070796966553, "learning_rate": 0.000214883064924816, "loss": 2.0045, "step": 279510 }, { "epoch": 1.0639221089652338, "grad_norm": 0.146227166056633, "learning_rate": 0.0002148100048505675, "loss": 2.0196, "step": 279520 }, { "epoch": 1.0639601714333564, "grad_norm": 0.1489729881286621, "learning_rate": 0.00021473696348808176, "loss": 2.0222, "step": 279530 }, { "epoch": 1.063998233901479, "grad_norm": 0.16424356400966644, "learning_rate": 0.00021466394082298912, "loss": 2.0284, "step": 279540 }, { "epoch": 1.0640362963696017, "grad_norm": 0.15235692262649536, "learning_rate": 0.00021459093684093823, "loss": 2.0131, "step": 279550 }, { "epoch": 1.0640743588377244, "grad_norm": 0.13210387527942657, "learning_rate": 0.00021451795152759606, "loss": 2.0264, "step": 279560 }, { "epoch": 1.0641124213058473, "grad_norm": 0.14658477902412415, "learning_rate": 0.000214444984868648, "loss": 2.0255, "step": 279570 }, { "epoch": 1.06415048377397, "grad_norm": 0.14071625471115112, "learning_rate": 0.00021437203684979774, "loss": 2.0228, "step": 279580 }, { "epoch": 1.0641885462420926, "grad_norm": 0.1429414302110672, "learning_rate": 0.00021429910745676694, "loss": 2.0403, "step": 279590 }, { "epoch": 1.0642266087102152, "grad_norm": 0.13399270176887512, "learning_rate": 0.00021422619667529593, "loss": 2.0104, "step": 279600 }, { "epoch": 1.064264671178338, "grad_norm": 0.13192670047283173, "learning_rate": 0.0002141533044911428, "loss": 2.0224, "step": 279610 }, { "epoch": 1.0643027336464606, "grad_norm": 0.12973575294017792, "learning_rate": 0.00021408043089008407, "loss": 2.0116, "step": 279620 }, { "epoch": 1.0643407961145832, "grad_norm": 0.14499998092651367, "learning_rate": 0.00021400757585791425, "loss": 2.0223, "step": 279630 }, { "epoch": 1.0643788585827059, "grad_norm": 0.1375758945941925, "learning_rate": 0.00021393473938044604, "loss": 2.0265, "step": 279640 }, { "epoch": 1.0644169210508285, "grad_norm": 0.15999175608158112, "learning_rate": 0.00021386192144351007, "loss": 2.013, "step": 279650 }, { "epoch": 1.0644549835189514, "grad_norm": 0.12279332429170609, "learning_rate": 0.0002137891220329551, "loss": 2.0267, "step": 279660 }, { "epoch": 1.064493045987074, "grad_norm": 0.15905717015266418, "learning_rate": 0.00021371634113464782, "loss": 2.0063, "step": 279670 }, { "epoch": 1.0645311084551967, "grad_norm": 0.1357671469449997, "learning_rate": 0.00021364357873447293, "loss": 2.0415, "step": 279680 }, { "epoch": 1.0645691709233194, "grad_norm": 0.1571875363588333, "learning_rate": 0.000213570834818333, "loss": 2.0407, "step": 279690 }, { "epoch": 1.064607233391442, "grad_norm": 0.14225240051746368, "learning_rate": 0.00021349810937214858, "loss": 2.0186, "step": 279700 }, { "epoch": 1.0646452958595647, "grad_norm": 0.13177137076854706, "learning_rate": 0.00021342540238185797, "loss": 2.0141, "step": 279710 }, { "epoch": 1.0646833583276873, "grad_norm": 0.19607017934322357, "learning_rate": 0.00021335271383341747, "loss": 2.0255, "step": 279720 }, { "epoch": 1.06472142079581, "grad_norm": 0.12749847769737244, "learning_rate": 0.00021328004371280098, "loss": 2.024, "step": 279730 }, { "epoch": 1.0647594832639329, "grad_norm": 0.1525043100118637, "learning_rate": 0.00021320739200600026, "loss": 2.023, "step": 279740 }, { "epoch": 1.0647975457320555, "grad_norm": 0.1349322646856308, "learning_rate": 0.00021313475869902493, "loss": 2.0142, "step": 279750 }, { "epoch": 1.0648356082001782, "grad_norm": 0.13257881999015808, "learning_rate": 0.00021306214377790213, "loss": 2.0357, "step": 279760 }, { "epoch": 1.0648736706683009, "grad_norm": 0.16428206861019135, "learning_rate": 0.0002129895472286767, "loss": 2.0216, "step": 279770 }, { "epoch": 1.0649117331364235, "grad_norm": 0.14325910806655884, "learning_rate": 0.0002129169690374113, "loss": 2.018, "step": 279780 }, { "epoch": 1.0649497956045462, "grad_norm": 0.1777109056711197, "learning_rate": 0.000212844409190186, "loss": 2.0177, "step": 279790 }, { "epoch": 1.0649878580726688, "grad_norm": 0.13226689398288727, "learning_rate": 0.00021277186767309857, "loss": 2.0101, "step": 279800 }, { "epoch": 1.0650259205407915, "grad_norm": 0.15519066154956818, "learning_rate": 0.0002126993444722643, "loss": 2.0159, "step": 279810 }, { "epoch": 1.0650639830089141, "grad_norm": 0.13406550884246826, "learning_rate": 0.00021262683957381595, "loss": 2.0193, "step": 279820 }, { "epoch": 1.0651020454770368, "grad_norm": 0.14426659047603607, "learning_rate": 0.0002125543529639038, "loss": 2.0187, "step": 279830 }, { "epoch": 1.0651401079451597, "grad_norm": 0.12878841161727905, "learning_rate": 0.00021248188462869567, "loss": 2.012, "step": 279840 }, { "epoch": 1.0651781704132823, "grad_norm": 0.1430235654115677, "learning_rate": 0.00021240943455437671, "loss": 2.0205, "step": 279850 }, { "epoch": 1.065216232881405, "grad_norm": 0.16837656497955322, "learning_rate": 0.0002123370027271495, "loss": 2.0245, "step": 279860 }, { "epoch": 1.0652542953495276, "grad_norm": 0.17101293802261353, "learning_rate": 0.000212264589133234, "loss": 2.007, "step": 279870 }, { "epoch": 1.0652923578176503, "grad_norm": 0.14210744202136993, "learning_rate": 0.0002121921937588674, "loss": 2.0122, "step": 279880 }, { "epoch": 1.065330420285773, "grad_norm": 0.13525697588920593, "learning_rate": 0.0002121198165903044, "loss": 2.0233, "step": 279890 }, { "epoch": 1.0653684827538956, "grad_norm": 0.15535393357276917, "learning_rate": 0.00021204745761381672, "loss": 2.0213, "step": 279900 }, { "epoch": 1.0654065452220183, "grad_norm": 0.13685598969459534, "learning_rate": 0.00021197511681569364, "loss": 2.0258, "step": 279910 }, { "epoch": 1.0654446076901412, "grad_norm": 0.14752687513828278, "learning_rate": 0.00021190279418224135, "loss": 2.0152, "step": 279920 }, { "epoch": 1.0654826701582638, "grad_norm": 0.15611529350280762, "learning_rate": 0.00021183048969978336, "loss": 2.012, "step": 279930 }, { "epoch": 1.0655207326263865, "grad_norm": 0.15216854214668274, "learning_rate": 0.00021175820335466038, "loss": 2.022, "step": 279940 }, { "epoch": 1.0655587950945091, "grad_norm": 0.14078018069267273, "learning_rate": 0.00021168593513323008, "loss": 2.0187, "step": 279950 }, { "epoch": 1.0655968575626318, "grad_norm": 0.14143924415111542, "learning_rate": 0.00021161368502186745, "loss": 2.0227, "step": 279960 }, { "epoch": 1.0656349200307544, "grad_norm": 0.1464422047138214, "learning_rate": 0.00021154145300696442, "loss": 2.0351, "step": 279970 }, { "epoch": 1.065672982498877, "grad_norm": 0.16325105726718903, "learning_rate": 0.00021146923907492982, "loss": 2.023, "step": 279980 }, { "epoch": 1.0657110449669998, "grad_norm": 0.1634446084499359, "learning_rate": 0.0002113970432121898, "loss": 2.0239, "step": 279990 }, { "epoch": 1.0657491074351224, "grad_norm": 0.1478370875120163, "learning_rate": 0.00021132486540518714, "loss": 2.0105, "step": 280000 }, { "epoch": 1.0657871699032453, "grad_norm": 0.17000077664852142, "learning_rate": 0.00021125270564038178, "loss": 2.0199, "step": 280010 }, { "epoch": 1.065825232371368, "grad_norm": 0.17442534863948822, "learning_rate": 0.0002111805639042506, "loss": 2.0216, "step": 280020 }, { "epoch": 1.0658632948394906, "grad_norm": 0.15885937213897705, "learning_rate": 0.00021110844018328723, "loss": 2.0169, "step": 280030 }, { "epoch": 1.0659013573076133, "grad_norm": 0.13275925815105438, "learning_rate": 0.00021103633446400223, "loss": 2.0261, "step": 280040 }, { "epoch": 1.065939419775736, "grad_norm": 0.13854673504829407, "learning_rate": 0.00021096424673292292, "loss": 2.0376, "step": 280050 }, { "epoch": 1.0659774822438586, "grad_norm": 0.1444002389907837, "learning_rate": 0.00021089217697659352, "loss": 2.0045, "step": 280060 }, { "epoch": 1.0660155447119812, "grad_norm": 0.1577683538198471, "learning_rate": 0.0002108201251815749, "loss": 2.0062, "step": 280070 }, { "epoch": 1.066053607180104, "grad_norm": 0.21680620312690735, "learning_rate": 0.00021074809133444483, "loss": 2.0092, "step": 280080 }, { "epoch": 1.0660916696482268, "grad_norm": 0.134336456656456, "learning_rate": 0.00021067607542179763, "loss": 2.0295, "step": 280090 }, { "epoch": 1.0661297321163494, "grad_norm": 0.13651613891124725, "learning_rate": 0.00021060407743024435, "loss": 2.0166, "step": 280100 }, { "epoch": 1.066167794584472, "grad_norm": 0.1491308957338333, "learning_rate": 0.00021053209734641276, "loss": 2.0164, "step": 280110 }, { "epoch": 1.0662058570525947, "grad_norm": 0.1435723751783371, "learning_rate": 0.00021046013515694712, "loss": 2.0196, "step": 280120 }, { "epoch": 1.0662439195207174, "grad_norm": 0.16801053285598755, "learning_rate": 0.0002103881908485084, "loss": 2.0368, "step": 280130 }, { "epoch": 1.06628198198884, "grad_norm": 0.15588593482971191, "learning_rate": 0.00021031626440777407, "loss": 2.0134, "step": 280140 }, { "epoch": 1.0663200444569627, "grad_norm": 0.14381839334964752, "learning_rate": 0.0002102443558214382, "loss": 2.0138, "step": 280150 }, { "epoch": 1.0663581069250854, "grad_norm": 0.14546221494674683, "learning_rate": 0.00021017246507621125, "loss": 2.0079, "step": 280160 }, { "epoch": 1.066396169393208, "grad_norm": 0.13942362368106842, "learning_rate": 0.00021010059215882028, "loss": 2.0184, "step": 280170 }, { "epoch": 1.066434231861331, "grad_norm": 0.13316209614276886, "learning_rate": 0.00021002873705600872, "loss": 2.0044, "step": 280180 }, { "epoch": 1.0664722943294536, "grad_norm": 0.12746325135231018, "learning_rate": 0.00020995689975453647, "loss": 2.0146, "step": 280190 }, { "epoch": 1.0665103567975762, "grad_norm": 0.136347234249115, "learning_rate": 0.00020988508024117982, "loss": 2.0213, "step": 280200 }, { "epoch": 1.0665484192656989, "grad_norm": 0.13976691663265228, "learning_rate": 0.00020981327850273142, "loss": 2.024, "step": 280210 }, { "epoch": 1.0665864817338215, "grad_norm": 0.1449359655380249, "learning_rate": 0.00020974149452600017, "loss": 2.0065, "step": 280220 }, { "epoch": 1.0666245442019442, "grad_norm": 0.14353162050247192, "learning_rate": 0.00020966972829781138, "loss": 2.0173, "step": 280230 }, { "epoch": 1.0666626066700668, "grad_norm": 0.14832115173339844, "learning_rate": 0.00020959797980500667, "loss": 2.0146, "step": 280240 }, { "epoch": 1.0667006691381895, "grad_norm": 0.16132640838623047, "learning_rate": 0.00020952624903444373, "loss": 2.0172, "step": 280250 }, { "epoch": 1.0667387316063124, "grad_norm": 0.16305841505527496, "learning_rate": 0.00020945453597299667, "loss": 2.0124, "step": 280260 }, { "epoch": 1.066776794074435, "grad_norm": 0.15650595724582672, "learning_rate": 0.0002093828406075558, "loss": 2.0242, "step": 280270 }, { "epoch": 1.0668148565425577, "grad_norm": 0.13507379591464996, "learning_rate": 0.00020931116292502734, "loss": 2.0231, "step": 280280 }, { "epoch": 1.0668529190106804, "grad_norm": 0.12661010026931763, "learning_rate": 0.00020923950291233396, "loss": 2.0292, "step": 280290 }, { "epoch": 1.066890981478803, "grad_norm": 0.18024198710918427, "learning_rate": 0.00020916786055641423, "loss": 2.0196, "step": 280300 }, { "epoch": 1.0669290439469257, "grad_norm": 0.1324780285358429, "learning_rate": 0.00020909623584422287, "loss": 2.0223, "step": 280310 }, { "epoch": 1.0669671064150483, "grad_norm": 0.16044573485851288, "learning_rate": 0.00020902462876273077, "loss": 2.0313, "step": 280320 }, { "epoch": 1.067005168883171, "grad_norm": 0.1606072187423706, "learning_rate": 0.00020895303929892463, "loss": 2.0144, "step": 280330 }, { "epoch": 1.0670432313512936, "grad_norm": 0.14759883284568787, "learning_rate": 0.00020888146743980725, "loss": 2.0261, "step": 280340 }, { "epoch": 1.0670812938194165, "grad_norm": 0.1489844173192978, "learning_rate": 0.0002088099131723975, "loss": 2.0158, "step": 280350 }, { "epoch": 1.0671193562875392, "grad_norm": 0.1644773781299591, "learning_rate": 0.00020873837648373013, "loss": 2.0165, "step": 280360 }, { "epoch": 1.0671574187556618, "grad_norm": 0.14012792706489563, "learning_rate": 0.00020866685736085566, "loss": 2.014, "step": 280370 }, { "epoch": 1.0671954812237845, "grad_norm": 0.14115259051322937, "learning_rate": 0.00020859535579084078, "loss": 2.0088, "step": 280380 }, { "epoch": 1.0672335436919071, "grad_norm": 0.1442381888628006, "learning_rate": 0.0002085238717607678, "loss": 2.0138, "step": 280390 }, { "epoch": 1.0672716061600298, "grad_norm": 0.1616717129945755, "learning_rate": 0.00020845240525773495, "loss": 2.0148, "step": 280400 }, { "epoch": 1.0673096686281525, "grad_norm": 0.15273220837116241, "learning_rate": 0.00020838095626885634, "loss": 2.0208, "step": 280410 }, { "epoch": 1.0673477310962751, "grad_norm": 0.13198786973953247, "learning_rate": 0.0002083095247812618, "loss": 2.017, "step": 280420 }, { "epoch": 1.067385793564398, "grad_norm": 0.16148214042186737, "learning_rate": 0.00020823811078209681, "loss": 2.0236, "step": 280430 }, { "epoch": 1.0674238560325207, "grad_norm": 0.1520000398159027, "learning_rate": 0.00020816671425852285, "loss": 2.016, "step": 280440 }, { "epoch": 1.0674619185006433, "grad_norm": 0.1314682811498642, "learning_rate": 0.0002080953351977168, "loss": 2.0105, "step": 280450 }, { "epoch": 1.067499980968766, "grad_norm": 0.15124158561229706, "learning_rate": 0.00020802397358687136, "loss": 2.0239, "step": 280460 }, { "epoch": 1.0675380434368886, "grad_norm": 0.1314118504524231, "learning_rate": 0.00020795262941319492, "loss": 2.0265, "step": 280470 }, { "epoch": 1.0675761059050113, "grad_norm": 0.14328762888908386, "learning_rate": 0.00020788130266391141, "loss": 2.0218, "step": 280480 }, { "epoch": 1.067614168373134, "grad_norm": 0.14062193036079407, "learning_rate": 0.00020780999332626037, "loss": 2.0277, "step": 280490 }, { "epoch": 1.0676522308412566, "grad_norm": 0.16746476292610168, "learning_rate": 0.00020773870138749695, "loss": 2.0281, "step": 280500 }, { "epoch": 1.0676902933093793, "grad_norm": 0.1793404072523117, "learning_rate": 0.00020766742683489182, "loss": 2.0281, "step": 280510 }, { "epoch": 1.0677283557775021, "grad_norm": 0.13390576839447021, "learning_rate": 0.00020759616965573108, "loss": 2.0243, "step": 280520 }, { "epoch": 1.0677664182456248, "grad_norm": 0.1407162994146347, "learning_rate": 0.00020752492983731646, "loss": 2.0249, "step": 280530 }, { "epoch": 1.0678044807137475, "grad_norm": 0.1637965440750122, "learning_rate": 0.00020745370736696507, "loss": 2.0256, "step": 280540 }, { "epoch": 1.06784254318187, "grad_norm": 0.1503283828496933, "learning_rate": 0.00020738250223200939, "loss": 2.0117, "step": 280550 }, { "epoch": 1.0678806056499928, "grad_norm": 0.1429990977048874, "learning_rate": 0.00020731131441979744, "loss": 2.0321, "step": 280560 }, { "epoch": 1.0679186681181154, "grad_norm": 0.15503156185150146, "learning_rate": 0.00020724014391769262, "loss": 2.0142, "step": 280570 }, { "epoch": 1.067956730586238, "grad_norm": 0.1364317238330841, "learning_rate": 0.00020716899071307354, "loss": 2.0115, "step": 280580 }, { "epoch": 1.0679947930543607, "grad_norm": 0.17652592062950134, "learning_rate": 0.0002070978547933343, "loss": 2.0062, "step": 280590 }, { "epoch": 1.0680328555224836, "grad_norm": 0.15647253394126892, "learning_rate": 0.00020702673614588423, "loss": 2.0302, "step": 280600 }, { "epoch": 1.0680709179906063, "grad_norm": 0.14261257648468018, "learning_rate": 0.00020695563475814794, "loss": 2.0226, "step": 280610 }, { "epoch": 1.068108980458729, "grad_norm": 0.1298782229423523, "learning_rate": 0.00020688455061756522, "loss": 2.0295, "step": 280620 }, { "epoch": 1.0681470429268516, "grad_norm": 0.18369466066360474, "learning_rate": 0.00020681348371159132, "loss": 2.013, "step": 280630 }, { "epoch": 1.0681851053949742, "grad_norm": 0.14287227392196655, "learning_rate": 0.00020674243402769643, "loss": 2.0163, "step": 280640 }, { "epoch": 1.068223167863097, "grad_norm": 0.1325698047876358, "learning_rate": 0.000206671401553366, "loss": 2.0251, "step": 280650 }, { "epoch": 1.0682612303312196, "grad_norm": 0.1377692073583603, "learning_rate": 0.00020660038627610066, "loss": 2.0164, "step": 280660 }, { "epoch": 1.0682992927993422, "grad_norm": 0.14473956823349, "learning_rate": 0.00020652938818341627, "loss": 2.0188, "step": 280670 }, { "epoch": 1.0683373552674649, "grad_norm": 0.14381441473960876, "learning_rate": 0.00020645840726284348, "loss": 2.0155, "step": 280680 }, { "epoch": 1.0683754177355875, "grad_norm": 0.18615978956222534, "learning_rate": 0.00020638744350192834, "loss": 2.0263, "step": 280690 }, { "epoch": 1.0684134802037104, "grad_norm": 0.14097805321216583, "learning_rate": 0.00020631649688823174, "loss": 2.0228, "step": 280700 }, { "epoch": 1.068451542671833, "grad_norm": 0.14481240510940552, "learning_rate": 0.0002062455674093297, "loss": 2.025, "step": 280710 }, { "epoch": 1.0684896051399557, "grad_norm": 0.13994210958480835, "learning_rate": 0.00020617465505281318, "loss": 2.0091, "step": 280720 }, { "epoch": 1.0685276676080784, "grad_norm": 0.17684991657733917, "learning_rate": 0.0002061037598062881, "loss": 2.0189, "step": 280730 }, { "epoch": 1.068565730076201, "grad_norm": 0.16578590869903564, "learning_rate": 0.00020603288165737537, "loss": 2.0175, "step": 280740 }, { "epoch": 1.0686037925443237, "grad_norm": 0.1451704055070877, "learning_rate": 0.00020596202059371084, "loss": 2.0248, "step": 280750 }, { "epoch": 1.0686418550124464, "grad_norm": 0.15710540115833282, "learning_rate": 0.00020589117660294514, "loss": 2.0152, "step": 280760 }, { "epoch": 1.068679917480569, "grad_norm": 0.16066260635852814, "learning_rate": 0.0002058203496727439, "loss": 2.028, "step": 280770 }, { "epoch": 1.0687179799486919, "grad_norm": 0.13257549703121185, "learning_rate": 0.00020574953979078753, "loss": 2.0165, "step": 280780 }, { "epoch": 1.0687560424168145, "grad_norm": 0.14641395211219788, "learning_rate": 0.0002056787469447713, "loss": 2.0122, "step": 280790 }, { "epoch": 1.0687941048849372, "grad_norm": 0.13742470741271973, "learning_rate": 0.00020560797112240508, "loss": 2.0188, "step": 280800 }, { "epoch": 1.0688321673530599, "grad_norm": 0.18226590752601624, "learning_rate": 0.0002055372123114139, "loss": 2.0338, "step": 280810 }, { "epoch": 1.0688702298211825, "grad_norm": 0.13812443614006042, "learning_rate": 0.0002054664704995372, "loss": 2.0412, "step": 280820 }, { "epoch": 1.0689082922893052, "grad_norm": 0.20717187225818634, "learning_rate": 0.00020539574567452924, "loss": 2.018, "step": 280830 }, { "epoch": 1.0689463547574278, "grad_norm": 0.14729878306388855, "learning_rate": 0.00020532503782415902, "loss": 2.0214, "step": 280840 }, { "epoch": 1.0689844172255505, "grad_norm": 0.14876174926757812, "learning_rate": 0.00020525434693621008, "loss": 2.001, "step": 280850 }, { "epoch": 1.0690224796936731, "grad_norm": 0.14767611026763916, "learning_rate": 0.00020518367299848084, "loss": 2.0185, "step": 280860 }, { "epoch": 1.069060542161796, "grad_norm": 0.13235405087471008, "learning_rate": 0.00020511301599878418, "loss": 2.0158, "step": 280870 }, { "epoch": 1.0690986046299187, "grad_norm": 0.13786683976650238, "learning_rate": 0.0002050423759249475, "loss": 2.0356, "step": 280880 }, { "epoch": 1.0691366670980413, "grad_norm": 0.13260993361473083, "learning_rate": 0.00020497175276481294, "loss": 2.0064, "step": 280890 }, { "epoch": 1.069174729566164, "grad_norm": 0.13280153274536133, "learning_rate": 0.00020490114650623708, "loss": 2.0152, "step": 280900 }, { "epoch": 1.0692127920342867, "grad_norm": 0.15440459549427032, "learning_rate": 0.0002048305571370912, "loss": 2.0104, "step": 280910 }, { "epoch": 1.0692508545024093, "grad_norm": 0.13955621421337128, "learning_rate": 0.00020475998464526075, "loss": 2.0256, "step": 280920 }, { "epoch": 1.069288916970532, "grad_norm": 0.15699417889118195, "learning_rate": 0.00020468942901864594, "loss": 2.0169, "step": 280930 }, { "epoch": 1.0693269794386546, "grad_norm": 0.1380460411310196, "learning_rate": 0.00020461889024516144, "loss": 2.0085, "step": 280940 }, { "epoch": 1.0693650419067775, "grad_norm": 0.13854175806045532, "learning_rate": 0.00020454836831273606, "loss": 2.0236, "step": 280950 }, { "epoch": 1.0694031043749002, "grad_norm": 0.17974607646465302, "learning_rate": 0.00020447786320931333, "loss": 2.0237, "step": 280960 }, { "epoch": 1.0694411668430228, "grad_norm": 0.13895480334758759, "learning_rate": 0.000204407374922851, "loss": 2.0189, "step": 280970 }, { "epoch": 1.0694792293111455, "grad_norm": 0.12747804820537567, "learning_rate": 0.0002043369034413211, "loss": 2.0098, "step": 280980 }, { "epoch": 1.0695172917792681, "grad_norm": 0.1594598889350891, "learning_rate": 0.00020426644875271027, "loss": 2.0271, "step": 280990 }, { "epoch": 1.0695553542473908, "grad_norm": 0.15190567076206207, "learning_rate": 0.0002041960108450192, "loss": 2.0107, "step": 281000 }, { "epoch": 1.0695934167155134, "grad_norm": 0.19501842558383942, "learning_rate": 0.00020412558970626295, "loss": 2.0048, "step": 281010 }, { "epoch": 1.069631479183636, "grad_norm": 0.12260035425424576, "learning_rate": 0.00020405518532447083, "loss": 2.009, "step": 281020 }, { "epoch": 1.0696695416517588, "grad_norm": 0.13547752797603607, "learning_rate": 0.00020398479768768635, "loss": 2.0087, "step": 281030 }, { "epoch": 1.0697076041198816, "grad_norm": 0.13921548426151276, "learning_rate": 0.00020391442678396727, "loss": 2.0026, "step": 281040 }, { "epoch": 1.0697456665880043, "grad_norm": 0.14413045346736908, "learning_rate": 0.00020384407260138566, "loss": 2.0115, "step": 281050 }, { "epoch": 1.069783729056127, "grad_norm": 0.13830119371414185, "learning_rate": 0.00020377373512802755, "loss": 2.0323, "step": 281060 }, { "epoch": 1.0698217915242496, "grad_norm": 0.1393330693244934, "learning_rate": 0.00020370341435199313, "loss": 2.0317, "step": 281070 }, { "epoch": 1.0698598539923723, "grad_norm": 0.13055545091629028, "learning_rate": 0.0002036331102613969, "loss": 2.0284, "step": 281080 }, { "epoch": 1.069897916460495, "grad_norm": 0.14802104234695435, "learning_rate": 0.00020356282284436728, "loss": 2.013, "step": 281090 }, { "epoch": 1.0699359789286176, "grad_norm": 0.1766086220741272, "learning_rate": 0.0002034925520890467, "loss": 2.034, "step": 281100 }, { "epoch": 1.0699740413967402, "grad_norm": 0.13774430751800537, "learning_rate": 0.00020342229798359196, "loss": 2.0077, "step": 281110 }, { "epoch": 1.0700121038648631, "grad_norm": 0.14388753473758698, "learning_rate": 0.0002033520605161735, "loss": 2.0194, "step": 281120 }, { "epoch": 1.0700501663329858, "grad_norm": 0.16754965484142303, "learning_rate": 0.000203281839674976, "loss": 2.0278, "step": 281130 }, { "epoch": 1.0700882288011084, "grad_norm": 0.1566379964351654, "learning_rate": 0.00020321163544819798, "loss": 2.02, "step": 281140 }, { "epoch": 1.070126291269231, "grad_norm": 0.13745974004268646, "learning_rate": 0.0002031414478240521, "loss": 2.0253, "step": 281150 }, { "epoch": 1.0701643537373537, "grad_norm": 0.1512146145105362, "learning_rate": 0.00020307127679076471, "loss": 2.0087, "step": 281160 }, { "epoch": 1.0702024162054764, "grad_norm": 0.16319917142391205, "learning_rate": 0.0002030011223365763, "loss": 2.0168, "step": 281170 }, { "epoch": 1.070240478673599, "grad_norm": 0.1592712253332138, "learning_rate": 0.00020293098444974106, "loss": 2.0189, "step": 281180 }, { "epoch": 1.0702785411417217, "grad_norm": 0.189766988158226, "learning_rate": 0.00020286086311852714, "loss": 2.0047, "step": 281190 }, { "epoch": 1.0703166036098444, "grad_norm": 0.13883762061595917, "learning_rate": 0.0002027907583312165, "loss": 2.0139, "step": 281200 }, { "epoch": 1.0703546660779673, "grad_norm": 0.15338963270187378, "learning_rate": 0.00020272067007610502, "loss": 2.012, "step": 281210 }, { "epoch": 1.07039272854609, "grad_norm": 0.14497676491737366, "learning_rate": 0.00020265059834150218, "loss": 2.0159, "step": 281220 }, { "epoch": 1.0704307910142126, "grad_norm": 0.15736877918243408, "learning_rate": 0.00020258054311573137, "loss": 2.0107, "step": 281230 }, { "epoch": 1.0704688534823352, "grad_norm": 0.15212887525558472, "learning_rate": 0.0002025105043871297, "loss": 2.0035, "step": 281240 }, { "epoch": 1.0705069159504579, "grad_norm": 0.15456640720367432, "learning_rate": 0.00020244048214404793, "loss": 2.0036, "step": 281250 }, { "epoch": 1.0705449784185805, "grad_norm": 0.13884684443473816, "learning_rate": 0.00020237047637485067, "loss": 2.0216, "step": 281260 }, { "epoch": 1.0705830408867032, "grad_norm": 0.18412266671657562, "learning_rate": 0.0002023004870679161, "loss": 2.0221, "step": 281270 }, { "epoch": 1.0706211033548259, "grad_norm": 0.14824584126472473, "learning_rate": 0.00020223051421163607, "loss": 1.9981, "step": 281280 }, { "epoch": 1.0706591658229487, "grad_norm": 0.13820265233516693, "learning_rate": 0.0002021605577944161, "loss": 2.0035, "step": 281290 }, { "epoch": 1.0706972282910714, "grad_norm": 0.14500044286251068, "learning_rate": 0.0002020906178046754, "loss": 2.0038, "step": 281300 }, { "epoch": 1.070735290759194, "grad_norm": 0.14148664474487305, "learning_rate": 0.00020202069423084652, "loss": 2.0051, "step": 281310 }, { "epoch": 1.0707733532273167, "grad_norm": 0.1406242847442627, "learning_rate": 0.00020195078706137582, "loss": 2.0071, "step": 281320 }, { "epoch": 1.0708114156954394, "grad_norm": 0.12793296575546265, "learning_rate": 0.00020188089628472318, "loss": 2.0039, "step": 281330 }, { "epoch": 1.070849478163562, "grad_norm": 0.13845571875572205, "learning_rate": 0.00020181102188936178, "loss": 2.0057, "step": 281340 }, { "epoch": 1.0708875406316847, "grad_norm": 0.1830500364303589, "learning_rate": 0.00020174116386377866, "loss": 2.0158, "step": 281350 }, { "epoch": 1.0709256030998073, "grad_norm": 0.15033838152885437, "learning_rate": 0.00020167132219647404, "loss": 2.0125, "step": 281360 }, { "epoch": 1.07096366556793, "grad_norm": 0.1378006786108017, "learning_rate": 0.00020160149687596174, "loss": 2.0167, "step": 281370 }, { "epoch": 1.0710017280360529, "grad_norm": 0.15929892659187317, "learning_rate": 0.00020153168789076897, "loss": 2.0034, "step": 281380 }, { "epoch": 1.0710397905041755, "grad_norm": 0.14839771389961243, "learning_rate": 0.00020146189522943643, "loss": 2.0286, "step": 281390 }, { "epoch": 1.0710778529722982, "grad_norm": 0.13341379165649414, "learning_rate": 0.00020139211888051806, "loss": 2.0248, "step": 281400 }, { "epoch": 1.0711159154404208, "grad_norm": 0.1480301469564438, "learning_rate": 0.00020132235883258128, "loss": 2.0037, "step": 281410 }, { "epoch": 1.0711539779085435, "grad_norm": 0.15032023191452026, "learning_rate": 0.0002012526150742069, "loss": 2.0286, "step": 281420 }, { "epoch": 1.0711920403766662, "grad_norm": 0.17209644615650177, "learning_rate": 0.0002011828875939889, "loss": 2.0223, "step": 281430 }, { "epoch": 1.0712301028447888, "grad_norm": 0.1403706818819046, "learning_rate": 0.00020111317638053471, "loss": 2.0093, "step": 281440 }, { "epoch": 1.0712681653129115, "grad_norm": 0.13635705411434174, "learning_rate": 0.00020104348142246502, "loss": 2.0087, "step": 281450 }, { "epoch": 1.0713062277810343, "grad_norm": 0.12491488456726074, "learning_rate": 0.00020097380270841375, "loss": 2.0211, "step": 281460 }, { "epoch": 1.071344290249157, "grad_norm": 0.1397937834262848, "learning_rate": 0.00020090414022702803, "loss": 2.0289, "step": 281470 }, { "epoch": 1.0713823527172797, "grad_norm": 0.1508331000804901, "learning_rate": 0.00020083449396696818, "loss": 2.02, "step": 281480 }, { "epoch": 1.0714204151854023, "grad_norm": 0.15565991401672363, "learning_rate": 0.00020076486391690785, "loss": 2.0113, "step": 281490 }, { "epoch": 1.071458477653525, "grad_norm": 0.13104164600372314, "learning_rate": 0.0002006952500655338, "loss": 2.0017, "step": 281500 }, { "epoch": 1.0714965401216476, "grad_norm": 0.17238375544548035, "learning_rate": 0.00020062565240154596, "loss": 2.0247, "step": 281510 }, { "epoch": 1.0715346025897703, "grad_norm": 0.1550111025571823, "learning_rate": 0.00020055607091365725, "loss": 2.0301, "step": 281520 }, { "epoch": 1.071572665057893, "grad_norm": 0.13278517127037048, "learning_rate": 0.0002004865055905939, "loss": 2.0215, "step": 281530 }, { "epoch": 1.0716107275260156, "grad_norm": 0.16525842249393463, "learning_rate": 0.00020041695642109515, "loss": 2.0218, "step": 281540 }, { "epoch": 1.0716487899941383, "grad_norm": 0.13423489034175873, "learning_rate": 0.00020034742339391327, "loss": 2.0172, "step": 281550 }, { "epoch": 1.0716868524622611, "grad_norm": 0.1912301927804947, "learning_rate": 0.00020027790649781364, "loss": 2.0127, "step": 281560 }, { "epoch": 1.0717249149303838, "grad_norm": 0.13126081228256226, "learning_rate": 0.00020020840572157467, "loss": 2.0082, "step": 281570 }, { "epoch": 1.0717629773985065, "grad_norm": 0.1419760137796402, "learning_rate": 0.00020013892105398762, "loss": 2.0213, "step": 281580 }, { "epoch": 1.0718010398666291, "grad_norm": 0.182196706533432, "learning_rate": 0.000200069452483857, "loss": 2.0223, "step": 281590 }, { "epoch": 1.0718391023347518, "grad_norm": 0.14496959745883942, "learning_rate": 0.0002, "loss": 2.03, "step": 281600 }, { "epoch": 1.0718771648028744, "grad_norm": 0.14605028927326202, "learning_rate": 0.00019993056359124701, "loss": 2.0057, "step": 281610 }, { "epoch": 1.071915227270997, "grad_norm": 0.13911451399326324, "learning_rate": 0.00019986114324644112, "loss": 2.0121, "step": 281620 }, { "epoch": 1.0719532897391197, "grad_norm": 0.15131564438343048, "learning_rate": 0.00019979173895443852, "loss": 2.0049, "step": 281630 }, { "epoch": 1.0719913522072426, "grad_norm": 0.140620157122612, "learning_rate": 0.00019972235070410806, "loss": 2.0164, "step": 281640 }, { "epoch": 1.0720294146753653, "grad_norm": 0.15846246480941772, "learning_rate": 0.00019965297848433168, "loss": 2.0048, "step": 281650 }, { "epoch": 1.072067477143488, "grad_norm": 0.14672231674194336, "learning_rate": 0.00019958362228400395, "loss": 2.0157, "step": 281660 }, { "epoch": 1.0721055396116106, "grad_norm": 0.163814976811409, "learning_rate": 0.00019951428209203243, "loss": 2.0156, "step": 281670 }, { "epoch": 1.0721436020797332, "grad_norm": 0.14089249074459076, "learning_rate": 0.00019944495789733736, "loss": 2.0251, "step": 281680 }, { "epoch": 1.072181664547856, "grad_norm": 0.14137700200080872, "learning_rate": 0.00019937564968885174, "loss": 2.0075, "step": 281690 }, { "epoch": 1.0722197270159786, "grad_norm": 0.15704695880413055, "learning_rate": 0.00019930635745552138, "loss": 2.003, "step": 281700 }, { "epoch": 1.0722577894841012, "grad_norm": 0.16056190431118011, "learning_rate": 0.00019923708118630492, "loss": 2.0112, "step": 281710 }, { "epoch": 1.0722958519522239, "grad_norm": 0.15353207290172577, "learning_rate": 0.00019916782087017355, "loss": 2.0058, "step": 281720 }, { "epoch": 1.0723339144203468, "grad_norm": 0.15010952949523926, "learning_rate": 0.00019909857649611114, "loss": 2.0187, "step": 281730 }, { "epoch": 1.0723719768884694, "grad_norm": 0.146628737449646, "learning_rate": 0.00019902934805311447, "loss": 2.011, "step": 281740 }, { "epoch": 1.072410039356592, "grad_norm": 0.17401348054409027, "learning_rate": 0.0001989601355301926, "loss": 2.0135, "step": 281750 }, { "epoch": 1.0724481018247147, "grad_norm": 0.1571890264749527, "learning_rate": 0.0001988909389163676, "loss": 2.005, "step": 281760 }, { "epoch": 1.0724861642928374, "grad_norm": 0.13389791548252106, "learning_rate": 0.0001988217582006739, "loss": 2.0151, "step": 281770 }, { "epoch": 1.07252422676096, "grad_norm": 0.14010360836982727, "learning_rate": 0.00019875259337215863, "loss": 2.0271, "step": 281780 }, { "epoch": 1.0725622892290827, "grad_norm": 0.14111493527889252, "learning_rate": 0.00019868344441988147, "loss": 2.0033, "step": 281790 }, { "epoch": 1.0726003516972054, "grad_norm": 0.13658498227596283, "learning_rate": 0.0001986143113329146, "loss": 2.0278, "step": 281800 }, { "epoch": 1.0726384141653282, "grad_norm": 0.15208940207958221, "learning_rate": 0.00019854519410034282, "loss": 2.0158, "step": 281810 }, { "epoch": 1.072676476633451, "grad_norm": 0.14470238983631134, "learning_rate": 0.0001984760927112633, "loss": 2.0137, "step": 281820 }, { "epoch": 1.0727145391015735, "grad_norm": 0.13096201419830322, "learning_rate": 0.00019840700715478593, "loss": 2.0075, "step": 281830 }, { "epoch": 1.0727526015696962, "grad_norm": 0.1504952311515808, "learning_rate": 0.00019833793742003286, "loss": 2.0197, "step": 281840 }, { "epoch": 1.0727906640378189, "grad_norm": 0.14142200350761414, "learning_rate": 0.0001982688834961388, "loss": 2.0145, "step": 281850 }, { "epoch": 1.0728287265059415, "grad_norm": 0.13171540200710297, "learning_rate": 0.00019819984537225078, "loss": 2.0184, "step": 281860 }, { "epoch": 1.0728667889740642, "grad_norm": 0.12692947685718536, "learning_rate": 0.00019813082303752838, "loss": 2.0127, "step": 281870 }, { "epoch": 1.0729048514421868, "grad_norm": 0.16185876727104187, "learning_rate": 0.00019806181648114352, "loss": 2.0118, "step": 281880 }, { "epoch": 1.0729429139103095, "grad_norm": 0.15910275280475616, "learning_rate": 0.0001979928256922804, "loss": 2.008, "step": 281890 }, { "epoch": 1.0729809763784324, "grad_norm": 0.13678741455078125, "learning_rate": 0.0001979238506601357, "loss": 2.0275, "step": 281900 }, { "epoch": 1.073019038846555, "grad_norm": 0.17118968069553375, "learning_rate": 0.00019785489137391838, "loss": 2.0161, "step": 281910 }, { "epoch": 1.0730571013146777, "grad_norm": 0.12950493395328522, "learning_rate": 0.0001977859478228497, "loss": 2.0096, "step": 281920 }, { "epoch": 1.0730951637828003, "grad_norm": 0.13670261204242706, "learning_rate": 0.0001977170199961632, "loss": 2.0194, "step": 281930 }, { "epoch": 1.073133226250923, "grad_norm": 0.1519925445318222, "learning_rate": 0.00019764810788310477, "loss": 2.006, "step": 281940 }, { "epoch": 1.0731712887190457, "grad_norm": 0.16093097627162933, "learning_rate": 0.0001975792114729324, "loss": 1.9946, "step": 281950 }, { "epoch": 1.0732093511871683, "grad_norm": 0.14991694688796997, "learning_rate": 0.00019751033075491654, "loss": 2.0128, "step": 281960 }, { "epoch": 1.073247413655291, "grad_norm": 0.14340434968471527, "learning_rate": 0.0001974414657183396, "loss": 2.0325, "step": 281970 }, { "epoch": 1.0732854761234139, "grad_norm": 0.14055612683296204, "learning_rate": 0.00019737261635249638, "loss": 2.0073, "step": 281980 }, { "epoch": 1.0733235385915365, "grad_norm": 0.1411522477865219, "learning_rate": 0.0001973037826466938, "loss": 2.005, "step": 281990 }, { "epoch": 1.0733616010596592, "grad_norm": 0.17120565474033356, "learning_rate": 0.00019723496459025087, "loss": 2.0129, "step": 282000 }, { "epoch": 1.0733996635277818, "grad_norm": 0.13336136937141418, "learning_rate": 0.0001971661621724987, "loss": 2.0178, "step": 282010 }, { "epoch": 1.0734377259959045, "grad_norm": 0.16977474093437195, "learning_rate": 0.00019709737538278082, "loss": 2.0231, "step": 282020 }, { "epoch": 1.0734757884640271, "grad_norm": 0.14384149014949799, "learning_rate": 0.0001970286042104525, "loss": 2.0212, "step": 282030 }, { "epoch": 1.0735138509321498, "grad_norm": 0.1479279100894928, "learning_rate": 0.00019695984864488114, "loss": 1.9961, "step": 282040 }, { "epoch": 1.0735519134002725, "grad_norm": 0.1393909901380539, "learning_rate": 0.00019689110867544647, "loss": 2.0178, "step": 282050 }, { "epoch": 1.073589975868395, "grad_norm": 0.1390264630317688, "learning_rate": 0.00019682238429153998, "loss": 2.0209, "step": 282060 }, { "epoch": 1.073628038336518, "grad_norm": 0.17246313393115997, "learning_rate": 0.00019675367548256529, "loss": 2.004, "step": 282070 }, { "epoch": 1.0736661008046406, "grad_norm": 0.1607477217912674, "learning_rate": 0.00019668498223793796, "loss": 2.0053, "step": 282080 }, { "epoch": 1.0737041632727633, "grad_norm": 0.14993655681610107, "learning_rate": 0.00019661630454708567, "loss": 2.013, "step": 282090 }, { "epoch": 1.073742225740886, "grad_norm": 0.16224703192710876, "learning_rate": 0.00019654764239944794, "loss": 2.0218, "step": 282100 }, { "epoch": 1.0737802882090086, "grad_norm": 0.16363425552845, "learning_rate": 0.00019647899578447626, "loss": 2.0246, "step": 282110 }, { "epoch": 1.0738183506771313, "grad_norm": 0.15580902993679047, "learning_rate": 0.00019641036469163404, "loss": 2.0254, "step": 282120 }, { "epoch": 1.073856413145254, "grad_norm": 0.1360892504453659, "learning_rate": 0.0001963417491103966, "loss": 2.0202, "step": 282130 }, { "epoch": 1.0738944756133766, "grad_norm": 0.14252018928527832, "learning_rate": 0.00019627314903025118, "loss": 2.0144, "step": 282140 }, { "epoch": 1.0739325380814995, "grad_norm": 0.13783758878707886, "learning_rate": 0.00019620456444069696, "loss": 2.0257, "step": 282150 }, { "epoch": 1.0739706005496221, "grad_norm": 0.16759249567985535, "learning_rate": 0.0001961359953312447, "loss": 2.0279, "step": 282160 }, { "epoch": 1.0740086630177448, "grad_norm": 0.14761881530284882, "learning_rate": 0.00019606744169141732, "loss": 2.0225, "step": 282170 }, { "epoch": 1.0740467254858674, "grad_norm": 0.15070804953575134, "learning_rate": 0.0001959989035107494, "loss": 2.0072, "step": 282180 }, { "epoch": 1.07408478795399, "grad_norm": 0.1455344706773758, "learning_rate": 0.00019593038077878722, "loss": 2.0093, "step": 282190 }, { "epoch": 1.0741228504221128, "grad_norm": 0.14780759811401367, "learning_rate": 0.00019586187348508906, "loss": 2.0141, "step": 282200 }, { "epoch": 1.0741609128902354, "grad_norm": 0.1355655938386917, "learning_rate": 0.00019579338161922473, "loss": 2.0128, "step": 282210 }, { "epoch": 1.074198975358358, "grad_norm": 0.13392916321754456, "learning_rate": 0.00019572490517077586, "loss": 2.0172, "step": 282220 }, { "epoch": 1.0742370378264807, "grad_norm": 0.13204143941402435, "learning_rate": 0.00019565644412933598, "loss": 2.0208, "step": 282230 }, { "epoch": 1.0742751002946036, "grad_norm": 0.15992946922779083, "learning_rate": 0.00019558799848451002, "loss": 2.022, "step": 282240 }, { "epoch": 1.0743131627627263, "grad_norm": 0.15534865856170654, "learning_rate": 0.00019551956822591482, "loss": 2.0222, "step": 282250 }, { "epoch": 1.074351225230849, "grad_norm": 0.16686315834522247, "learning_rate": 0.0001954511533431788, "loss": 2.0182, "step": 282260 }, { "epoch": 1.0743892876989716, "grad_norm": 0.12937520444393158, "learning_rate": 0.000195382753825942, "loss": 2.0121, "step": 282270 }, { "epoch": 1.0744273501670942, "grad_norm": 0.15132172405719757, "learning_rate": 0.00019531436966385607, "loss": 2.0174, "step": 282280 }, { "epoch": 1.0744654126352169, "grad_norm": 0.1341206282377243, "learning_rate": 0.00019524600084658446, "loss": 2.0161, "step": 282290 }, { "epoch": 1.0745034751033395, "grad_norm": 0.13307468593120575, "learning_rate": 0.00019517764736380201, "loss": 2.0128, "step": 282300 }, { "epoch": 1.0745415375714622, "grad_norm": 0.13326209783554077, "learning_rate": 0.0001951093092051951, "loss": 2.0153, "step": 282310 }, { "epoch": 1.074579600039585, "grad_norm": 0.1382444202899933, "learning_rate": 0.00019504098636046187, "loss": 2.0198, "step": 282320 }, { "epoch": 1.0746176625077077, "grad_norm": 0.1539696604013443, "learning_rate": 0.00019497267881931192, "loss": 2.0149, "step": 282330 }, { "epoch": 1.0746557249758304, "grad_norm": 0.14765378832817078, "learning_rate": 0.0001949043865714662, "loss": 2.0126, "step": 282340 }, { "epoch": 1.074693787443953, "grad_norm": 0.1588527411222458, "learning_rate": 0.00019483610960665743, "loss": 2.0209, "step": 282350 }, { "epoch": 1.0747318499120757, "grad_norm": 0.1634843647480011, "learning_rate": 0.00019476784791462964, "loss": 2.0097, "step": 282360 }, { "epoch": 1.0747699123801984, "grad_norm": 0.1361679583787918, "learning_rate": 0.0001946996014851384, "loss": 2.008, "step": 282370 }, { "epoch": 1.074807974848321, "grad_norm": 0.14792568981647491, "learning_rate": 0.00019463137030795058, "loss": 2.025, "step": 282380 }, { "epoch": 1.0748460373164437, "grad_norm": 0.135822132229805, "learning_rate": 0.00019456315437284478, "loss": 2.0182, "step": 282390 }, { "epoch": 1.0748840997845663, "grad_norm": 0.17058981955051422, "learning_rate": 0.00019449495366961067, "loss": 2.0061, "step": 282400 }, { "epoch": 1.0749221622526892, "grad_norm": 0.16827578842639923, "learning_rate": 0.00019442676818804956, "loss": 2.0218, "step": 282410 }, { "epoch": 1.0749602247208119, "grad_norm": 0.1305459439754486, "learning_rate": 0.00019435859791797406, "loss": 2.0085, "step": 282420 }, { "epoch": 1.0749982871889345, "grad_norm": 0.17050224542617798, "learning_rate": 0.00019429044284920805, "loss": 2.0145, "step": 282430 }, { "epoch": 1.0750363496570572, "grad_norm": 0.15739241242408752, "learning_rate": 0.0001942223029715869, "loss": 2.0225, "step": 282440 }, { "epoch": 1.0750744121251798, "grad_norm": 0.15171968936920166, "learning_rate": 0.00019415417827495717, "loss": 2.0333, "step": 282450 }, { "epoch": 1.0751124745933025, "grad_norm": 0.1353382021188736, "learning_rate": 0.0001940860687491769, "loss": 2.0259, "step": 282460 }, { "epoch": 1.0751505370614252, "grad_norm": 0.15228162705898285, "learning_rate": 0.00019401797438411516, "loss": 2.0067, "step": 282470 }, { "epoch": 1.0751885995295478, "grad_norm": 0.17352250218391418, "learning_rate": 0.00019394989516965255, "loss": 2.0035, "step": 282480 }, { "epoch": 1.0752266619976705, "grad_norm": 0.1305777132511139, "learning_rate": 0.00019388183109568074, "loss": 2.0188, "step": 282490 }, { "epoch": 1.0752647244657934, "grad_norm": 0.1270778477191925, "learning_rate": 0.00019381378215210278, "loss": 2.0177, "step": 282500 }, { "epoch": 1.075302786933916, "grad_norm": 0.1552201211452484, "learning_rate": 0.00019374574832883273, "loss": 2.0108, "step": 282510 }, { "epoch": 1.0753408494020387, "grad_norm": 0.1422547698020935, "learning_rate": 0.00019367772961579604, "loss": 2.0139, "step": 282520 }, { "epoch": 1.0753789118701613, "grad_norm": 0.18762005865573883, "learning_rate": 0.00019360972600292943, "loss": 2.0168, "step": 282530 }, { "epoch": 1.075416974338284, "grad_norm": 0.17483144998550415, "learning_rate": 0.0001935417374801804, "loss": 2.0074, "step": 282540 }, { "epoch": 1.0754550368064066, "grad_norm": 0.14014564454555511, "learning_rate": 0.00019347376403750798, "loss": 2.0011, "step": 282550 }, { "epoch": 1.0754930992745293, "grad_norm": 0.14157618582248688, "learning_rate": 0.0001934058056648822, "loss": 2.0045, "step": 282560 }, { "epoch": 1.075531161742652, "grad_norm": 0.17108173668384552, "learning_rate": 0.00019333786235228406, "loss": 2.0105, "step": 282570 }, { "epoch": 1.0755692242107746, "grad_norm": 0.1399330347776413, "learning_rate": 0.00019326993408970596, "loss": 2.0253, "step": 282580 }, { "epoch": 1.0756072866788975, "grad_norm": 0.2349340170621872, "learning_rate": 0.00019320202086715112, "loss": 2.0085, "step": 282590 }, { "epoch": 1.0756453491470201, "grad_norm": 0.20484134554862976, "learning_rate": 0.00019313412267463388, "loss": 1.9964, "step": 282600 }, { "epoch": 1.0756834116151428, "grad_norm": 0.1534414142370224, "learning_rate": 0.00019306623950217966, "loss": 2.0127, "step": 282610 }, { "epoch": 1.0757214740832655, "grad_norm": 0.15662942826747894, "learning_rate": 0.00019299837133982496, "loss": 2.0064, "step": 282620 }, { "epoch": 1.0757595365513881, "grad_norm": 0.16546347737312317, "learning_rate": 0.00019293051817761725, "loss": 2.0043, "step": 282630 }, { "epoch": 1.0757975990195108, "grad_norm": 0.16139911115169525, "learning_rate": 0.00019286268000561486, "loss": 2.0198, "step": 282640 }, { "epoch": 1.0758356614876334, "grad_norm": 0.16876624524593353, "learning_rate": 0.00019279485681388732, "loss": 2.0223, "step": 282650 }, { "epoch": 1.075873723955756, "grad_norm": 0.15508447587490082, "learning_rate": 0.00019272704859251495, "loss": 2.0048, "step": 282660 }, { "epoch": 1.075911786423879, "grad_norm": 0.16199934482574463, "learning_rate": 0.00019265925533158912, "loss": 2.012, "step": 282670 }, { "epoch": 1.0759498488920016, "grad_norm": 0.1381034553050995, "learning_rate": 0.00019259147702121204, "loss": 1.9981, "step": 282680 }, { "epoch": 1.0759879113601243, "grad_norm": 0.18051622807979584, "learning_rate": 0.0001925237136514969, "loss": 2.0134, "step": 282690 }, { "epoch": 1.076025973828247, "grad_norm": 0.1292513757944107, "learning_rate": 0.00019245596521256776, "loss": 2.0076, "step": 282700 }, { "epoch": 1.0760640362963696, "grad_norm": 0.14846284687519073, "learning_rate": 0.0001923882316945595, "loss": 2.017, "step": 282710 }, { "epoch": 1.0761020987644923, "grad_norm": 0.15135519206523895, "learning_rate": 0.000192320513087618, "loss": 2.0073, "step": 282720 }, { "epoch": 1.076140161232615, "grad_norm": 0.15053410828113556, "learning_rate": 0.00019225280938189976, "loss": 2.0245, "step": 282730 }, { "epoch": 1.0761782237007376, "grad_norm": 0.1461486518383026, "learning_rate": 0.0001921851205675723, "loss": 2.0046, "step": 282740 }, { "epoch": 1.0762162861688602, "grad_norm": 0.15739676356315613, "learning_rate": 0.0001921174466348139, "loss": 2.0302, "step": 282750 }, { "epoch": 1.076254348636983, "grad_norm": 0.14662247896194458, "learning_rate": 0.00019204978757381358, "loss": 2.0034, "step": 282760 }, { "epoch": 1.0762924111051058, "grad_norm": 0.15804801881313324, "learning_rate": 0.00019198214337477122, "loss": 2.0074, "step": 282770 }, { "epoch": 1.0763304735732284, "grad_norm": 0.15608061850070953, "learning_rate": 0.00019191451402789735, "loss": 2.006, "step": 282780 }, { "epoch": 1.076368536041351, "grad_norm": 0.13967221975326538, "learning_rate": 0.00019184689952341327, "loss": 2.0196, "step": 282790 }, { "epoch": 1.0764065985094737, "grad_norm": 0.16726204752922058, "learning_rate": 0.0001917792998515512, "loss": 2.0119, "step": 282800 }, { "epoch": 1.0764446609775964, "grad_norm": 0.1628492772579193, "learning_rate": 0.00019171171500255375, "loss": 2.0018, "step": 282810 }, { "epoch": 1.076482723445719, "grad_norm": 0.14668413996696472, "learning_rate": 0.00019164414496667436, "loss": 2.0048, "step": 282820 }, { "epoch": 1.0765207859138417, "grad_norm": 0.13558825850486755, "learning_rate": 0.0001915765897341773, "loss": 2.0183, "step": 282830 }, { "epoch": 1.0765588483819646, "grad_norm": 0.14365750551223755, "learning_rate": 0.00019150904929533723, "loss": 2.0051, "step": 282840 }, { "epoch": 1.0765969108500872, "grad_norm": 0.14168040454387665, "learning_rate": 0.0001914415236404397, "loss": 2.0082, "step": 282850 }, { "epoch": 1.07663497331821, "grad_norm": 0.14769423007965088, "learning_rate": 0.00019137401275978071, "loss": 2.0199, "step": 282860 }, { "epoch": 1.0766730357863326, "grad_norm": 0.1618385761976242, "learning_rate": 0.00019130651664366695, "loss": 2.01, "step": 282870 }, { "epoch": 1.0767110982544552, "grad_norm": 0.15950889885425568, "learning_rate": 0.00019123903528241572, "loss": 1.999, "step": 282880 }, { "epoch": 1.0767491607225779, "grad_norm": 0.14730483293533325, "learning_rate": 0.00019117156866635483, "loss": 2.0012, "step": 282890 }, { "epoch": 1.0767872231907005, "grad_norm": 0.14688549935817719, "learning_rate": 0.00019110411678582268, "loss": 2.0245, "step": 282900 }, { "epoch": 1.0768252856588232, "grad_norm": 0.1415330469608307, "learning_rate": 0.00019103667963116827, "loss": 2.0161, "step": 282910 }, { "epoch": 1.0768633481269458, "grad_norm": 0.13632331788539886, "learning_rate": 0.00019096925719275115, "loss": 2.0101, "step": 282920 }, { "epoch": 1.0769014105950687, "grad_norm": 0.19452516734600067, "learning_rate": 0.0001909018494609412, "loss": 2.0235, "step": 282930 }, { "epoch": 1.0769394730631914, "grad_norm": 0.17027851939201355, "learning_rate": 0.00019083445642611896, "loss": 2.0223, "step": 282940 }, { "epoch": 1.076977535531314, "grad_norm": 0.1464688926935196, "learning_rate": 0.00019076707807867544, "loss": 2.0188, "step": 282950 }, { "epoch": 1.0770155979994367, "grad_norm": 0.16679330170154572, "learning_rate": 0.00019069971440901213, "loss": 2.0091, "step": 282960 }, { "epoch": 1.0770536604675593, "grad_norm": 0.13822433352470398, "learning_rate": 0.0001906323654075408, "loss": 2.0132, "step": 282970 }, { "epoch": 1.077091722935682, "grad_norm": 0.15693537890911102, "learning_rate": 0.000190565031064684, "loss": 2.0065, "step": 282980 }, { "epoch": 1.0771297854038047, "grad_norm": 0.13129134476184845, "learning_rate": 0.00019049771137087424, "loss": 1.9964, "step": 282990 }, { "epoch": 1.0771678478719273, "grad_norm": 0.1458442658185959, "learning_rate": 0.00019043040631655484, "loss": 2.0102, "step": 283000 }, { "epoch": 1.0772059103400502, "grad_norm": 0.14534065127372742, "learning_rate": 0.0001903631158921793, "loss": 2.0007, "step": 283010 }, { "epoch": 1.0772439728081729, "grad_norm": 0.1374642699956894, "learning_rate": 0.0001902958400882115, "loss": 2.0108, "step": 283020 }, { "epoch": 1.0772820352762955, "grad_norm": 0.1382344365119934, "learning_rate": 0.00019022857889512573, "loss": 2.0193, "step": 283030 }, { "epoch": 1.0773200977444182, "grad_norm": 0.14922475814819336, "learning_rate": 0.00019016133230340666, "loss": 2.0249, "step": 283040 }, { "epoch": 1.0773581602125408, "grad_norm": 0.16759616136550903, "learning_rate": 0.00019009410030354913, "loss": 2.0091, "step": 283050 }, { "epoch": 1.0773962226806635, "grad_norm": 0.13289323449134827, "learning_rate": 0.0001900268828860584, "loss": 2.0075, "step": 283060 }, { "epoch": 1.0774342851487861, "grad_norm": 0.13219304382801056, "learning_rate": 0.0001899596800414501, "loss": 2.0166, "step": 283070 }, { "epoch": 1.0774723476169088, "grad_norm": 0.1477508693933487, "learning_rate": 0.00018989249176024992, "loss": 2.012, "step": 283080 }, { "epoch": 1.0775104100850315, "grad_norm": 0.17055627703666687, "learning_rate": 0.00018982531803299398, "loss": 1.992, "step": 283090 }, { "epoch": 1.0775484725531543, "grad_norm": 0.16017602384090424, "learning_rate": 0.00018975815885022857, "loss": 2.0025, "step": 283100 }, { "epoch": 1.077586535021277, "grad_norm": 0.1373511552810669, "learning_rate": 0.00018969101420251029, "loss": 1.9972, "step": 283110 }, { "epoch": 1.0776245974893996, "grad_norm": 0.13665539026260376, "learning_rate": 0.00018962388408040587, "loss": 1.993, "step": 283120 }, { "epoch": 1.0776626599575223, "grad_norm": 0.1449371576309204, "learning_rate": 0.00018955676847449228, "loss": 2.0158, "step": 283130 }, { "epoch": 1.077700722425645, "grad_norm": 0.16415664553642273, "learning_rate": 0.0001894896673753566, "loss": 2.0146, "step": 283140 }, { "epoch": 1.0777387848937676, "grad_norm": 0.16769972443580627, "learning_rate": 0.0001894225807735963, "loss": 2.0202, "step": 283150 }, { "epoch": 1.0777768473618903, "grad_norm": 0.15793916583061218, "learning_rate": 0.00018935550865981866, "loss": 2.0132, "step": 283160 }, { "epoch": 1.077814909830013, "grad_norm": 0.1298457831144333, "learning_rate": 0.00018928845102464143, "loss": 2.0068, "step": 283170 }, { "epoch": 1.0778529722981358, "grad_norm": 0.13816504180431366, "learning_rate": 0.00018922140785869224, "loss": 2.0034, "step": 283180 }, { "epoch": 1.0778910347662585, "grad_norm": 0.137596994638443, "learning_rate": 0.0001891543791526089, "loss": 2.0118, "step": 283190 }, { "epoch": 1.0779290972343811, "grad_norm": 0.166671484708786, "learning_rate": 0.00018908736489703948, "loss": 2.0186, "step": 283200 }, { "epoch": 1.0779671597025038, "grad_norm": 0.1365715116262436, "learning_rate": 0.0001890203650826419, "loss": 2.0167, "step": 283210 }, { "epoch": 1.0780052221706264, "grad_norm": 0.13674066960811615, "learning_rate": 0.0001889533797000842, "loss": 2.0134, "step": 283220 }, { "epoch": 1.078043284638749, "grad_norm": 0.13761483132839203, "learning_rate": 0.00018888640874004448, "loss": 2.0246, "step": 283230 }, { "epoch": 1.0780813471068718, "grad_norm": 0.18757186830043793, "learning_rate": 0.00018881945219321095, "loss": 2.0121, "step": 283240 }, { "epoch": 1.0781194095749944, "grad_norm": 0.2052200734615326, "learning_rate": 0.0001887525100502817, "loss": 2.0133, "step": 283250 }, { "epoch": 1.078157472043117, "grad_norm": 0.14256851375102997, "learning_rate": 0.0001886855823019649, "loss": 2.0108, "step": 283260 }, { "epoch": 1.07819553451124, "grad_norm": 0.13533790409564972, "learning_rate": 0.00018861866893897872, "loss": 2.0102, "step": 283270 }, { "epoch": 1.0782335969793626, "grad_norm": 0.1639997363090515, "learning_rate": 0.00018855176995205124, "loss": 2.0092, "step": 283280 }, { "epoch": 1.0782716594474853, "grad_norm": 0.16645225882530212, "learning_rate": 0.0001884848853319206, "loss": 2.0475, "step": 283290 }, { "epoch": 1.078309721915608, "grad_norm": 0.1505880504846573, "learning_rate": 0.0001884180150693347, "loss": 2.0073, "step": 283300 }, { "epoch": 1.0783477843837306, "grad_norm": 0.15273131430149078, "learning_rate": 0.00018835115915505156, "loss": 2.0154, "step": 283310 }, { "epoch": 1.0783858468518532, "grad_norm": 0.1675952672958374, "learning_rate": 0.000188284317579839, "loss": 2.0151, "step": 283320 }, { "epoch": 1.078423909319976, "grad_norm": 0.14620567858219147, "learning_rate": 0.00018821749033447478, "loss": 2.0074, "step": 283330 }, { "epoch": 1.0784619717880986, "grad_norm": 0.1587236523628235, "learning_rate": 0.0001881506774097465, "loss": 2.0229, "step": 283340 }, { "epoch": 1.0785000342562212, "grad_norm": 0.16149382293224335, "learning_rate": 0.00018808387879645167, "loss": 2.016, "step": 283350 }, { "epoch": 1.078538096724344, "grad_norm": 0.19399163126945496, "learning_rate": 0.0001880170944853976, "loss": 2.0156, "step": 283360 }, { "epoch": 1.0785761591924667, "grad_norm": 0.15907388925552368, "learning_rate": 0.0001879503244674015, "loss": 1.9963, "step": 283370 }, { "epoch": 1.0786142216605894, "grad_norm": 0.14165343344211578, "learning_rate": 0.00018788356873329038, "loss": 2.0276, "step": 283380 }, { "epoch": 1.078652284128712, "grad_norm": 0.1441272646188736, "learning_rate": 0.00018781682727390104, "loss": 2.0069, "step": 283390 }, { "epoch": 1.0786903465968347, "grad_norm": 0.1678933948278427, "learning_rate": 0.00018775010008008008, "loss": 2.0281, "step": 283400 }, { "epoch": 1.0787284090649574, "grad_norm": 0.1660948246717453, "learning_rate": 0.00018768338714268392, "loss": 2.0149, "step": 283410 }, { "epoch": 1.07876647153308, "grad_norm": 0.1400216817855835, "learning_rate": 0.00018761668845257862, "loss": 2.0268, "step": 283420 }, { "epoch": 1.0788045340012027, "grad_norm": 0.15131667256355286, "learning_rate": 0.00018755000400064016, "loss": 2.0161, "step": 283430 }, { "epoch": 1.0788425964693253, "grad_norm": 0.1365083009004593, "learning_rate": 0.00018748333377775406, "loss": 2.0132, "step": 283440 }, { "epoch": 1.0788806589374482, "grad_norm": 0.1457868367433548, "learning_rate": 0.0001874166777748158, "loss": 2.0171, "step": 283450 }, { "epoch": 1.0789187214055709, "grad_norm": 0.1760193258523941, "learning_rate": 0.00018735003598273036, "loss": 2.0208, "step": 283460 }, { "epoch": 1.0789567838736935, "grad_norm": 0.14603100717067719, "learning_rate": 0.00018728340839241247, "loss": 2.0066, "step": 283470 }, { "epoch": 1.0789948463418162, "grad_norm": 0.16044391691684723, "learning_rate": 0.00018721679499478667, "loss": 2.0208, "step": 283480 }, { "epoch": 1.0790329088099389, "grad_norm": 0.1581258922815323, "learning_rate": 0.00018715019578078684, "loss": 2.0036, "step": 283490 }, { "epoch": 1.0790709712780615, "grad_norm": 0.1435764580965042, "learning_rate": 0.00018708361074135687, "loss": 2.0245, "step": 283500 }, { "epoch": 1.0791090337461842, "grad_norm": 0.13865844905376434, "learning_rate": 0.00018701703986745012, "loss": 2.0091, "step": 283510 }, { "epoch": 1.0791470962143068, "grad_norm": 0.1498934030532837, "learning_rate": 0.00018695048315002944, "loss": 2.0307, "step": 283520 }, { "epoch": 1.0791851586824297, "grad_norm": 0.1492132842540741, "learning_rate": 0.00018688394058006757, "loss": 2.0035, "step": 283530 }, { "epoch": 1.0792232211505524, "grad_norm": 0.16242045164108276, "learning_rate": 0.00018681741214854658, "loss": 2.0155, "step": 283540 }, { "epoch": 1.079261283618675, "grad_norm": 0.16527853906154633, "learning_rate": 0.00018675089784645833, "loss": 2.0191, "step": 283550 }, { "epoch": 1.0792993460867977, "grad_norm": 0.178872749209404, "learning_rate": 0.000186684397664804, "loss": 2.0106, "step": 283560 }, { "epoch": 1.0793374085549203, "grad_norm": 0.18619264662265778, "learning_rate": 0.00018661791159459463, "loss": 2.012, "step": 283570 }, { "epoch": 1.079375471023043, "grad_norm": 0.16307014226913452, "learning_rate": 0.00018655143962685038, "loss": 2.0204, "step": 283580 }, { "epoch": 1.0794135334911656, "grad_norm": 0.17658159136772156, "learning_rate": 0.00018648498175260141, "loss": 2.0227, "step": 283590 }, { "epoch": 1.0794515959592883, "grad_norm": 0.14216390252113342, "learning_rate": 0.00018641853796288704, "loss": 1.9927, "step": 283600 }, { "epoch": 1.079489658427411, "grad_norm": 0.1339612454175949, "learning_rate": 0.0001863521082487561, "loss": 2.0187, "step": 283610 }, { "epoch": 1.0795277208955338, "grad_norm": 0.15501002967357635, "learning_rate": 0.0001862856926012671, "loss": 2.0255, "step": 283620 }, { "epoch": 1.0795657833636565, "grad_norm": 0.17483502626419067, "learning_rate": 0.00018621929101148788, "loss": 1.9948, "step": 283630 }, { "epoch": 1.0796038458317792, "grad_norm": 0.1788007915019989, "learning_rate": 0.00018615290347049568, "loss": 2.0124, "step": 283640 }, { "epoch": 1.0796419082999018, "grad_norm": 0.16997750103473663, "learning_rate": 0.0001860865299693773, "loss": 2.013, "step": 283650 }, { "epoch": 1.0796799707680245, "grad_norm": 0.2105778157711029, "learning_rate": 0.00018602017049922887, "loss": 2.0133, "step": 283660 }, { "epoch": 1.0797180332361471, "grad_norm": 0.16479893028736115, "learning_rate": 0.00018595382505115588, "loss": 2.018, "step": 283670 }, { "epoch": 1.0797560957042698, "grad_norm": 0.1657109558582306, "learning_rate": 0.00018588749361627343, "loss": 2.017, "step": 283680 }, { "epoch": 1.0797941581723924, "grad_norm": 0.1969013214111328, "learning_rate": 0.00018582117618570578, "loss": 2.0011, "step": 283690 }, { "epoch": 1.0798322206405153, "grad_norm": 0.13585196435451508, "learning_rate": 0.00018575487275058661, "loss": 2.0124, "step": 283700 }, { "epoch": 1.079870283108638, "grad_norm": 0.13131742179393768, "learning_rate": 0.00018568858330205907, "loss": 2.0013, "step": 283710 }, { "epoch": 1.0799083455767606, "grad_norm": 0.1606842577457428, "learning_rate": 0.00018562230783127548, "loss": 2.0115, "step": 283720 }, { "epoch": 1.0799464080448833, "grad_norm": 0.14883503317832947, "learning_rate": 0.00018555604632939748, "loss": 2.0099, "step": 283730 }, { "epoch": 1.079984470513006, "grad_norm": 0.1689402461051941, "learning_rate": 0.00018548979878759632, "loss": 2.0121, "step": 283740 }, { "epoch": 1.0800225329811286, "grad_norm": 0.16839559376239777, "learning_rate": 0.0001854235651970521, "loss": 2.01, "step": 283750 }, { "epoch": 1.0800605954492513, "grad_norm": 0.1578279286623001, "learning_rate": 0.00018535734554895456, "loss": 2.0087, "step": 283760 }, { "epoch": 1.080098657917374, "grad_norm": 0.14088387787342072, "learning_rate": 0.00018529113983450246, "loss": 1.9997, "step": 283770 }, { "epoch": 1.0801367203854966, "grad_norm": 0.13390129804611206, "learning_rate": 0.00018522494804490408, "loss": 2.0186, "step": 283780 }, { "epoch": 1.0801747828536195, "grad_norm": 0.13913902640342712, "learning_rate": 0.00018515877017137667, "loss": 2.0088, "step": 283790 }, { "epoch": 1.080212845321742, "grad_norm": 0.16706794500350952, "learning_rate": 0.00018509260620514696, "loss": 1.996, "step": 283800 }, { "epoch": 1.0802509077898648, "grad_norm": 0.13749095797538757, "learning_rate": 0.00018502645613745062, "loss": 2.0122, "step": 283810 }, { "epoch": 1.0802889702579874, "grad_norm": 0.1544412076473236, "learning_rate": 0.0001849603199595327, "loss": 2.0125, "step": 283820 }, { "epoch": 1.08032703272611, "grad_norm": 0.18350383639335632, "learning_rate": 0.0001848941976626475, "loss": 2.0126, "step": 283830 }, { "epoch": 1.0803650951942327, "grad_norm": 0.1641099452972412, "learning_rate": 0.00018482808923805832, "loss": 2.0051, "step": 283840 }, { "epoch": 1.0804031576623554, "grad_norm": 0.1506098061800003, "learning_rate": 0.0001847619946770377, "loss": 2.0235, "step": 283850 }, { "epoch": 1.080441220130478, "grad_norm": 0.13227491080760956, "learning_rate": 0.00018469591397086738, "loss": 1.9968, "step": 283860 }, { "epoch": 1.080479282598601, "grad_norm": 0.14088758826255798, "learning_rate": 0.00018462984711083814, "loss": 2.0032, "step": 283870 }, { "epoch": 1.0805173450667236, "grad_norm": 0.1535644233226776, "learning_rate": 0.0001845637940882499, "loss": 2.0074, "step": 283880 }, { "epoch": 1.0805554075348462, "grad_norm": 0.13675335049629211, "learning_rate": 0.00018449775489441178, "loss": 2.0214, "step": 283890 }, { "epoch": 1.080593470002969, "grad_norm": 0.14348675310611725, "learning_rate": 0.00018443172952064185, "loss": 2.0205, "step": 283900 }, { "epoch": 1.0806315324710916, "grad_norm": 0.1493082195520401, "learning_rate": 0.0001843657179582674, "loss": 2.0074, "step": 283910 }, { "epoch": 1.0806695949392142, "grad_norm": 0.13401271402835846, "learning_rate": 0.0001842997201986247, "loss": 2.0021, "step": 283920 }, { "epoch": 1.0807076574073369, "grad_norm": 0.15331800282001495, "learning_rate": 0.00018423373623305907, "loss": 2.0022, "step": 283930 }, { "epoch": 1.0807457198754595, "grad_norm": 0.13529661297798157, "learning_rate": 0.00018416776605292484, "loss": 2.0236, "step": 283940 }, { "epoch": 1.0807837823435822, "grad_norm": 0.14704905450344086, "learning_rate": 0.00018410180964958557, "loss": 1.9967, "step": 283950 }, { "epoch": 1.080821844811705, "grad_norm": 0.15539222955703735, "learning_rate": 0.00018403586701441361, "loss": 2.0291, "step": 283960 }, { "epoch": 1.0808599072798277, "grad_norm": 0.14663167297840118, "learning_rate": 0.0001839699381387903, "loss": 2.0175, "step": 283970 }, { "epoch": 1.0808979697479504, "grad_norm": 0.18007595837116241, "learning_rate": 0.00018390402301410625, "loss": 1.9998, "step": 283980 }, { "epoch": 1.080936032216073, "grad_norm": 0.17619404196739197, "learning_rate": 0.00018383812163176073, "loss": 2.0131, "step": 283990 }, { "epoch": 1.0809740946841957, "grad_norm": 0.14554846286773682, "learning_rate": 0.00018377223398316207, "loss": 2.0071, "step": 284000 }, { "epoch": 1.0810121571523184, "grad_norm": 0.13671885430812836, "learning_rate": 0.00018370636005972762, "loss": 2.024, "step": 284010 }, { "epoch": 1.081050219620441, "grad_norm": 0.15811777114868164, "learning_rate": 0.0001836404998528836, "loss": 2.0026, "step": 284020 }, { "epoch": 1.0810882820885637, "grad_norm": 0.16493038833141327, "learning_rate": 0.0001835746533540652, "loss": 2.0035, "step": 284030 }, { "epoch": 1.0811263445566865, "grad_norm": 0.1355385184288025, "learning_rate": 0.00018350882055471646, "loss": 2.0125, "step": 284040 }, { "epoch": 1.0811644070248092, "grad_norm": 0.1493157148361206, "learning_rate": 0.00018344300144629034, "loss": 2.0083, "step": 284050 }, { "epoch": 1.0812024694929319, "grad_norm": 0.1507268100976944, "learning_rate": 0.00018337719602024873, "loss": 2.0161, "step": 284060 }, { "epoch": 1.0812405319610545, "grad_norm": 0.1359802782535553, "learning_rate": 0.0001833114042680623, "loss": 2.0057, "step": 284070 }, { "epoch": 1.0812785944291772, "grad_norm": 0.17290131747722626, "learning_rate": 0.00018324562618121065, "loss": 1.9976, "step": 284080 }, { "epoch": 1.0813166568972998, "grad_norm": 0.17614369094371796, "learning_rate": 0.00018317986175118224, "loss": 2.0057, "step": 284090 }, { "epoch": 1.0813547193654225, "grad_norm": 0.14882458746433258, "learning_rate": 0.0001831141109694743, "loss": 2.005, "step": 284100 }, { "epoch": 1.0813927818335451, "grad_norm": 0.1476394385099411, "learning_rate": 0.00018304837382759288, "loss": 2.0242, "step": 284110 }, { "epoch": 1.0814308443016678, "grad_norm": 0.16538527607917786, "learning_rate": 0.00018298265031705286, "loss": 1.9985, "step": 284120 }, { "epoch": 1.0814689067697907, "grad_norm": 0.13338564336299896, "learning_rate": 0.00018291694042937796, "loss": 2.0136, "step": 284130 }, { "epoch": 1.0815069692379133, "grad_norm": 0.13948379456996918, "learning_rate": 0.0001828512441561006, "loss": 2.0101, "step": 284140 }, { "epoch": 1.081545031706036, "grad_norm": 0.14546875655651093, "learning_rate": 0.00018278556148876196, "loss": 2.0074, "step": 284150 }, { "epoch": 1.0815830941741587, "grad_norm": 0.17852666974067688, "learning_rate": 0.00018271989241891218, "loss": 1.9957, "step": 284160 }, { "epoch": 1.0816211566422813, "grad_norm": 0.16738300025463104, "learning_rate": 0.00018265423693810984, "loss": 2.0249, "step": 284170 }, { "epoch": 1.081659219110404, "grad_norm": 0.1647614687681198, "learning_rate": 0.0001825885950379224, "loss": 2.0112, "step": 284180 }, { "epoch": 1.0816972815785266, "grad_norm": 0.1584128588438034, "learning_rate": 0.00018252296670992612, "loss": 2.0144, "step": 284190 }, { "epoch": 1.0817353440466493, "grad_norm": 0.173493891954422, "learning_rate": 0.00018245735194570583, "loss": 2.0134, "step": 284200 }, { "epoch": 1.081773406514772, "grad_norm": 0.15178415179252625, "learning_rate": 0.00018239175073685504, "loss": 2.0165, "step": 284210 }, { "epoch": 1.0818114689828948, "grad_norm": 0.14960837364196777, "learning_rate": 0.00018232616307497612, "loss": 2.0251, "step": 284220 }, { "epoch": 1.0818495314510175, "grad_norm": 0.13643042743206024, "learning_rate": 0.0001822605889516799, "loss": 1.9996, "step": 284230 }, { "epoch": 1.0818875939191401, "grad_norm": 0.15679650008678436, "learning_rate": 0.00018219502835858592, "loss": 2.0078, "step": 284240 }, { "epoch": 1.0819256563872628, "grad_norm": 0.15052969753742218, "learning_rate": 0.00018212948128732247, "loss": 2.0, "step": 284250 }, { "epoch": 1.0819637188553854, "grad_norm": 0.13190500438213348, "learning_rate": 0.0001820639477295264, "loss": 2.0053, "step": 284260 }, { "epoch": 1.082001781323508, "grad_norm": 0.15521852672100067, "learning_rate": 0.00018199842767684305, "loss": 2.0003, "step": 284270 }, { "epoch": 1.0820398437916308, "grad_norm": 0.15261310338974, "learning_rate": 0.00018193292112092662, "loss": 2.0115, "step": 284280 }, { "epoch": 1.0820779062597534, "grad_norm": 0.14686596393585205, "learning_rate": 0.00018186742805343975, "loss": 2.0122, "step": 284290 }, { "epoch": 1.082115968727876, "grad_norm": 0.1680731326341629, "learning_rate": 0.00018180194846605362, "loss": 2.0055, "step": 284300 }, { "epoch": 1.082154031195999, "grad_norm": 0.2047916054725647, "learning_rate": 0.00018173648235044805, "loss": 2.0156, "step": 284310 }, { "epoch": 1.0821920936641216, "grad_norm": 0.14100709557533264, "learning_rate": 0.00018167102969831139, "loss": 1.997, "step": 284320 }, { "epoch": 1.0822301561322443, "grad_norm": 0.17565107345581055, "learning_rate": 0.00018160559050134064, "loss": 2.0085, "step": 284330 }, { "epoch": 1.082268218600367, "grad_norm": 0.1566193699836731, "learning_rate": 0.00018154016475124108, "loss": 2.032, "step": 284340 }, { "epoch": 1.0823062810684896, "grad_norm": 0.1979827731847763, "learning_rate": 0.00018147475243972678, "loss": 2.0144, "step": 284350 }, { "epoch": 1.0823443435366122, "grad_norm": 0.14324718713760376, "learning_rate": 0.00018140935355852019, "loss": 2.003, "step": 284360 }, { "epoch": 1.082382406004735, "grad_norm": 0.1722949594259262, "learning_rate": 0.00018134396809935222, "loss": 2.0078, "step": 284370 }, { "epoch": 1.0824204684728576, "grad_norm": 0.13863638043403625, "learning_rate": 0.00018127859605396229, "loss": 2.0092, "step": 284380 }, { "epoch": 1.0824585309409804, "grad_norm": 0.12698815762996674, "learning_rate": 0.0001812132374140984, "loss": 2.0169, "step": 284390 }, { "epoch": 1.082496593409103, "grad_norm": 0.15930671989917755, "learning_rate": 0.00018114789217151683, "loss": 2.0116, "step": 284400 }, { "epoch": 1.0825346558772257, "grad_norm": 0.15124854445457458, "learning_rate": 0.0001810825603179824, "loss": 2.0018, "step": 284410 }, { "epoch": 1.0825727183453484, "grad_norm": 0.1586410254240036, "learning_rate": 0.0001810172418452684, "loss": 2.0096, "step": 284420 }, { "epoch": 1.082610780813471, "grad_norm": 0.1424012929201126, "learning_rate": 0.00018095193674515643, "loss": 1.9924, "step": 284430 }, { "epoch": 1.0826488432815937, "grad_norm": 0.15181034803390503, "learning_rate": 0.00018088664500943662, "loss": 2.0041, "step": 284440 }, { "epoch": 1.0826869057497164, "grad_norm": 0.15156134963035583, "learning_rate": 0.00018082136662990738, "loss": 2.0034, "step": 284450 }, { "epoch": 1.082724968217839, "grad_norm": 0.14133992791175842, "learning_rate": 0.00018075610159837564, "loss": 1.9996, "step": 284460 }, { "epoch": 1.0827630306859617, "grad_norm": 0.14046622812747955, "learning_rate": 0.00018069084990665656, "loss": 1.998, "step": 284470 }, { "epoch": 1.0828010931540846, "grad_norm": 0.17330563068389893, "learning_rate": 0.00018062561154657376, "loss": 2.0016, "step": 284480 }, { "epoch": 1.0828391556222072, "grad_norm": 0.16119085252285004, "learning_rate": 0.0001805603865099592, "loss": 2.0187, "step": 284490 }, { "epoch": 1.0828772180903299, "grad_norm": 0.16322733461856842, "learning_rate": 0.00018049517478865311, "loss": 1.998, "step": 284500 }, { "epoch": 1.0829152805584525, "grad_norm": 0.14511479437351227, "learning_rate": 0.00018042997637450416, "loss": 2.0181, "step": 284510 }, { "epoch": 1.0829533430265752, "grad_norm": 0.14829565584659576, "learning_rate": 0.0001803647912593691, "loss": 2.0053, "step": 284520 }, { "epoch": 1.0829914054946979, "grad_norm": 0.1813560426235199, "learning_rate": 0.00018029961943511342, "loss": 2.0052, "step": 284530 }, { "epoch": 1.0830294679628205, "grad_norm": 0.13126425445079803, "learning_rate": 0.0001802344608936104, "loss": 2.0076, "step": 284540 }, { "epoch": 1.0830675304309432, "grad_norm": 0.16517986357212067, "learning_rate": 0.00018016931562674188, "loss": 2.0101, "step": 284550 }, { "epoch": 1.083105592899066, "grad_norm": 0.1345342993736267, "learning_rate": 0.0001801041836263979, "loss": 2.005, "step": 284560 }, { "epoch": 1.0831436553671887, "grad_norm": 0.13746510446071625, "learning_rate": 0.00018003906488447692, "loss": 2.0143, "step": 284570 }, { "epoch": 1.0831817178353114, "grad_norm": 0.14292031526565552, "learning_rate": 0.00017997395939288525, "loss": 2.0117, "step": 284580 }, { "epoch": 1.083219780303434, "grad_norm": 0.15923810005187988, "learning_rate": 0.00017990886714353783, "loss": 2.0153, "step": 284590 }, { "epoch": 1.0832578427715567, "grad_norm": 0.14287570118904114, "learning_rate": 0.00017984378812835756, "loss": 2.033, "step": 284600 }, { "epoch": 1.0832959052396793, "grad_norm": 0.1656784862279892, "learning_rate": 0.00017977872233927573, "loss": 2.0002, "step": 284610 }, { "epoch": 1.083333967707802, "grad_norm": 0.13215410709381104, "learning_rate": 0.00017971366976823173, "loss": 2.003, "step": 284620 }, { "epoch": 1.0833720301759246, "grad_norm": 0.16975288093090057, "learning_rate": 0.0001796486304071731, "loss": 2.0143, "step": 284630 }, { "epoch": 1.0834100926440473, "grad_norm": 0.1532881259918213, "learning_rate": 0.0001795836042480556, "loss": 2.0028, "step": 284640 }, { "epoch": 1.0834481551121702, "grad_norm": 0.15794317424297333, "learning_rate": 0.00017951859128284315, "loss": 2.0231, "step": 284650 }, { "epoch": 1.0834862175802928, "grad_norm": 0.14210180938243866, "learning_rate": 0.00017945359150350787, "loss": 2.01, "step": 284660 }, { "epoch": 1.0835242800484155, "grad_norm": 0.20217181742191315, "learning_rate": 0.0001793886049020299, "loss": 2.0126, "step": 284670 }, { "epoch": 1.0835623425165382, "grad_norm": 0.1375533938407898, "learning_rate": 0.00017932363147039765, "loss": 2.0103, "step": 284680 }, { "epoch": 1.0836004049846608, "grad_norm": 0.14529874920845032, "learning_rate": 0.0001792586712006075, "loss": 1.9933, "step": 284690 }, { "epoch": 1.0836384674527835, "grad_norm": 0.14373710751533508, "learning_rate": 0.00017919372408466404, "loss": 2.0093, "step": 284700 }, { "epoch": 1.0836765299209061, "grad_norm": 0.1357426643371582, "learning_rate": 0.00017912879011457988, "loss": 2.0084, "step": 284710 }, { "epoch": 1.0837145923890288, "grad_norm": 0.15259642899036407, "learning_rate": 0.00017906386928237577, "loss": 2.0076, "step": 284720 }, { "epoch": 1.0837526548571517, "grad_norm": 0.15701621770858765, "learning_rate": 0.00017899896158008044, "loss": 2.0148, "step": 284730 }, { "epoch": 1.0837907173252743, "grad_norm": 0.13638730347156525, "learning_rate": 0.00017893406699973086, "loss": 2.0231, "step": 284740 }, { "epoch": 1.083828779793397, "grad_norm": 0.1526368409395218, "learning_rate": 0.00017886918553337177, "loss": 1.9945, "step": 284750 }, { "epoch": 1.0838668422615196, "grad_norm": 0.17583663761615753, "learning_rate": 0.00017880431717305623, "loss": 2.0016, "step": 284760 }, { "epoch": 1.0839049047296423, "grad_norm": 0.13901753723621368, "learning_rate": 0.0001787394619108451, "loss": 1.9962, "step": 284770 }, { "epoch": 1.083942967197765, "grad_norm": 0.14218559861183167, "learning_rate": 0.00017867461973880745, "loss": 2.0059, "step": 284780 }, { "epoch": 1.0839810296658876, "grad_norm": 0.15195435285568237, "learning_rate": 0.00017860979064902012, "loss": 1.9785, "step": 284790 }, { "epoch": 1.0840190921340103, "grad_norm": 0.1548282653093338, "learning_rate": 0.00017854497463356818, "loss": 1.9966, "step": 284800 }, { "epoch": 1.084057154602133, "grad_norm": 0.16204087436199188, "learning_rate": 0.0001784801716845445, "loss": 1.9974, "step": 284810 }, { "epoch": 1.0840952170702558, "grad_norm": 0.1456010788679123, "learning_rate": 0.00017841538179404993, "loss": 2.0028, "step": 284820 }, { "epoch": 1.0841332795383785, "grad_norm": 0.17768023908138275, "learning_rate": 0.00017835060495419343, "loss": 2.0119, "step": 284830 }, { "epoch": 1.0841713420065011, "grad_norm": 0.15545113384723663, "learning_rate": 0.0001782858411570918, "loss": 2.0115, "step": 284840 }, { "epoch": 1.0842094044746238, "grad_norm": 0.16330191493034363, "learning_rate": 0.00017822109039486962, "loss": 2.0031, "step": 284850 }, { "epoch": 1.0842474669427464, "grad_norm": 0.14554713666439056, "learning_rate": 0.00017815635265965967, "loss": 2.0208, "step": 284860 }, { "epoch": 1.084285529410869, "grad_norm": 0.13749483227729797, "learning_rate": 0.00017809162794360257, "loss": 1.9935, "step": 284870 }, { "epoch": 1.0843235918789917, "grad_norm": 0.15069235861301422, "learning_rate": 0.00017802691623884664, "loss": 2.002, "step": 284880 }, { "epoch": 1.0843616543471144, "grad_norm": 0.13793307542800903, "learning_rate": 0.00017796221753754837, "loss": 2.0199, "step": 284890 }, { "epoch": 1.0843997168152373, "grad_norm": 0.14687716960906982, "learning_rate": 0.00017789753183187186, "loss": 2.0139, "step": 284900 }, { "epoch": 1.08443777928336, "grad_norm": 0.15696461498737335, "learning_rate": 0.0001778328591139893, "loss": 2.0064, "step": 284910 }, { "epoch": 1.0844758417514826, "grad_norm": 0.21080505847930908, "learning_rate": 0.00017776819937608064, "loss": 2.0004, "step": 284920 }, { "epoch": 1.0845139042196053, "grad_norm": 0.13712690770626068, "learning_rate": 0.0001777035526103336, "loss": 2.0056, "step": 284930 }, { "epoch": 1.084551966687728, "grad_norm": 0.13887648284435272, "learning_rate": 0.0001776389188089439, "loss": 2.004, "step": 284940 }, { "epoch": 1.0845900291558506, "grad_norm": 0.13740429282188416, "learning_rate": 0.00017757429796411495, "loss": 2.0064, "step": 284950 }, { "epoch": 1.0846280916239732, "grad_norm": 0.1832609623670578, "learning_rate": 0.00017750969006805802, "loss": 2.0065, "step": 284960 }, { "epoch": 1.0846661540920959, "grad_norm": 0.160702183842659, "learning_rate": 0.00017744509511299217, "loss": 2.015, "step": 284970 }, { "epoch": 1.0847042165602185, "grad_norm": 0.15266917645931244, "learning_rate": 0.0001773805130911443, "loss": 2.0148, "step": 284980 }, { "epoch": 1.0847422790283414, "grad_norm": 0.16672642529010773, "learning_rate": 0.00017731594399474893, "loss": 2.0083, "step": 284990 }, { "epoch": 1.084780341496464, "grad_norm": 0.16822563111782074, "learning_rate": 0.0001772513878160486, "loss": 2.0062, "step": 285000 }, { "epoch": 1.0848184039645867, "grad_norm": 0.140736386179924, "learning_rate": 0.0001771868445472934, "loss": 2.0132, "step": 285010 }, { "epoch": 1.0848564664327094, "grad_norm": 0.15960873663425446, "learning_rate": 0.0001771223141807412, "loss": 2.007, "step": 285020 }, { "epoch": 1.084894528900832, "grad_norm": 0.1767164021730423, "learning_rate": 0.00017705779670865767, "loss": 2.0038, "step": 285030 }, { "epoch": 1.0849325913689547, "grad_norm": 0.15160715579986572, "learning_rate": 0.00017699329212331622, "loss": 2.0039, "step": 285040 }, { "epoch": 1.0849706538370774, "grad_norm": 0.151358962059021, "learning_rate": 0.00017692880041699787, "loss": 1.9956, "step": 285050 }, { "epoch": 1.0850087163052, "grad_norm": 0.13658073544502258, "learning_rate": 0.0001768643215819914, "loss": 2.0185, "step": 285060 }, { "epoch": 1.085046778773323, "grad_norm": 0.15917928516864777, "learning_rate": 0.0001767998556105933, "loss": 2.0128, "step": 285070 }, { "epoch": 1.0850848412414456, "grad_norm": 0.1380550116300583, "learning_rate": 0.0001767354024951077, "loss": 2.016, "step": 285080 }, { "epoch": 1.0851229037095682, "grad_norm": 0.17589768767356873, "learning_rate": 0.0001766709622278465, "loss": 2.0192, "step": 285090 }, { "epoch": 1.0851609661776909, "grad_norm": 0.1453203558921814, "learning_rate": 0.0001766065348011291, "loss": 2.0167, "step": 285100 }, { "epoch": 1.0851990286458135, "grad_norm": 0.13754414021968842, "learning_rate": 0.00017654212020728268, "loss": 2.0006, "step": 285110 }, { "epoch": 1.0852370911139362, "grad_norm": 0.1450946182012558, "learning_rate": 0.0001764777184386419, "loss": 2.0104, "step": 285120 }, { "epoch": 1.0852751535820588, "grad_norm": 0.14516545832157135, "learning_rate": 0.0001764133294875493, "loss": 2.0098, "step": 285130 }, { "epoch": 1.0853132160501815, "grad_norm": 0.15919335186481476, "learning_rate": 0.00017634895334635488, "loss": 2.018, "step": 285140 }, { "epoch": 1.0853512785183042, "grad_norm": 0.1589651107788086, "learning_rate": 0.00017628459000741616, "loss": 1.993, "step": 285150 }, { "epoch": 1.0853893409864268, "grad_norm": 0.1578570306301117, "learning_rate": 0.0001762202394630984, "loss": 2.0113, "step": 285160 }, { "epoch": 1.0854274034545497, "grad_norm": 0.15003244578838348, "learning_rate": 0.0001761559017057745, "loss": 2.0134, "step": 285170 }, { "epoch": 1.0854654659226723, "grad_norm": 0.15741883218288422, "learning_rate": 0.00017609157672782472, "loss": 2.0009, "step": 285180 }, { "epoch": 1.085503528390795, "grad_norm": 0.13433581590652466, "learning_rate": 0.00017602726452163703, "loss": 2.0201, "step": 285190 }, { "epoch": 1.0855415908589177, "grad_norm": 0.16501778364181519, "learning_rate": 0.00017596296507960702, "loss": 2.0033, "step": 285200 }, { "epoch": 1.0855796533270403, "grad_norm": 0.17917238175868988, "learning_rate": 0.0001758986783941376, "loss": 1.9986, "step": 285210 }, { "epoch": 1.085617715795163, "grad_norm": 0.1420595645904541, "learning_rate": 0.00017583440445763937, "loss": 2.0049, "step": 285220 }, { "epoch": 1.0856557782632856, "grad_norm": 0.13732458651065826, "learning_rate": 0.00017577014326253048, "loss": 1.9963, "step": 285230 }, { "epoch": 1.0856938407314083, "grad_norm": 0.15316133201122284, "learning_rate": 0.00017570589480123656, "loss": 2.0055, "step": 285240 }, { "epoch": 1.0857319031995312, "grad_norm": 0.16198821365833282, "learning_rate": 0.00017564165906619063, "loss": 2.0089, "step": 285250 }, { "epoch": 1.0857699656676538, "grad_norm": 0.14189749956130981, "learning_rate": 0.00017557743604983334, "loss": 2.0037, "step": 285260 }, { "epoch": 1.0858080281357765, "grad_norm": 0.1340772807598114, "learning_rate": 0.0001755132257446127, "loss": 2.0069, "step": 285270 }, { "epoch": 1.0858460906038991, "grad_norm": 0.156874418258667, "learning_rate": 0.0001754490281429844, "loss": 2.0035, "step": 285280 }, { "epoch": 1.0858841530720218, "grad_norm": 0.1479601413011551, "learning_rate": 0.00017538484323741137, "loss": 2.0102, "step": 285290 }, { "epoch": 1.0859222155401445, "grad_norm": 0.13866026699543, "learning_rate": 0.00017532067102036402, "loss": 2.004, "step": 285300 }, { "epoch": 1.085960278008267, "grad_norm": 0.14532513916492462, "learning_rate": 0.00017525651148432036, "loss": 1.9924, "step": 285310 }, { "epoch": 1.0859983404763898, "grad_norm": 0.1476632058620453, "learning_rate": 0.00017519236462176568, "loss": 2.0077, "step": 285320 }, { "epoch": 1.0860364029445124, "grad_norm": 0.155150905251503, "learning_rate": 0.00017512823042519271, "loss": 2.0086, "step": 285330 }, { "epoch": 1.0860744654126353, "grad_norm": 0.17510360479354858, "learning_rate": 0.00017506410888710168, "loss": 2.029, "step": 285340 }, { "epoch": 1.086112527880758, "grad_norm": 0.147820383310318, "learning_rate": 0.000175, "loss": 2.0126, "step": 285350 }, { "epoch": 1.0861505903488806, "grad_norm": 0.159368097782135, "learning_rate": 0.00017493590375640273, "loss": 2.0101, "step": 285360 }, { "epoch": 1.0861886528170033, "grad_norm": 0.18122664093971252, "learning_rate": 0.00017487182014883218, "loss": 2.0041, "step": 285370 }, { "epoch": 1.086226715285126, "grad_norm": 0.1660909205675125, "learning_rate": 0.000174807749169818, "loss": 2.0045, "step": 285380 }, { "epoch": 1.0862647777532486, "grad_norm": 0.16297060251235962, "learning_rate": 0.0001747436908118973, "loss": 2.0196, "step": 285390 }, { "epoch": 1.0863028402213712, "grad_norm": 0.1591036319732666, "learning_rate": 0.0001746796450676144, "loss": 2.0259, "step": 285400 }, { "epoch": 1.086340902689494, "grad_norm": 0.1548295021057129, "learning_rate": 0.00017461561192952114, "loss": 2.0177, "step": 285410 }, { "epoch": 1.0863789651576168, "grad_norm": 0.19422496855258942, "learning_rate": 0.00017455159139017644, "loss": 1.9943, "step": 285420 }, { "epoch": 1.0864170276257394, "grad_norm": 0.14384585618972778, "learning_rate": 0.00017448758344214683, "loss": 2.0085, "step": 285430 }, { "epoch": 1.086455090093862, "grad_norm": 0.15218782424926758, "learning_rate": 0.0001744235880780059, "loss": 2.0069, "step": 285440 }, { "epoch": 1.0864931525619848, "grad_norm": 0.14296023547649384, "learning_rate": 0.00017435960529033463, "loss": 2.0121, "step": 285450 }, { "epoch": 1.0865312150301074, "grad_norm": 0.13962121307849884, "learning_rate": 0.0001742956350717213, "loss": 1.999, "step": 285460 }, { "epoch": 1.08656927749823, "grad_norm": 0.15799452364444733, "learning_rate": 0.00017423167741476153, "loss": 2.0169, "step": 285470 }, { "epoch": 1.0866073399663527, "grad_norm": 0.1890258938074112, "learning_rate": 0.000174167732312058, "loss": 2.0067, "step": 285480 }, { "epoch": 1.0866454024344754, "grad_norm": 0.187018021941185, "learning_rate": 0.00017410379975622093, "loss": 2.0143, "step": 285490 }, { "epoch": 1.086683464902598, "grad_norm": 0.1458263248205185, "learning_rate": 0.00017403987973986756, "loss": 1.9961, "step": 285500 }, { "epoch": 1.086721527370721, "grad_norm": 0.15105989575386047, "learning_rate": 0.00017397597225562246, "loss": 2.0032, "step": 285510 }, { "epoch": 1.0867595898388436, "grad_norm": 0.14834915101528168, "learning_rate": 0.00017391207729611737, "loss": 2.0062, "step": 285520 }, { "epoch": 1.0867976523069662, "grad_norm": 0.14579245448112488, "learning_rate": 0.00017384819485399133, "loss": 2.0119, "step": 285530 }, { "epoch": 1.086835714775089, "grad_norm": 0.1407989114522934, "learning_rate": 0.00017378432492189057, "loss": 2.0034, "step": 285540 }, { "epoch": 1.0868737772432115, "grad_norm": 0.1429048627614975, "learning_rate": 0.00017372046749246844, "loss": 2.0049, "step": 285550 }, { "epoch": 1.0869118397113342, "grad_norm": 0.14179645478725433, "learning_rate": 0.0001736566225583856, "loss": 1.9977, "step": 285560 }, { "epoch": 1.0869499021794569, "grad_norm": 0.16286250948905945, "learning_rate": 0.00017359279011230978, "loss": 2.0151, "step": 285570 }, { "epoch": 1.0869879646475795, "grad_norm": 0.1652802675962448, "learning_rate": 0.00017352897014691587, "loss": 2.0158, "step": 285580 }, { "epoch": 1.0870260271157024, "grad_norm": 0.17722772061824799, "learning_rate": 0.0001734651626548861, "loss": 2.0127, "step": 285590 }, { "epoch": 1.087064089583825, "grad_norm": 0.16950948536396027, "learning_rate": 0.00017340136762890958, "loss": 2.0054, "step": 285600 }, { "epoch": 1.0871021520519477, "grad_norm": 0.16562749445438385, "learning_rate": 0.0001733375850616828, "loss": 2.0079, "step": 285610 }, { "epoch": 1.0871402145200704, "grad_norm": 0.16646356880664825, "learning_rate": 0.00017327381494590922, "loss": 2.0163, "step": 285620 }, { "epoch": 1.087178276988193, "grad_norm": 0.17872683703899384, "learning_rate": 0.00017321005727429946, "loss": 2.0024, "step": 285630 }, { "epoch": 1.0872163394563157, "grad_norm": 0.14331817626953125, "learning_rate": 0.00017314631203957126, "loss": 2.0333, "step": 285640 }, { "epoch": 1.0872544019244383, "grad_norm": 0.15764577686786652, "learning_rate": 0.0001730825792344495, "loss": 2.0171, "step": 285650 }, { "epoch": 1.087292464392561, "grad_norm": 0.21209511160850525, "learning_rate": 0.00017301885885166607, "loss": 1.9974, "step": 285660 }, { "epoch": 1.0873305268606837, "grad_norm": 0.14507344365119934, "learning_rate": 0.00017295515088396008, "loss": 2.0294, "step": 285670 }, { "epoch": 1.0873685893288065, "grad_norm": 0.15377765893936157, "learning_rate": 0.00017289145532407746, "loss": 1.9843, "step": 285680 }, { "epoch": 1.0874066517969292, "grad_norm": 0.19551454484462738, "learning_rate": 0.0001728277721647715, "loss": 2.019, "step": 285690 }, { "epoch": 1.0874447142650518, "grad_norm": 0.17279034852981567, "learning_rate": 0.00017276410139880233, "loss": 2.0254, "step": 285700 }, { "epoch": 1.0874827767331745, "grad_norm": 0.14661487936973572, "learning_rate": 0.00017270044301893718, "loss": 2.0132, "step": 285710 }, { "epoch": 1.0875208392012972, "grad_norm": 0.14725978672504425, "learning_rate": 0.00017263679701795033, "loss": 2.0066, "step": 285720 }, { "epoch": 1.0875589016694198, "grad_norm": 0.20444689691066742, "learning_rate": 0.00017257316338862305, "loss": 1.989, "step": 285730 }, { "epoch": 1.0875969641375425, "grad_norm": 0.14883337914943695, "learning_rate": 0.0001725095421237437, "loss": 2.0105, "step": 285740 }, { "epoch": 1.0876350266056651, "grad_norm": 0.1610412299633026, "learning_rate": 0.00017244593321610757, "loss": 2.0308, "step": 285750 }, { "epoch": 1.087673089073788, "grad_norm": 0.17380206286907196, "learning_rate": 0.00017238233665851693, "loss": 2.0085, "step": 285760 }, { "epoch": 1.0877111515419107, "grad_norm": 0.1897568255662918, "learning_rate": 0.00017231875244378114, "loss": 2.0239, "step": 285770 }, { "epoch": 1.0877492140100333, "grad_norm": 0.18813052773475647, "learning_rate": 0.0001722551805647164, "loss": 2.0117, "step": 285780 }, { "epoch": 1.087787276478156, "grad_norm": 0.15918661653995514, "learning_rate": 0.00017219162101414593, "loss": 1.993, "step": 285790 }, { "epoch": 1.0878253389462786, "grad_norm": 0.13849321007728577, "learning_rate": 0.00017212807378489997, "loss": 2.0134, "step": 285800 }, { "epoch": 1.0878634014144013, "grad_norm": 0.1621067076921463, "learning_rate": 0.00017206453886981567, "loss": 2.0181, "step": 285810 }, { "epoch": 1.087901463882524, "grad_norm": 0.17289504408836365, "learning_rate": 0.000172001016261737, "loss": 1.9917, "step": 285820 }, { "epoch": 1.0879395263506466, "grad_norm": 0.1490570455789566, "learning_rate": 0.00017193750595351505, "loss": 2.0093, "step": 285830 }, { "epoch": 1.0879775888187693, "grad_norm": 0.15735618770122528, "learning_rate": 0.00017187400793800767, "loss": 2.0095, "step": 285840 }, { "epoch": 1.0880156512868921, "grad_norm": 0.19365963339805603, "learning_rate": 0.00017181052220807975, "loss": 2.002, "step": 285850 }, { "epoch": 1.0880537137550148, "grad_norm": 0.15690432488918304, "learning_rate": 0.00017174704875660296, "loss": 1.9952, "step": 285860 }, { "epoch": 1.0880917762231375, "grad_norm": 0.17785774171352386, "learning_rate": 0.00017168358757645597, "loss": 1.9854, "step": 285870 }, { "epoch": 1.0881298386912601, "grad_norm": 0.15966349840164185, "learning_rate": 0.00017162013866052424, "loss": 2.0002, "step": 285880 }, { "epoch": 1.0881679011593828, "grad_norm": 0.14687706530094147, "learning_rate": 0.00017155670200170016, "loss": 2.0077, "step": 285890 }, { "epoch": 1.0882059636275054, "grad_norm": 0.15228454768657684, "learning_rate": 0.00017149327759288297, "loss": 2.0044, "step": 285900 }, { "epoch": 1.088244026095628, "grad_norm": 0.16104793548583984, "learning_rate": 0.00017142986542697868, "loss": 2.0053, "step": 285910 }, { "epoch": 1.0882820885637507, "grad_norm": 0.1378631442785263, "learning_rate": 0.00017136646549690033, "loss": 2.0007, "step": 285920 }, { "epoch": 1.0883201510318736, "grad_norm": 0.149201899766922, "learning_rate": 0.00017130307779556763, "loss": 2.0061, "step": 285930 }, { "epoch": 1.0883582134999963, "grad_norm": 0.15267148613929749, "learning_rate": 0.00017123970231590718, "loss": 2.0115, "step": 285940 }, { "epoch": 1.088396275968119, "grad_norm": 0.16789740324020386, "learning_rate": 0.0001711763390508524, "loss": 1.9813, "step": 285950 }, { "epoch": 1.0884343384362416, "grad_norm": 0.1826680302619934, "learning_rate": 0.00017111298799334345, "loss": 2.0106, "step": 285960 }, { "epoch": 1.0884724009043643, "grad_norm": 0.16165274381637573, "learning_rate": 0.00017104964913632737, "loss": 2.006, "step": 285970 }, { "epoch": 1.088510463372487, "grad_norm": 0.15090411901474, "learning_rate": 0.00017098632247275797, "loss": 1.9881, "step": 285980 }, { "epoch": 1.0885485258406096, "grad_norm": 0.15984360873699188, "learning_rate": 0.00017092300799559585, "loss": 2.0049, "step": 285990 }, { "epoch": 1.0885865883087322, "grad_norm": 0.16495366394519806, "learning_rate": 0.00017085970569780833, "loss": 2.01, "step": 286000 }, { "epoch": 1.0886246507768549, "grad_norm": 0.1553267389535904, "learning_rate": 0.0001707964155723696, "loss": 2.0111, "step": 286010 }, { "epoch": 1.0886627132449775, "grad_norm": 0.14737053215503693, "learning_rate": 0.00017073313761226038, "loss": 2.0051, "step": 286020 }, { "epoch": 1.0887007757131004, "grad_norm": 0.19088530540466309, "learning_rate": 0.00017066987181046838, "loss": 2.0076, "step": 286030 }, { "epoch": 1.088738838181223, "grad_norm": 0.14853475987911224, "learning_rate": 0.0001706066181599879, "loss": 2.0169, "step": 286040 }, { "epoch": 1.0887769006493457, "grad_norm": 0.22443360090255737, "learning_rate": 0.00017054337665382008, "loss": 2.0177, "step": 286050 }, { "epoch": 1.0888149631174684, "grad_norm": 0.16358278691768646, "learning_rate": 0.00017048014728497264, "loss": 2.0092, "step": 286060 }, { "epoch": 1.088853025585591, "grad_norm": 0.14100675284862518, "learning_rate": 0.0001704169300464601, "loss": 2.0094, "step": 286070 }, { "epoch": 1.0888910880537137, "grad_norm": 0.14958365261554718, "learning_rate": 0.00017035372493130359, "loss": 1.9926, "step": 286080 }, { "epoch": 1.0889291505218364, "grad_norm": 0.14957799017429352, "learning_rate": 0.00017029053193253103, "loss": 2.001, "step": 286090 }, { "epoch": 1.088967212989959, "grad_norm": 0.16943015158176422, "learning_rate": 0.00017022735104317704, "loss": 2.01, "step": 286100 }, { "epoch": 1.089005275458082, "grad_norm": 0.15172553062438965, "learning_rate": 0.00017016418225628278, "loss": 2.001, "step": 286110 }, { "epoch": 1.0890433379262046, "grad_norm": 0.15565767884254456, "learning_rate": 0.0001701010255648961, "loss": 2.0, "step": 286120 }, { "epoch": 1.0890814003943272, "grad_norm": 0.14762194454669952, "learning_rate": 0.00017003788096207168, "loss": 1.9912, "step": 286130 }, { "epoch": 1.0891194628624499, "grad_norm": 0.1653757095336914, "learning_rate": 0.00016997474844087057, "loss": 2.0073, "step": 286140 }, { "epoch": 1.0891575253305725, "grad_norm": 0.1787230223417282, "learning_rate": 0.00016991162799436066, "loss": 2.0119, "step": 286150 }, { "epoch": 1.0891955877986952, "grad_norm": 0.21251770853996277, "learning_rate": 0.00016984851961561643, "loss": 2.0091, "step": 286160 }, { "epoch": 1.0892336502668178, "grad_norm": 0.16268038749694824, "learning_rate": 0.00016978542329771897, "loss": 2.0079, "step": 286170 }, { "epoch": 1.0892717127349405, "grad_norm": 0.139210045337677, "learning_rate": 0.00016972233903375585, "loss": 2.0042, "step": 286180 }, { "epoch": 1.0893097752030632, "grad_norm": 0.17149564623832703, "learning_rate": 0.00016965926681682141, "loss": 2.0037, "step": 286190 }, { "epoch": 1.089347837671186, "grad_norm": 0.1662767082452774, "learning_rate": 0.0001695962066400165, "loss": 2.0081, "step": 286200 }, { "epoch": 1.0893859001393087, "grad_norm": 0.1791909635066986, "learning_rate": 0.00016953315849644864, "loss": 2.0038, "step": 286210 }, { "epoch": 1.0894239626074314, "grad_norm": 0.14849887788295746, "learning_rate": 0.00016947012237923182, "loss": 2.006, "step": 286220 }, { "epoch": 1.089462025075554, "grad_norm": 0.15307657420635223, "learning_rate": 0.0001694070982814866, "loss": 2.0029, "step": 286230 }, { "epoch": 1.0895000875436767, "grad_norm": 0.1445232778787613, "learning_rate": 0.00016934408619634011, "loss": 1.9932, "step": 286240 }, { "epoch": 1.0895381500117993, "grad_norm": 0.16825585067272186, "learning_rate": 0.00016928108611692617, "loss": 2.0032, "step": 286250 }, { "epoch": 1.089576212479922, "grad_norm": 0.1817305088043213, "learning_rate": 0.00016921809803638495, "loss": 2.0079, "step": 286260 }, { "epoch": 1.0896142749480446, "grad_norm": 0.1555592119693756, "learning_rate": 0.0001691551219478631, "loss": 1.9982, "step": 286270 }, { "epoch": 1.0896523374161675, "grad_norm": 0.14670062065124512, "learning_rate": 0.00016909215784451404, "loss": 2.0045, "step": 286280 }, { "epoch": 1.0896903998842902, "grad_norm": 0.15879133343696594, "learning_rate": 0.0001690292057194976, "loss": 2.0125, "step": 286290 }, { "epoch": 1.0897284623524128, "grad_norm": 0.1739165037870407, "learning_rate": 0.00016896626556597993, "loss": 1.9925, "step": 286300 }, { "epoch": 1.0897665248205355, "grad_norm": 0.13863325119018555, "learning_rate": 0.0001689033373771339, "loss": 2.0041, "step": 286310 }, { "epoch": 1.0898045872886581, "grad_norm": 0.1826622039079666, "learning_rate": 0.0001688404211461389, "loss": 2.0126, "step": 286320 }, { "epoch": 1.0898426497567808, "grad_norm": 0.18271639943122864, "learning_rate": 0.00016877751686618053, "loss": 1.9994, "step": 286330 }, { "epoch": 1.0898807122249035, "grad_norm": 0.14765529334545135, "learning_rate": 0.0001687146245304511, "loss": 2.0127, "step": 286340 }, { "epoch": 1.0899187746930261, "grad_norm": 0.1734556257724762, "learning_rate": 0.0001686517441321493, "loss": 2.0015, "step": 286350 }, { "epoch": 1.0899568371611488, "grad_norm": 0.14427216351032257, "learning_rate": 0.00016858887566448022, "loss": 2.0012, "step": 286360 }, { "epoch": 1.0899948996292717, "grad_norm": 0.17495569586753845, "learning_rate": 0.00016852601912065556, "loss": 2.0077, "step": 286370 }, { "epoch": 1.0900329620973943, "grad_norm": 0.14779295027256012, "learning_rate": 0.00016846317449389326, "loss": 2.0107, "step": 286380 }, { "epoch": 1.090071024565517, "grad_norm": 0.16964447498321533, "learning_rate": 0.0001684003417774178, "loss": 1.9968, "step": 286390 }, { "epoch": 1.0901090870336396, "grad_norm": 0.14409834146499634, "learning_rate": 0.00016833752096446003, "loss": 2.0048, "step": 286400 }, { "epoch": 1.0901471495017623, "grad_norm": 0.13902217149734497, "learning_rate": 0.00016827471204825724, "loss": 1.9971, "step": 286410 }, { "epoch": 1.090185211969885, "grad_norm": 0.17094755172729492, "learning_rate": 0.0001682119150220531, "loss": 2.0094, "step": 286420 }, { "epoch": 1.0902232744380076, "grad_norm": 0.164179265499115, "learning_rate": 0.00016814912987909764, "loss": 2.0153, "step": 286430 }, { "epoch": 1.0902613369061303, "grad_norm": 0.14546635746955872, "learning_rate": 0.00016808635661264744, "loss": 2.0041, "step": 286440 }, { "epoch": 1.0902993993742531, "grad_norm": 0.14570185542106628, "learning_rate": 0.00016802359521596517, "loss": 2.0091, "step": 286450 }, { "epoch": 1.0903374618423758, "grad_norm": 0.14966364204883575, "learning_rate": 0.00016796084568232016, "loss": 1.9971, "step": 286460 }, { "epoch": 1.0903755243104984, "grad_norm": 0.17986074090003967, "learning_rate": 0.00016789810800498794, "loss": 2.0117, "step": 286470 }, { "epoch": 1.090413586778621, "grad_norm": 0.18305820226669312, "learning_rate": 0.00016783538217725037, "loss": 1.9942, "step": 286480 }, { "epoch": 1.0904516492467438, "grad_norm": 0.20067545771598816, "learning_rate": 0.00016777266819239573, "loss": 2.0062, "step": 286490 }, { "epoch": 1.0904897117148664, "grad_norm": 0.1565934121608734, "learning_rate": 0.0001677099660437186, "loss": 2.0087, "step": 286500 }, { "epoch": 1.090527774182989, "grad_norm": 0.16560204327106476, "learning_rate": 0.0001676472757245198, "loss": 1.9894, "step": 286510 }, { "epoch": 1.0905658366511117, "grad_norm": 0.15744443237781525, "learning_rate": 0.00016758459722810676, "loss": 1.9977, "step": 286520 }, { "epoch": 1.0906038991192344, "grad_norm": 0.1524791717529297, "learning_rate": 0.00016752193054779286, "loss": 2.0021, "step": 286530 }, { "epoch": 1.0906419615873573, "grad_norm": 0.16802918910980225, "learning_rate": 0.00016745927567689801, "loss": 1.9982, "step": 286540 }, { "epoch": 1.09068002405548, "grad_norm": 0.16290956735610962, "learning_rate": 0.0001673966326087482, "loss": 2.0062, "step": 286550 }, { "epoch": 1.0907180865236026, "grad_norm": 0.19118371605873108, "learning_rate": 0.00016733400133667604, "loss": 1.9943, "step": 286560 }, { "epoch": 1.0907561489917252, "grad_norm": 0.17798908054828644, "learning_rate": 0.0001672713818540201, "loss": 2.007, "step": 286570 }, { "epoch": 1.090794211459848, "grad_norm": 0.16545897722244263, "learning_rate": 0.0001672087741541253, "loss": 2.0211, "step": 286580 }, { "epoch": 1.0908322739279706, "grad_norm": 0.16653016209602356, "learning_rate": 0.00016714617823034288, "loss": 2.0091, "step": 286590 }, { "epoch": 1.0908703363960932, "grad_norm": 0.1368710845708847, "learning_rate": 0.00016708359407603037, "loss": 2.0035, "step": 286600 }, { "epoch": 1.0909083988642159, "grad_norm": 0.16141341626644135, "learning_rate": 0.0001670210216845514, "loss": 2.0106, "step": 286610 }, { "epoch": 1.0909464613323387, "grad_norm": 0.13862714171409607, "learning_rate": 0.00016695846104927592, "loss": 2.0011, "step": 286620 }, { "epoch": 1.0909845238004614, "grad_norm": 0.13830986618995667, "learning_rate": 0.00016689591216358012, "loss": 2.0061, "step": 286630 }, { "epoch": 1.091022586268584, "grad_norm": 0.1664019078016281, "learning_rate": 0.0001668333750208464, "loss": 1.9973, "step": 286640 }, { "epoch": 1.0910606487367067, "grad_norm": 0.14373458921909332, "learning_rate": 0.00016677084961446326, "loss": 2.0144, "step": 286650 }, { "epoch": 1.0910987112048294, "grad_norm": 0.16323675215244293, "learning_rate": 0.00016670833593782558, "loss": 2.0114, "step": 286660 }, { "epoch": 1.091136773672952, "grad_norm": 0.167044535279274, "learning_rate": 0.0001666458339843343, "loss": 1.9995, "step": 286670 }, { "epoch": 1.0911748361410747, "grad_norm": 0.14962568879127502, "learning_rate": 0.00016658334374739664, "loss": 1.9798, "step": 286680 }, { "epoch": 1.0912128986091973, "grad_norm": 0.13388465344905853, "learning_rate": 0.00016652086522042593, "loss": 2.013, "step": 286690 }, { "epoch": 1.09125096107732, "grad_norm": 0.14110036194324493, "learning_rate": 0.0001664583983968417, "loss": 1.9943, "step": 286700 }, { "epoch": 1.0912890235454429, "grad_norm": 0.17264728248119354, "learning_rate": 0.0001663959432700695, "loss": 2.0083, "step": 286710 }, { "epoch": 1.0913270860135655, "grad_norm": 0.16424086689949036, "learning_rate": 0.00016633349983354135, "loss": 2.0085, "step": 286720 }, { "epoch": 1.0913651484816882, "grad_norm": 0.15271663665771484, "learning_rate": 0.00016627106808069514, "loss": 1.9945, "step": 286730 }, { "epoch": 1.0914032109498109, "grad_norm": 0.1648697853088379, "learning_rate": 0.00016620864800497503, "loss": 1.9921, "step": 286740 }, { "epoch": 1.0914412734179335, "grad_norm": 0.16213729977607727, "learning_rate": 0.00016614623959983115, "loss": 1.9955, "step": 286750 }, { "epoch": 1.0914793358860562, "grad_norm": 0.1640002578496933, "learning_rate": 0.00016608384285872002, "loss": 2.0016, "step": 286760 }, { "epoch": 1.0915173983541788, "grad_norm": 0.19037093222141266, "learning_rate": 0.00016602145777510396, "loss": 2.0206, "step": 286770 }, { "epoch": 1.0915554608223015, "grad_norm": 0.1593031883239746, "learning_rate": 0.00016595908434245167, "loss": 1.9916, "step": 286780 }, { "epoch": 1.0915935232904244, "grad_norm": 0.1630665510892868, "learning_rate": 0.0001658967225542377, "loss": 1.9885, "step": 286790 }, { "epoch": 1.091631585758547, "grad_norm": 0.13461852073669434, "learning_rate": 0.000165834372403943, "loss": 2.0162, "step": 286800 }, { "epoch": 1.0916696482266697, "grad_norm": 0.19513630867004395, "learning_rate": 0.00016577203388505425, "loss": 2.0226, "step": 286810 }, { "epoch": 1.0917077106947923, "grad_norm": 0.1570121794939041, "learning_rate": 0.0001657097069910644, "loss": 2.0136, "step": 286820 }, { "epoch": 1.091745773162915, "grad_norm": 0.144845649600029, "learning_rate": 0.00016564739171547255, "loss": 1.9948, "step": 286830 }, { "epoch": 1.0917838356310376, "grad_norm": 0.15379267930984497, "learning_rate": 0.00016558508805178358, "loss": 2.0069, "step": 286840 }, { "epoch": 1.0918218980991603, "grad_norm": 0.14519637823104858, "learning_rate": 0.00016552279599350871, "loss": 2.0013, "step": 286850 }, { "epoch": 1.091859960567283, "grad_norm": 0.14761601388454437, "learning_rate": 0.00016546051553416498, "loss": 1.9983, "step": 286860 }, { "epoch": 1.0918980230354056, "grad_norm": 0.15481062233448029, "learning_rate": 0.00016539824666727559, "loss": 1.9977, "step": 286870 }, { "epoch": 1.0919360855035283, "grad_norm": 0.14929449558258057, "learning_rate": 0.00016533598938636978, "loss": 2.0055, "step": 286880 }, { "epoch": 1.0919741479716512, "grad_norm": 0.16063956916332245, "learning_rate": 0.00016527374368498272, "loss": 2.006, "step": 286890 }, { "epoch": 1.0920122104397738, "grad_norm": 0.139910489320755, "learning_rate": 0.0001652115095566556, "loss": 1.9999, "step": 286900 }, { "epoch": 1.0920502729078965, "grad_norm": 0.15074720978736877, "learning_rate": 0.0001651492869949356, "loss": 2.0081, "step": 286910 }, { "epoch": 1.0920883353760191, "grad_norm": 0.14458464086055756, "learning_rate": 0.00016508707599337607, "loss": 2.0055, "step": 286920 }, { "epoch": 1.0921263978441418, "grad_norm": 0.15293492376804352, "learning_rate": 0.0001650248765455361, "loss": 2.0052, "step": 286930 }, { "epoch": 1.0921644603122644, "grad_norm": 0.1694762408733368, "learning_rate": 0.00016496268864498093, "loss": 2.0053, "step": 286940 }, { "epoch": 1.092202522780387, "grad_norm": 0.1571517437696457, "learning_rate": 0.00016490051228528168, "loss": 1.9962, "step": 286950 }, { "epoch": 1.0922405852485098, "grad_norm": 0.1510549634695053, "learning_rate": 0.00016483834746001548, "loss": 1.9841, "step": 286960 }, { "epoch": 1.0922786477166326, "grad_norm": 0.1525040715932846, "learning_rate": 0.00016477619416276534, "loss": 2.0132, "step": 286970 }, { "epoch": 1.0923167101847553, "grad_norm": 0.14453206956386566, "learning_rate": 0.0001647140523871204, "loss": 2.0037, "step": 286980 }, { "epoch": 1.092354772652878, "grad_norm": 0.19410859048366547, "learning_rate": 0.00016465192212667545, "loss": 1.9918, "step": 286990 }, { "epoch": 1.0923928351210006, "grad_norm": 0.17885205149650574, "learning_rate": 0.00016458980337503154, "loss": 2.0214, "step": 287000 }, { "epoch": 1.0924308975891233, "grad_norm": 0.14350204169750214, "learning_rate": 0.00016452769612579544, "loss": 2.0075, "step": 287010 }, { "epoch": 1.092468960057246, "grad_norm": 0.15950074791908264, "learning_rate": 0.00016446560037257978, "loss": 2.004, "step": 287020 }, { "epoch": 1.0925070225253686, "grad_norm": 0.17068053781986237, "learning_rate": 0.00016440351610900333, "loss": 1.9916, "step": 287030 }, { "epoch": 1.0925450849934912, "grad_norm": 0.15856210887432098, "learning_rate": 0.00016434144332869055, "loss": 2.0099, "step": 287040 }, { "epoch": 1.092583147461614, "grad_norm": 0.15143512189388275, "learning_rate": 0.00016427938202527193, "loss": 2.0088, "step": 287050 }, { "epoch": 1.0926212099297368, "grad_norm": 0.14432553946971893, "learning_rate": 0.0001642173321923837, "loss": 1.9955, "step": 287060 }, { "epoch": 1.0926592723978594, "grad_norm": 0.19511444866657257, "learning_rate": 0.00016415529382366817, "loss": 2.0103, "step": 287070 }, { "epoch": 1.092697334865982, "grad_norm": 0.16971547901630402, "learning_rate": 0.00016409326691277338, "loss": 2.0093, "step": 287080 }, { "epoch": 1.0927353973341047, "grad_norm": 0.16324810683727264, "learning_rate": 0.00016403125145335318, "loss": 2.0059, "step": 287090 }, { "epoch": 1.0927734598022274, "grad_norm": 0.1708240658044815, "learning_rate": 0.0001639692474390675, "loss": 1.9884, "step": 287100 }, { "epoch": 1.09281152227035, "grad_norm": 0.16044610738754272, "learning_rate": 0.00016390725486358187, "loss": 1.9985, "step": 287110 }, { "epoch": 1.0928495847384727, "grad_norm": 0.18719172477722168, "learning_rate": 0.00016384527372056778, "loss": 1.995, "step": 287120 }, { "epoch": 1.0928876472065954, "grad_norm": 0.15649785101413727, "learning_rate": 0.0001637833040037026, "loss": 1.9986, "step": 287130 }, { "epoch": 1.0929257096747182, "grad_norm": 0.15070591866970062, "learning_rate": 0.00016372134570666945, "loss": 2.0029, "step": 287140 }, { "epoch": 1.092963772142841, "grad_norm": 0.15930724143981934, "learning_rate": 0.00016365939882315722, "loss": 2.0, "step": 287150 }, { "epoch": 1.0930018346109636, "grad_norm": 0.1533130407333374, "learning_rate": 0.00016359746334686082, "loss": 1.9915, "step": 287160 }, { "epoch": 1.0930398970790862, "grad_norm": 0.15068431198596954, "learning_rate": 0.0001635355392714807, "loss": 2.0032, "step": 287170 }, { "epoch": 1.0930779595472089, "grad_norm": 0.1491413116455078, "learning_rate": 0.00016347362659072318, "loss": 2.0149, "step": 287180 }, { "epoch": 1.0931160220153315, "grad_norm": 0.1355583220720291, "learning_rate": 0.00016341172529830056, "loss": 2.0016, "step": 287190 }, { "epoch": 1.0931540844834542, "grad_norm": 0.1516856998205185, "learning_rate": 0.00016334983538793076, "loss": 2.0003, "step": 287200 }, { "epoch": 1.0931921469515768, "grad_norm": 0.1841883808374405, "learning_rate": 0.00016328795685333736, "loss": 2.0038, "step": 287210 }, { "epoch": 1.0932302094196995, "grad_norm": 0.15536165237426758, "learning_rate": 0.00016322608968824998, "loss": 2.0222, "step": 287220 }, { "epoch": 1.0932682718878224, "grad_norm": 0.17003241181373596, "learning_rate": 0.00016316423388640372, "loss": 2.0192, "step": 287230 }, { "epoch": 1.093306334355945, "grad_norm": 0.15207625925540924, "learning_rate": 0.00016310238944153966, "loss": 1.9915, "step": 287240 }, { "epoch": 1.0933443968240677, "grad_norm": 0.18474330008029938, "learning_rate": 0.0001630405563474045, "loss": 1.9994, "step": 287250 }, { "epoch": 1.0933824592921904, "grad_norm": 0.17792512476444244, "learning_rate": 0.00016297873459775075, "loss": 1.9965, "step": 287260 }, { "epoch": 1.093420521760313, "grad_norm": 0.14206421375274658, "learning_rate": 0.00016291692418633652, "loss": 1.9997, "step": 287270 }, { "epoch": 1.0934585842284357, "grad_norm": 0.15516752004623413, "learning_rate": 0.0001628551251069258, "loss": 1.9974, "step": 287280 }, { "epoch": 1.0934966466965583, "grad_norm": 0.1504223644733429, "learning_rate": 0.00016279333735328815, "loss": 2.0051, "step": 287290 }, { "epoch": 1.093534709164681, "grad_norm": 0.18267503380775452, "learning_rate": 0.00016273156091919894, "loss": 2.0162, "step": 287300 }, { "epoch": 1.0935727716328039, "grad_norm": 0.14748525619506836, "learning_rate": 0.00016266979579843927, "loss": 2.0015, "step": 287310 }, { "epoch": 1.0936108341009265, "grad_norm": 0.19388824701309204, "learning_rate": 0.00016260804198479578, "loss": 1.9957, "step": 287320 }, { "epoch": 1.0936488965690492, "grad_norm": 0.15450628101825714, "learning_rate": 0.0001625462994720609, "loss": 2.0021, "step": 287330 }, { "epoch": 1.0936869590371718, "grad_norm": 0.14921757578849792, "learning_rate": 0.0001624845682540328, "loss": 1.9967, "step": 287340 }, { "epoch": 1.0937250215052945, "grad_norm": 0.13192152976989746, "learning_rate": 0.00016242284832451508, "loss": 2.023, "step": 287350 }, { "epoch": 1.0937630839734171, "grad_norm": 0.16562673449516296, "learning_rate": 0.00016236113967731737, "loss": 2.0048, "step": 287360 }, { "epoch": 1.0938011464415398, "grad_norm": 0.14732389152050018, "learning_rate": 0.0001622994423062546, "loss": 1.9913, "step": 287370 }, { "epoch": 1.0938392089096625, "grad_norm": 0.16607753932476044, "learning_rate": 0.00016223775620514757, "loss": 2.0023, "step": 287380 }, { "epoch": 1.0938772713777851, "grad_norm": 0.15482759475708008, "learning_rate": 0.00016217608136782265, "loss": 1.9949, "step": 287390 }, { "epoch": 1.093915333845908, "grad_norm": 0.18163645267486572, "learning_rate": 0.00016211441778811182, "loss": 1.9959, "step": 287400 }, { "epoch": 1.0939533963140307, "grad_norm": 0.16344821453094482, "learning_rate": 0.00016205276545985272, "loss": 2.0087, "step": 287410 }, { "epoch": 1.0939914587821533, "grad_norm": 0.16161710023880005, "learning_rate": 0.00016199112437688856, "loss": 1.9912, "step": 287420 }, { "epoch": 1.094029521250276, "grad_norm": 0.16121038794517517, "learning_rate": 0.00016192949453306833, "loss": 1.9999, "step": 287430 }, { "epoch": 1.0940675837183986, "grad_norm": 0.16078747808933258, "learning_rate": 0.00016186787592224644, "loss": 2.0047, "step": 287440 }, { "epoch": 1.0941056461865213, "grad_norm": 0.1562453657388687, "learning_rate": 0.0001618062685382829, "loss": 2.0022, "step": 287450 }, { "epoch": 1.094143708654644, "grad_norm": 0.1912255585193634, "learning_rate": 0.0001617446723750435, "loss": 2.0044, "step": 287460 }, { "epoch": 1.0941817711227666, "grad_norm": 0.1432357132434845, "learning_rate": 0.00016168308742639938, "loss": 1.9939, "step": 287470 }, { "epoch": 1.0942198335908895, "grad_norm": 0.16316208243370056, "learning_rate": 0.0001616215136862274, "loss": 2.0033, "step": 287480 }, { "epoch": 1.0942578960590121, "grad_norm": 0.21677039563655853, "learning_rate": 0.00016155995114840993, "loss": 2.0013, "step": 287490 }, { "epoch": 1.0942959585271348, "grad_norm": 0.15556517243385315, "learning_rate": 0.000161498399806835, "loss": 2.0027, "step": 287500 }, { "epoch": 1.0943340209952575, "grad_norm": 0.17532147467136383, "learning_rate": 0.000161436859655396, "loss": 1.9996, "step": 287510 }, { "epoch": 1.09437208346338, "grad_norm": 0.20894719660282135, "learning_rate": 0.00016137533068799216, "loss": 2.0244, "step": 287520 }, { "epoch": 1.0944101459315028, "grad_norm": 0.15300019085407257, "learning_rate": 0.00016131381289852798, "loss": 2.0154, "step": 287530 }, { "epoch": 1.0944482083996254, "grad_norm": 0.15588730573654175, "learning_rate": 0.00016125230628091352, "loss": 2.0095, "step": 287540 }, { "epoch": 1.094486270867748, "grad_norm": 0.14091601967811584, "learning_rate": 0.0001611908108290646, "loss": 1.9952, "step": 287550 }, { "epoch": 1.0945243333358707, "grad_norm": 0.1498078852891922, "learning_rate": 0.00016112932653690237, "loss": 1.9998, "step": 287560 }, { "epoch": 1.0945623958039936, "grad_norm": 0.15718935430049896, "learning_rate": 0.0001610678533983535, "loss": 2.0035, "step": 287570 }, { "epoch": 1.0946004582721163, "grad_norm": 0.14311803877353668, "learning_rate": 0.00016100639140735019, "loss": 2.0122, "step": 287580 }, { "epoch": 1.094638520740239, "grad_norm": 0.17664818465709686, "learning_rate": 0.00016094494055783016, "loss": 2.0004, "step": 287590 }, { "epoch": 1.0946765832083616, "grad_norm": 0.17590229213237762, "learning_rate": 0.00016088350084373659, "loss": 2.0063, "step": 287600 }, { "epoch": 1.0947146456764842, "grad_norm": 0.20563502609729767, "learning_rate": 0.00016082207225901824, "loss": 1.9919, "step": 287610 }, { "epoch": 1.094752708144607, "grad_norm": 0.15983684360980988, "learning_rate": 0.00016076065479762915, "loss": 2.0112, "step": 287620 }, { "epoch": 1.0947907706127296, "grad_norm": 0.15316280722618103, "learning_rate": 0.00016069924845352906, "loss": 2.0041, "step": 287630 }, { "epoch": 1.0948288330808522, "grad_norm": 0.1465786248445511, "learning_rate": 0.00016063785322068303, "loss": 2.0049, "step": 287640 }, { "epoch": 1.094866895548975, "grad_norm": 0.20526903867721558, "learning_rate": 0.00016057646909306168, "loss": 1.9967, "step": 287650 }, { "epoch": 1.0949049580170978, "grad_norm": 0.15140342712402344, "learning_rate": 0.00016051509606464088, "loss": 2.0155, "step": 287660 }, { "epoch": 1.0949430204852204, "grad_norm": 0.13655777275562286, "learning_rate": 0.00016045373412940222, "loss": 2.0007, "step": 287670 }, { "epoch": 1.094981082953343, "grad_norm": 0.14832167327404022, "learning_rate": 0.00016039238328133254, "loss": 2.0155, "step": 287680 }, { "epoch": 1.0950191454214657, "grad_norm": 0.1434812843799591, "learning_rate": 0.00016033104351442418, "loss": 2.0123, "step": 287690 }, { "epoch": 1.0950572078895884, "grad_norm": 0.14046348631381989, "learning_rate": 0.0001602697148226749, "loss": 2.0025, "step": 287700 }, { "epoch": 1.095095270357711, "grad_norm": 0.14863237738609314, "learning_rate": 0.0001602083972000878, "loss": 2.0069, "step": 287710 }, { "epoch": 1.0951333328258337, "grad_norm": 0.15894490480422974, "learning_rate": 0.0001601470906406714, "loss": 2.0039, "step": 287720 }, { "epoch": 1.0951713952939564, "grad_norm": 0.16327515244483948, "learning_rate": 0.00016008579513843984, "loss": 1.9813, "step": 287730 }, { "epoch": 1.095209457762079, "grad_norm": 0.1889437884092331, "learning_rate": 0.00016002451068741248, "loss": 2.0002, "step": 287740 }, { "epoch": 1.0952475202302019, "grad_norm": 0.17731782793998718, "learning_rate": 0.00015996323728161393, "loss": 2.0006, "step": 287750 }, { "epoch": 1.0952855826983245, "grad_norm": 0.19618789851665497, "learning_rate": 0.00015990197491507442, "loss": 1.9982, "step": 287760 }, { "epoch": 1.0953236451664472, "grad_norm": 0.1554926037788391, "learning_rate": 0.0001598407235818295, "loss": 1.9862, "step": 287770 }, { "epoch": 1.0953617076345699, "grad_norm": 0.16439561545848846, "learning_rate": 0.00015977948327592002, "loss": 2.0052, "step": 287780 }, { "epoch": 1.0953997701026925, "grad_norm": 0.15064625442028046, "learning_rate": 0.00015971825399139223, "loss": 2.0139, "step": 287790 }, { "epoch": 1.0954378325708152, "grad_norm": 0.16435232758522034, "learning_rate": 0.00015965703572229774, "loss": 2.019, "step": 287800 }, { "epoch": 1.0954758950389378, "grad_norm": 0.13841593265533447, "learning_rate": 0.0001595958284626935, "loss": 1.9997, "step": 287810 }, { "epoch": 1.0955139575070605, "grad_norm": 0.16646708548069, "learning_rate": 0.00015953463220664182, "loss": 2.005, "step": 287820 }, { "epoch": 1.0955520199751834, "grad_norm": 0.2216751128435135, "learning_rate": 0.0001594734469482103, "loss": 2.0131, "step": 287830 }, { "epoch": 1.095590082443306, "grad_norm": 0.19386476278305054, "learning_rate": 0.00015941227268147196, "loss": 2.0042, "step": 287840 }, { "epoch": 1.0956281449114287, "grad_norm": 0.13800813257694244, "learning_rate": 0.00015935110940050508, "loss": 2.0069, "step": 287850 }, { "epoch": 1.0956662073795513, "grad_norm": 0.14591440558433533, "learning_rate": 0.00015928995709939321, "loss": 2.0032, "step": 287860 }, { "epoch": 1.095704269847674, "grad_norm": 0.24522840976715088, "learning_rate": 0.00015922881577222524, "loss": 2.0018, "step": 287870 }, { "epoch": 1.0957423323157967, "grad_norm": 0.14610832929611206, "learning_rate": 0.00015916768541309546, "loss": 2.0102, "step": 287880 }, { "epoch": 1.0957803947839193, "grad_norm": 0.15744467079639435, "learning_rate": 0.0001591065660161033, "loss": 2.0041, "step": 287890 }, { "epoch": 1.095818457252042, "grad_norm": 0.14038099348545074, "learning_rate": 0.0001590454575753536, "loss": 2.0019, "step": 287900 }, { "epoch": 1.0958565197201646, "grad_norm": 0.16151206195354462, "learning_rate": 0.00015898436008495643, "loss": 2.0176, "step": 287910 }, { "epoch": 1.0958945821882875, "grad_norm": 0.17450959980487823, "learning_rate": 0.0001589232735390271, "loss": 2.0055, "step": 287920 }, { "epoch": 1.0959326446564102, "grad_norm": 0.14196312427520752, "learning_rate": 0.00015886219793168627, "loss": 2.0046, "step": 287930 }, { "epoch": 1.0959707071245328, "grad_norm": 0.19493959844112396, "learning_rate": 0.0001588011332570598, "loss": 1.9931, "step": 287940 }, { "epoch": 1.0960087695926555, "grad_norm": 0.1691603660583496, "learning_rate": 0.00015874007950927881, "loss": 2.0087, "step": 287950 }, { "epoch": 1.0960468320607781, "grad_norm": 0.1751086562871933, "learning_rate": 0.00015867903668247973, "loss": 1.9943, "step": 287960 }, { "epoch": 1.0960848945289008, "grad_norm": 0.18380074203014374, "learning_rate": 0.00015861800477080419, "loss": 2.0019, "step": 287970 }, { "epoch": 1.0961229569970234, "grad_norm": 0.1523730605840683, "learning_rate": 0.000158556983768399, "loss": 1.9901, "step": 287980 }, { "epoch": 1.096161019465146, "grad_norm": 0.18034271895885468, "learning_rate": 0.0001584959736694163, "loss": 1.9968, "step": 287990 }, { "epoch": 1.096199081933269, "grad_norm": 0.15186168253421783, "learning_rate": 0.0001584349744680134, "loss": 2.0008, "step": 288000 }, { "epoch": 1.0962371444013916, "grad_norm": 0.16246947646141052, "learning_rate": 0.0001583739861583528, "loss": 1.9983, "step": 288010 }, { "epoch": 1.0962752068695143, "grad_norm": 0.14793811738491058, "learning_rate": 0.00015831300873460225, "loss": 2.0115, "step": 288020 }, { "epoch": 1.096313269337637, "grad_norm": 0.15794621407985687, "learning_rate": 0.0001582520421909347, "loss": 1.9923, "step": 288030 }, { "epoch": 1.0963513318057596, "grad_norm": 0.15211062133312225, "learning_rate": 0.00015819108652152835, "loss": 2.015, "step": 288040 }, { "epoch": 1.0963893942738823, "grad_norm": 0.15101411938667297, "learning_rate": 0.00015813014172056639, "loss": 1.9842, "step": 288050 }, { "epoch": 1.096427456742005, "grad_norm": 0.1630418449640274, "learning_rate": 0.00015806920778223748, "loss": 1.9971, "step": 288060 }, { "epoch": 1.0964655192101276, "grad_norm": 0.1481507569551468, "learning_rate": 0.0001580082847007353, "loss": 1.9978, "step": 288070 }, { "epoch": 1.0965035816782502, "grad_norm": 0.1584104299545288, "learning_rate": 0.0001579473724702586, "loss": 1.9828, "step": 288080 }, { "epoch": 1.0965416441463731, "grad_norm": 0.16793425381183624, "learning_rate": 0.00015788647108501153, "loss": 2.0079, "step": 288090 }, { "epoch": 1.0965797066144958, "grad_norm": 0.15810927748680115, "learning_rate": 0.00015782558053920322, "loss": 2.0104, "step": 288100 }, { "epoch": 1.0966177690826184, "grad_norm": 0.15413767099380493, "learning_rate": 0.00015776470082704791, "loss": 2.0074, "step": 288110 }, { "epoch": 1.096655831550741, "grad_norm": 0.14198710024356842, "learning_rate": 0.00015770383194276528, "loss": 1.9906, "step": 288120 }, { "epoch": 1.0966938940188637, "grad_norm": 0.17314308881759644, "learning_rate": 0.00015764297388057985, "loss": 1.9965, "step": 288130 }, { "epoch": 1.0967319564869864, "grad_norm": 0.21093778312206268, "learning_rate": 0.00015758212663472137, "loss": 2.0053, "step": 288140 }, { "epoch": 1.096770018955109, "grad_norm": 0.16736042499542236, "learning_rate": 0.00015752129019942467, "loss": 2.0135, "step": 288150 }, { "epoch": 1.0968080814232317, "grad_norm": 0.15092270076274872, "learning_rate": 0.00015746046456892988, "loss": 1.9901, "step": 288160 }, { "epoch": 1.0968461438913546, "grad_norm": 0.14054399728775024, "learning_rate": 0.00015739964973748207, "loss": 1.9955, "step": 288170 }, { "epoch": 1.0968842063594773, "grad_norm": 0.1438574343919754, "learning_rate": 0.00015733884569933131, "loss": 2.0008, "step": 288180 }, { "epoch": 1.0969222688276, "grad_norm": 0.15705853700637817, "learning_rate": 0.0001572780524487331, "loss": 2.0009, "step": 288190 }, { "epoch": 1.0969603312957226, "grad_norm": 0.15003721415996552, "learning_rate": 0.0001572172699799478, "loss": 1.9926, "step": 288200 }, { "epoch": 1.0969983937638452, "grad_norm": 0.2214089035987854, "learning_rate": 0.00015715649828724088, "loss": 1.9999, "step": 288210 }, { "epoch": 1.0970364562319679, "grad_norm": 0.20071560144424438, "learning_rate": 0.00015709573736488296, "loss": 1.9994, "step": 288220 }, { "epoch": 1.0970745187000905, "grad_norm": 0.17797204852104187, "learning_rate": 0.00015703498720714965, "loss": 1.9929, "step": 288230 }, { "epoch": 1.0971125811682132, "grad_norm": 0.1843406856060028, "learning_rate": 0.00015697424780832176, "loss": 1.9963, "step": 288240 }, { "epoch": 1.0971506436363359, "grad_norm": 0.1717744767665863, "learning_rate": 0.000156913519162685, "loss": 2.007, "step": 288250 }, { "epoch": 1.0971887061044587, "grad_norm": 0.15767750144004822, "learning_rate": 0.00015685280126453023, "loss": 2.0069, "step": 288260 }, { "epoch": 1.0972267685725814, "grad_norm": 0.23259855806827545, "learning_rate": 0.00015679209410815337, "loss": 2.0009, "step": 288270 }, { "epoch": 1.097264831040704, "grad_norm": 0.1731208860874176, "learning_rate": 0.00015673139768785532, "loss": 1.9992, "step": 288280 }, { "epoch": 1.0973028935088267, "grad_norm": 0.16261719167232513, "learning_rate": 0.00015667071199794213, "loss": 1.9964, "step": 288290 }, { "epoch": 1.0973409559769494, "grad_norm": 0.147927924990654, "learning_rate": 0.00015661003703272474, "loss": 2.0035, "step": 288300 }, { "epoch": 1.097379018445072, "grad_norm": 0.21638403832912445, "learning_rate": 0.00015654937278651926, "loss": 2.006, "step": 288310 }, { "epoch": 1.0974170809131947, "grad_norm": 0.1866307407617569, "learning_rate": 0.00015648871925364667, "loss": 1.9907, "step": 288320 }, { "epoch": 1.0974551433813173, "grad_norm": 0.16943003237247467, "learning_rate": 0.000156428076428433, "loss": 1.996, "step": 288330 }, { "epoch": 1.0974932058494402, "grad_norm": 0.1697445660829544, "learning_rate": 0.00015636744430520943, "loss": 1.9813, "step": 288340 }, { "epoch": 1.0975312683175629, "grad_norm": 0.13972648978233337, "learning_rate": 0.000156306822878312, "loss": 1.9917, "step": 288350 }, { "epoch": 1.0975693307856855, "grad_norm": 0.15015487372875214, "learning_rate": 0.00015624621214208173, "loss": 1.9955, "step": 288360 }, { "epoch": 1.0976073932538082, "grad_norm": 0.14674119651317596, "learning_rate": 0.00015618561209086474, "loss": 2.0034, "step": 288370 }, { "epoch": 1.0976454557219308, "grad_norm": 0.18153975903987885, "learning_rate": 0.00015612502271901202, "loss": 1.9931, "step": 288380 }, { "epoch": 1.0976835181900535, "grad_norm": 0.16578781604766846, "learning_rate": 0.0001560644440208796, "loss": 1.998, "step": 288390 }, { "epoch": 1.0977215806581762, "grad_norm": 0.1398111879825592, "learning_rate": 0.00015600387599082845, "loss": 2.0043, "step": 288400 }, { "epoch": 1.0977596431262988, "grad_norm": 0.15062817931175232, "learning_rate": 0.00015594331862322453, "loss": 1.9935, "step": 288410 }, { "epoch": 1.0977977055944215, "grad_norm": 0.1535642296075821, "learning_rate": 0.0001558827719124387, "loss": 1.9983, "step": 288420 }, { "epoch": 1.0978357680625443, "grad_norm": 0.13218477368354797, "learning_rate": 0.00015582223585284695, "loss": 1.9896, "step": 288430 }, { "epoch": 1.097873830530667, "grad_norm": 0.1496010273694992, "learning_rate": 0.00015576171043882992, "loss": 1.9917, "step": 288440 }, { "epoch": 1.0979118929987897, "grad_norm": 0.160117968916893, "learning_rate": 0.00015570119566477337, "loss": 1.9945, "step": 288450 }, { "epoch": 1.0979499554669123, "grad_norm": 0.15955455601215363, "learning_rate": 0.00015564069152506805, "loss": 1.9808, "step": 288460 }, { "epoch": 1.097988017935035, "grad_norm": 0.152132049202919, "learning_rate": 0.00015558019801410956, "loss": 2.0021, "step": 288470 }, { "epoch": 1.0980260804031576, "grad_norm": 0.15424053370952606, "learning_rate": 0.0001555197151262983, "loss": 2.0095, "step": 288480 }, { "epoch": 1.0980641428712803, "grad_norm": 0.15129613876342773, "learning_rate": 0.00015545924285603985, "loss": 2.0025, "step": 288490 }, { "epoch": 1.098102205339403, "grad_norm": 0.1594102382659912, "learning_rate": 0.00015539878119774446, "loss": 2.0106, "step": 288500 }, { "epoch": 1.0981402678075258, "grad_norm": 0.21141329407691956, "learning_rate": 0.00015533833014582738, "loss": 1.9938, "step": 288510 }, { "epoch": 1.0981783302756485, "grad_norm": 0.14478717744350433, "learning_rate": 0.00015527788969470885, "loss": 2.0059, "step": 288520 }, { "epoch": 1.0982163927437711, "grad_norm": 0.19828860461711884, "learning_rate": 0.0001552174598388138, "loss": 1.9837, "step": 288530 }, { "epoch": 1.0982544552118938, "grad_norm": 0.16630606353282928, "learning_rate": 0.00015515704057257214, "loss": 2.0055, "step": 288540 }, { "epoch": 1.0982925176800165, "grad_norm": 0.1673320233821869, "learning_rate": 0.00015509663189041873, "loss": 1.999, "step": 288550 }, { "epoch": 1.0983305801481391, "grad_norm": 0.13971549272537231, "learning_rate": 0.00015503623378679323, "loss": 2.0113, "step": 288560 }, { "epoch": 1.0983686426162618, "grad_norm": 0.14060862362384796, "learning_rate": 0.00015497584625614007, "loss": 1.9897, "step": 288570 }, { "epoch": 1.0984067050843844, "grad_norm": 0.22311097383499146, "learning_rate": 0.00015491546929290883, "loss": 1.9926, "step": 288580 }, { "epoch": 1.098444767552507, "grad_norm": 0.1624974012374878, "learning_rate": 0.0001548551028915537, "loss": 2.0086, "step": 288590 }, { "epoch": 1.0984828300206297, "grad_norm": 0.1577761322259903, "learning_rate": 0.0001547947470465337, "loss": 2.0068, "step": 288600 }, { "epoch": 1.0985208924887526, "grad_norm": 0.14535222947597504, "learning_rate": 0.0001547344017523128, "loss": 2.0043, "step": 288610 }, { "epoch": 1.0985589549568753, "grad_norm": 0.15469293296337128, "learning_rate": 0.00015467406700335985, "loss": 1.9993, "step": 288620 }, { "epoch": 1.098597017424998, "grad_norm": 0.18461138010025024, "learning_rate": 0.0001546137427941484, "loss": 1.9892, "step": 288630 }, { "epoch": 1.0986350798931206, "grad_norm": 0.15238602459430695, "learning_rate": 0.00015455342911915694, "loss": 2.0026, "step": 288640 }, { "epoch": 1.0986731423612432, "grad_norm": 0.15034642815589905, "learning_rate": 0.00015449312597286868, "loss": 2.0044, "step": 288650 }, { "epoch": 1.098711204829366, "grad_norm": 0.16094571352005005, "learning_rate": 0.0001544328333497717, "loss": 2.0095, "step": 288660 }, { "epoch": 1.0987492672974886, "grad_norm": 0.15386877954006195, "learning_rate": 0.0001543725512443589, "loss": 1.9989, "step": 288670 }, { "epoch": 1.0987873297656112, "grad_norm": 0.14561620354652405, "learning_rate": 0.00015431227965112792, "loss": 1.9938, "step": 288680 }, { "epoch": 1.098825392233734, "grad_norm": 0.15083807706832886, "learning_rate": 0.00015425201856458125, "loss": 1.9973, "step": 288690 }, { "epoch": 1.0988634547018568, "grad_norm": 0.148808091878891, "learning_rate": 0.0001541917679792262, "loss": 1.9817, "step": 288700 }, { "epoch": 1.0989015171699794, "grad_norm": 0.18542420864105225, "learning_rate": 0.0001541315278895748, "loss": 1.9831, "step": 288710 }, { "epoch": 1.098939579638102, "grad_norm": 0.20795604586601257, "learning_rate": 0.00015407129829014382, "loss": 1.9894, "step": 288720 }, { "epoch": 1.0989776421062247, "grad_norm": 0.1624477505683899, "learning_rate": 0.00015401107917545492, "loss": 2.024, "step": 288730 }, { "epoch": 1.0990157045743474, "grad_norm": 0.1427524834871292, "learning_rate": 0.00015395087054003448, "loss": 2.0004, "step": 288740 }, { "epoch": 1.09905376704247, "grad_norm": 0.16401228308677673, "learning_rate": 0.00015389067237841353, "loss": 2.0047, "step": 288750 }, { "epoch": 1.0990918295105927, "grad_norm": 0.18014630675315857, "learning_rate": 0.00015383048468512812, "loss": 2.0046, "step": 288760 }, { "epoch": 1.0991298919787154, "grad_norm": 0.22702322900295258, "learning_rate": 0.00015377030745471876, "loss": 1.998, "step": 288770 }, { "epoch": 1.0991679544468382, "grad_norm": 0.17550452053546906, "learning_rate": 0.00015371014068173077, "loss": 1.9982, "step": 288780 }, { "epoch": 1.099206016914961, "grad_norm": 0.19119413197040558, "learning_rate": 0.00015364998436071447, "loss": 1.997, "step": 288790 }, { "epoch": 1.0992440793830835, "grad_norm": 0.21022382378578186, "learning_rate": 0.00015358983848622455, "loss": 1.9926, "step": 288800 }, { "epoch": 1.0992821418512062, "grad_norm": 0.14497125148773193, "learning_rate": 0.0001535297030528206, "loss": 1.9891, "step": 288810 }, { "epoch": 1.0993202043193289, "grad_norm": 0.14831840991973877, "learning_rate": 0.00015346957805506694, "loss": 2.0051, "step": 288820 }, { "epoch": 1.0993582667874515, "grad_norm": 0.14361897110939026, "learning_rate": 0.0001534094634875326, "loss": 1.993, "step": 288830 }, { "epoch": 1.0993963292555742, "grad_norm": 0.1500166654586792, "learning_rate": 0.00015334935934479123, "loss": 2.0049, "step": 288840 }, { "epoch": 1.0994343917236968, "grad_norm": 0.17549310624599457, "learning_rate": 0.00015328926562142132, "loss": 1.9883, "step": 288850 }, { "epoch": 1.0994724541918197, "grad_norm": 0.15605853497982025, "learning_rate": 0.00015322918231200594, "loss": 2.0059, "step": 288860 }, { "epoch": 1.0995105166599424, "grad_norm": 0.13666051626205444, "learning_rate": 0.00015316910941113294, "loss": 1.9986, "step": 288870 }, { "epoch": 1.099548579128065, "grad_norm": 0.14910241961479187, "learning_rate": 0.0001531090469133948, "loss": 2.0049, "step": 288880 }, { "epoch": 1.0995866415961877, "grad_norm": 0.15947653353214264, "learning_rate": 0.00015304899481338868, "loss": 1.9857, "step": 288890 }, { "epoch": 1.0996247040643103, "grad_norm": 0.1871788054704666, "learning_rate": 0.00015298895310571643, "loss": 1.9886, "step": 288900 }, { "epoch": 1.099662766532433, "grad_norm": 0.18131105601787567, "learning_rate": 0.00015292892178498464, "loss": 1.9964, "step": 288910 }, { "epoch": 1.0997008290005557, "grad_norm": 0.2423751950263977, "learning_rate": 0.00015286890084580435, "loss": 2.0039, "step": 288920 }, { "epoch": 1.0997388914686783, "grad_norm": 0.15106576681137085, "learning_rate": 0.00015280889028279154, "loss": 2.0011, "step": 288930 }, { "epoch": 1.099776953936801, "grad_norm": 0.17737062275409698, "learning_rate": 0.0001527488900905667, "loss": 1.9977, "step": 288940 }, { "epoch": 1.0998150164049239, "grad_norm": 0.1560988426208496, "learning_rate": 0.00015268890026375487, "loss": 1.9988, "step": 288950 }, { "epoch": 1.0998530788730465, "grad_norm": 0.15913379192352295, "learning_rate": 0.00015262892079698592, "loss": 2.0013, "step": 288960 }, { "epoch": 1.0998911413411692, "grad_norm": 0.1441878080368042, "learning_rate": 0.00015256895168489425, "loss": 2.0045, "step": 288970 }, { "epoch": 1.0999292038092918, "grad_norm": 0.1639913022518158, "learning_rate": 0.00015250899292211894, "loss": 2.0125, "step": 288980 }, { "epoch": 1.0999672662774145, "grad_norm": 0.1896839141845703, "learning_rate": 0.00015244904450330355, "loss": 1.9886, "step": 288990 }, { "epoch": 1.1000053287455371, "grad_norm": 0.20792101323604584, "learning_rate": 0.0001523891064230965, "loss": 2.0052, "step": 289000 }, { "epoch": 1.1000433912136598, "grad_norm": 0.22266975045204163, "learning_rate": 0.00015232917867615066, "loss": 1.9991, "step": 289010 }, { "epoch": 1.1000814536817825, "grad_norm": 0.19801975786685944, "learning_rate": 0.00015226926125712343, "loss": 2.0012, "step": 289020 }, { "epoch": 1.1001195161499053, "grad_norm": 0.20056535303592682, "learning_rate": 0.0001522093541606771, "loss": 2.0076, "step": 289030 }, { "epoch": 1.100157578618028, "grad_norm": 0.17718303203582764, "learning_rate": 0.00015214945738147828, "loss": 2.0151, "step": 289040 }, { "epoch": 1.1001956410861506, "grad_norm": 0.15518245100975037, "learning_rate": 0.0001520895709141983, "loss": 2.0094, "step": 289050 }, { "epoch": 1.1002337035542733, "grad_norm": 0.16333739459514618, "learning_rate": 0.00015202969475351302, "loss": 1.9887, "step": 289060 }, { "epoch": 1.100271766022396, "grad_norm": 0.1541435420513153, "learning_rate": 0.00015196982889410294, "loss": 1.9986, "step": 289070 }, { "epoch": 1.1003098284905186, "grad_norm": 0.15141764283180237, "learning_rate": 0.00015190997333065304, "loss": 1.9973, "step": 289080 }, { "epoch": 1.1003478909586413, "grad_norm": 0.1551426500082016, "learning_rate": 0.00015185012805785304, "loss": 2.0003, "step": 289090 }, { "epoch": 1.100385953426764, "grad_norm": 0.15018625557422638, "learning_rate": 0.00015179029307039703, "loss": 1.9999, "step": 289100 }, { "epoch": 1.1004240158948866, "grad_norm": 0.16040506958961487, "learning_rate": 0.00015173046836298377, "loss": 1.9833, "step": 289110 }, { "epoch": 1.1004620783630095, "grad_norm": 0.14544560015201569, "learning_rate": 0.0001516706539303165, "loss": 1.9885, "step": 289120 }, { "epoch": 1.1005001408311321, "grad_norm": 0.15642940998077393, "learning_rate": 0.00015161084976710309, "loss": 2.0103, "step": 289130 }, { "epoch": 1.1005382032992548, "grad_norm": 0.14502054452896118, "learning_rate": 0.00015155105586805596, "loss": 1.9831, "step": 289140 }, { "epoch": 1.1005762657673774, "grad_norm": 0.14879922568798065, "learning_rate": 0.00015149127222789194, "loss": 1.9798, "step": 289150 }, { "epoch": 1.1006143282355, "grad_norm": 0.15120501816272736, "learning_rate": 0.0001514314988413325, "loss": 2.0017, "step": 289160 }, { "epoch": 1.1006523907036228, "grad_norm": 0.15092013776302338, "learning_rate": 0.00015137173570310358, "loss": 1.9922, "step": 289170 }, { "epoch": 1.1006904531717454, "grad_norm": 0.18918323516845703, "learning_rate": 0.00015131198280793568, "loss": 1.989, "step": 289180 }, { "epoch": 1.100728515639868, "grad_norm": 0.15814675390720367, "learning_rate": 0.00015125224015056382, "loss": 1.9879, "step": 289190 }, { "epoch": 1.100766578107991, "grad_norm": 0.19635015726089478, "learning_rate": 0.0001511925077257275, "loss": 1.9881, "step": 289200 }, { "epoch": 1.1008046405761136, "grad_norm": 0.22311274707317352, "learning_rate": 0.00015113278552817072, "loss": 2.003, "step": 289210 }, { "epoch": 1.1008427030442363, "grad_norm": 0.1627051681280136, "learning_rate": 0.000151073073552642, "loss": 1.9937, "step": 289220 }, { "epoch": 1.100880765512359, "grad_norm": 0.1458701491355896, "learning_rate": 0.00015101337179389428, "loss": 2.0009, "step": 289230 }, { "epoch": 1.1009188279804816, "grad_norm": 0.24090033769607544, "learning_rate": 0.0001509536802466851, "loss": 1.995, "step": 289240 }, { "epoch": 1.1009568904486042, "grad_norm": 0.1879594475030899, "learning_rate": 0.0001508939989057765, "loss": 1.9979, "step": 289250 }, { "epoch": 1.1009949529167269, "grad_norm": 0.1629105806350708, "learning_rate": 0.00015083432776593475, "loss": 1.9971, "step": 289260 }, { "epoch": 1.1010330153848495, "grad_norm": 0.18328267335891724, "learning_rate": 0.00015077466682193098, "loss": 1.9839, "step": 289270 }, { "epoch": 1.1010710778529722, "grad_norm": 0.16340726613998413, "learning_rate": 0.00015071501606854037, "loss": 1.9913, "step": 289280 }, { "epoch": 1.101109140321095, "grad_norm": 0.15893971920013428, "learning_rate": 0.00015065537550054293, "loss": 2.0083, "step": 289290 }, { "epoch": 1.1011472027892177, "grad_norm": 0.16307507455348969, "learning_rate": 0.00015059574511272283, "loss": 1.9925, "step": 289300 }, { "epoch": 1.1011852652573404, "grad_norm": 0.20666718482971191, "learning_rate": 0.00015053612489986895, "loss": 2.0027, "step": 289310 }, { "epoch": 1.101223327725463, "grad_norm": 0.1942300647497177, "learning_rate": 0.00015047651485677437, "loss": 2.0019, "step": 289320 }, { "epoch": 1.1012613901935857, "grad_norm": 0.153672456741333, "learning_rate": 0.0001504169149782368, "loss": 2.0058, "step": 289330 }, { "epoch": 1.1012994526617084, "grad_norm": 0.15216460824012756, "learning_rate": 0.00015035732525905826, "loss": 1.9905, "step": 289340 }, { "epoch": 1.101337515129831, "grad_norm": 0.18357402086257935, "learning_rate": 0.0001502977456940452, "loss": 1.997, "step": 289350 }, { "epoch": 1.1013755775979537, "grad_norm": 0.16224220395088196, "learning_rate": 0.00015023817627800868, "loss": 2.0048, "step": 289360 }, { "epoch": 1.1014136400660766, "grad_norm": 0.15068677067756653, "learning_rate": 0.00015017861700576392, "loss": 1.9926, "step": 289370 }, { "epoch": 1.1014517025341992, "grad_norm": 0.15177755057811737, "learning_rate": 0.00015011906787213076, "loss": 1.9836, "step": 289380 }, { "epoch": 1.1014897650023219, "grad_norm": 0.15786100924015045, "learning_rate": 0.00015005952887193325, "loss": 2.0062, "step": 289390 }, { "epoch": 1.1015278274704445, "grad_norm": 0.15467971563339233, "learning_rate": 0.00015000000000000001, "loss": 2.0015, "step": 289400 }, { "epoch": 1.1015658899385672, "grad_norm": 0.14474506676197052, "learning_rate": 0.000149940481251164, "loss": 1.9909, "step": 289410 }, { "epoch": 1.1016039524066898, "grad_norm": 0.151639923453331, "learning_rate": 0.00014988097262026256, "loss": 1.9939, "step": 289420 }, { "epoch": 1.1016420148748125, "grad_norm": 0.17876717448234558, "learning_rate": 0.00014982147410213742, "loss": 2.0102, "step": 289430 }, { "epoch": 1.1016800773429352, "grad_norm": 0.1566760092973709, "learning_rate": 0.00014976198569163475, "loss": 2.0064, "step": 289440 }, { "epoch": 1.1017181398110578, "grad_norm": 0.15046700835227966, "learning_rate": 0.00014970250738360498, "loss": 1.9942, "step": 289450 }, { "epoch": 1.1017562022791805, "grad_norm": 0.15465828776359558, "learning_rate": 0.00014964303917290302, "loss": 1.9938, "step": 289460 }, { "epoch": 1.1017942647473034, "grad_norm": 0.1620795875787735, "learning_rate": 0.00014958358105438802, "loss": 1.9864, "step": 289470 }, { "epoch": 1.101832327215426, "grad_norm": 0.2129961997270584, "learning_rate": 0.00014952413302292362, "loss": 2.0068, "step": 289480 }, { "epoch": 1.1018703896835487, "grad_norm": 0.171329528093338, "learning_rate": 0.0001494646950733778, "loss": 2.0017, "step": 289490 }, { "epoch": 1.1019084521516713, "grad_norm": 0.17650112509727478, "learning_rate": 0.00014940526720062287, "loss": 1.9973, "step": 289500 }, { "epoch": 1.101946514619794, "grad_norm": 0.22053077816963196, "learning_rate": 0.00014934584939953538, "loss": 1.985, "step": 289510 }, { "epoch": 1.1019845770879166, "grad_norm": 0.1486891359090805, "learning_rate": 0.0001492864416649964, "loss": 1.9996, "step": 289520 }, { "epoch": 1.1020226395560393, "grad_norm": 0.14435265958309174, "learning_rate": 0.0001492270439918912, "loss": 1.9937, "step": 289530 }, { "epoch": 1.102060702024162, "grad_norm": 0.1580554097890854, "learning_rate": 0.00014916765637510937, "loss": 2.0134, "step": 289540 }, { "epoch": 1.1020987644922848, "grad_norm": 0.16247224807739258, "learning_rate": 0.00014910827880954504, "loss": 1.9933, "step": 289550 }, { "epoch": 1.1021368269604075, "grad_norm": 0.19161753356456757, "learning_rate": 0.00014904891129009635, "loss": 2.0013, "step": 289560 }, { "epoch": 1.1021748894285301, "grad_norm": 0.17701232433319092, "learning_rate": 0.00014898955381166597, "loss": 1.9985, "step": 289570 }, { "epoch": 1.1022129518966528, "grad_norm": 0.14210069179534912, "learning_rate": 0.00014893020636916083, "loss": 1.9887, "step": 289580 }, { "epoch": 1.1022510143647755, "grad_norm": 0.14342361688613892, "learning_rate": 0.00014887086895749215, "loss": 1.9924, "step": 289590 }, { "epoch": 1.1022890768328981, "grad_norm": 0.22423794865608215, "learning_rate": 0.0001488115415715754, "loss": 2.0036, "step": 289600 }, { "epoch": 1.1023271393010208, "grad_norm": 0.1794877052307129, "learning_rate": 0.00014875222420633038, "loss": 2.0018, "step": 289610 }, { "epoch": 1.1023652017691434, "grad_norm": 0.15107597410678864, "learning_rate": 0.00014869291685668128, "loss": 2.0049, "step": 289620 }, { "epoch": 1.102403264237266, "grad_norm": 0.15473225712776184, "learning_rate": 0.00014863361951755638, "loss": 1.9945, "step": 289630 }, { "epoch": 1.102441326705389, "grad_norm": 0.16924192011356354, "learning_rate": 0.00014857433218388845, "loss": 1.991, "step": 289640 }, { "epoch": 1.1024793891735116, "grad_norm": 0.16266871988773346, "learning_rate": 0.00014851505485061434, "loss": 1.9842, "step": 289650 }, { "epoch": 1.1025174516416343, "grad_norm": 0.1542142778635025, "learning_rate": 0.00014845578751267524, "loss": 1.9933, "step": 289660 }, { "epoch": 1.102555514109757, "grad_norm": 0.16522587835788727, "learning_rate": 0.0001483965301650167, "loss": 1.9997, "step": 289670 }, { "epoch": 1.1025935765778796, "grad_norm": 0.14744262397289276, "learning_rate": 0.00014833728280258846, "loss": 1.9997, "step": 289680 }, { "epoch": 1.1026316390460023, "grad_norm": 0.1941128671169281, "learning_rate": 0.0001482780454203444, "loss": 1.9926, "step": 289690 }, { "epoch": 1.102669701514125, "grad_norm": 0.19153951108455658, "learning_rate": 0.00014821881801324276, "loss": 2.0, "step": 289700 }, { "epoch": 1.1027077639822476, "grad_norm": 0.18494223058223724, "learning_rate": 0.0001481596005762461, "loss": 2.0099, "step": 289710 }, { "epoch": 1.1027458264503704, "grad_norm": 0.17378918826580048, "learning_rate": 0.00014810039310432112, "loss": 2.0009, "step": 289720 }, { "epoch": 1.102783888918493, "grad_norm": 0.1473301351070404, "learning_rate": 0.00014804119559243866, "loss": 2.0015, "step": 289730 }, { "epoch": 1.1028219513866158, "grad_norm": 0.1474497765302658, "learning_rate": 0.00014798200803557406, "loss": 1.9901, "step": 289740 }, { "epoch": 1.1028600138547384, "grad_norm": 0.15276385843753815, "learning_rate": 0.00014792283042870652, "loss": 2.0014, "step": 289750 }, { "epoch": 1.102898076322861, "grad_norm": 0.14474351704120636, "learning_rate": 0.00014786366276681984, "loss": 1.9905, "step": 289760 }, { "epoch": 1.1029361387909837, "grad_norm": 0.16547620296478271, "learning_rate": 0.00014780450504490173, "loss": 2.0099, "step": 289770 }, { "epoch": 1.1029742012591064, "grad_norm": 0.16784998774528503, "learning_rate": 0.00014774535725794425, "loss": 1.9933, "step": 289780 }, { "epoch": 1.103012263727229, "grad_norm": 0.14733049273490906, "learning_rate": 0.00014768621940094368, "loss": 1.9994, "step": 289790 }, { "epoch": 1.1030503261953517, "grad_norm": 0.1614081859588623, "learning_rate": 0.0001476270914689004, "loss": 2.0061, "step": 289800 }, { "epoch": 1.1030883886634746, "grad_norm": 0.19870488345623016, "learning_rate": 0.00014756797345681917, "loss": 1.9968, "step": 289810 }, { "epoch": 1.1031264511315972, "grad_norm": 0.16760575771331787, "learning_rate": 0.00014750886535970864, "loss": 1.9875, "step": 289820 }, { "epoch": 1.10316451359972, "grad_norm": 0.15084044635295868, "learning_rate": 0.00014744976717258196, "loss": 2.0, "step": 289830 }, { "epoch": 1.1032025760678426, "grad_norm": 0.14579305052757263, "learning_rate": 0.00014739067889045626, "loss": 1.9929, "step": 289840 }, { "epoch": 1.1032406385359652, "grad_norm": 0.15969346463680267, "learning_rate": 0.00014733160050835287, "loss": 1.992, "step": 289850 }, { "epoch": 1.1032787010040879, "grad_norm": 0.17312924563884735, "learning_rate": 0.00014727253202129743, "loss": 1.9932, "step": 289860 }, { "epoch": 1.1033167634722105, "grad_norm": 0.1722601354122162, "learning_rate": 0.0001472134734243195, "loss": 1.9965, "step": 289870 }, { "epoch": 1.1033548259403332, "grad_norm": 0.1556127369403839, "learning_rate": 0.00014715442471245299, "loss": 1.9975, "step": 289880 }, { "epoch": 1.103392888408456, "grad_norm": 0.1654588282108307, "learning_rate": 0.00014709538588073595, "loss": 1.9947, "step": 289890 }, { "epoch": 1.1034309508765787, "grad_norm": 0.1517258733510971, "learning_rate": 0.00014703635692421048, "loss": 1.9943, "step": 289900 }, { "epoch": 1.1034690133447014, "grad_norm": 0.14629270136356354, "learning_rate": 0.00014697733783792293, "loss": 2.0045, "step": 289910 }, { "epoch": 1.103507075812824, "grad_norm": 0.16628184914588928, "learning_rate": 0.00014691832861692372, "loss": 1.9975, "step": 289920 }, { "epoch": 1.1035451382809467, "grad_norm": 0.163875013589859, "learning_rate": 0.00014685932925626743, "loss": 1.9841, "step": 289930 }, { "epoch": 1.1035832007490693, "grad_norm": 0.164508655667305, "learning_rate": 0.00014680033975101282, "loss": 1.9806, "step": 289940 }, { "epoch": 1.103621263217192, "grad_norm": 0.17021775245666504, "learning_rate": 0.00014674136009622263, "loss": 1.9932, "step": 289950 }, { "epoch": 1.1036593256853147, "grad_norm": 0.15839537978172302, "learning_rate": 0.00014668239028696383, "loss": 2.0106, "step": 289960 }, { "epoch": 1.1036973881534373, "grad_norm": 0.1492561548948288, "learning_rate": 0.0001466234303183076, "loss": 1.9936, "step": 289970 }, { "epoch": 1.1037354506215602, "grad_norm": 0.17014679312705994, "learning_rate": 0.00014656448018532904, "loss": 1.9998, "step": 289980 }, { "epoch": 1.1037735130896829, "grad_norm": 0.15628159046173096, "learning_rate": 0.00014650553988310744, "loss": 2.0081, "step": 289990 }, { "epoch": 1.1038115755578055, "grad_norm": 0.16518567502498627, "learning_rate": 0.00014644660940672623, "loss": 2.0011, "step": 290000 }, { "epoch": 1.1038496380259282, "grad_norm": 0.15008623898029327, "learning_rate": 0.00014638768875127289, "loss": 1.986, "step": 290010 }, { "epoch": 1.1038877004940508, "grad_norm": 0.2048100084066391, "learning_rate": 0.00014632877791183896, "loss": 2.005, "step": 290020 }, { "epoch": 1.1039257629621735, "grad_norm": 0.19177468121051788, "learning_rate": 0.00014626987688352017, "loss": 1.989, "step": 290030 }, { "epoch": 1.1039638254302961, "grad_norm": 0.16694004833698273, "learning_rate": 0.00014621098566141617, "loss": 1.9882, "step": 290040 }, { "epoch": 1.1040018878984188, "grad_norm": 0.22580422461032867, "learning_rate": 0.00014615210424063092, "loss": 2.0191, "step": 290050 }, { "epoch": 1.1040399503665417, "grad_norm": 0.16023632884025574, "learning_rate": 0.00014609323261627222, "loss": 1.9965, "step": 290060 }, { "epoch": 1.1040780128346643, "grad_norm": 0.20892436802387238, "learning_rate": 0.0001460343707834521, "loss": 1.9882, "step": 290070 }, { "epoch": 1.104116075302787, "grad_norm": 0.18539145588874817, "learning_rate": 0.0001459755187372866, "loss": 1.9884, "step": 290080 }, { "epoch": 1.1041541377709096, "grad_norm": 0.1476847231388092, "learning_rate": 0.00014591667647289574, "loss": 1.9922, "step": 290090 }, { "epoch": 1.1041922002390323, "grad_norm": 0.17845043540000916, "learning_rate": 0.0001458578439854037, "loss": 1.9937, "step": 290100 }, { "epoch": 1.104230262707155, "grad_norm": 0.15550413727760315, "learning_rate": 0.00014579902126993872, "loss": 2.018, "step": 290110 }, { "epoch": 1.1042683251752776, "grad_norm": 0.16928768157958984, "learning_rate": 0.00014574020832163297, "loss": 2.0076, "step": 290120 }, { "epoch": 1.1043063876434003, "grad_norm": 0.17358897626399994, "learning_rate": 0.0001456814051356228, "loss": 2.0049, "step": 290130 }, { "epoch": 1.104344450111523, "grad_norm": 0.18268205225467682, "learning_rate": 0.0001456226117070485, "loss": 1.9806, "step": 290140 }, { "epoch": 1.1043825125796458, "grad_norm": 0.18139834702014923, "learning_rate": 0.0001455638280310544, "loss": 1.9939, "step": 290150 }, { "epoch": 1.1044205750477685, "grad_norm": 0.18911303579807281, "learning_rate": 0.0001455050541027888, "loss": 1.9914, "step": 290160 }, { "epoch": 1.1044586375158911, "grad_norm": 0.20248942077159882, "learning_rate": 0.00014544628991740427, "loss": 2.0105, "step": 290170 }, { "epoch": 1.1044966999840138, "grad_norm": 0.26870208978652954, "learning_rate": 0.00014538753547005712, "loss": 1.9981, "step": 290180 }, { "epoch": 1.1045347624521364, "grad_norm": 0.1862451285123825, "learning_rate": 0.00014532879075590776, "loss": 2.0049, "step": 290190 }, { "epoch": 1.104572824920259, "grad_norm": 0.17011886835098267, "learning_rate": 0.00014527005577012064, "loss": 1.9946, "step": 290200 }, { "epoch": 1.1046108873883818, "grad_norm": 0.19458185136318207, "learning_rate": 0.00014521133050786418, "loss": 1.9968, "step": 290210 }, { "epoch": 1.1046489498565044, "grad_norm": 0.19219279289245605, "learning_rate": 0.00014515261496431077, "loss": 1.9908, "step": 290220 }, { "epoch": 1.1046870123246273, "grad_norm": 0.1879696100950241, "learning_rate": 0.00014509390913463693, "loss": 1.9734, "step": 290230 }, { "epoch": 1.10472507479275, "grad_norm": 0.1549655795097351, "learning_rate": 0.00014503521301402307, "loss": 2.0045, "step": 290240 }, { "epoch": 1.1047631372608726, "grad_norm": 0.19210083782672882, "learning_rate": 0.00014497652659765348, "loss": 2.0109, "step": 290250 }, { "epoch": 1.1048011997289953, "grad_norm": 0.1508699655532837, "learning_rate": 0.0001449178498807166, "loss": 2.0096, "step": 290260 }, { "epoch": 1.104839262197118, "grad_norm": 0.14681914448738098, "learning_rate": 0.00014485918285840477, "loss": 1.9985, "step": 290270 }, { "epoch": 1.1048773246652406, "grad_norm": 0.16370920836925507, "learning_rate": 0.00014480052552591429, "loss": 1.9955, "step": 290280 }, { "epoch": 1.1049153871333632, "grad_norm": 0.18862545490264893, "learning_rate": 0.00014474187787844552, "loss": 1.9966, "step": 290290 }, { "epoch": 1.104953449601486, "grad_norm": 0.1513679325580597, "learning_rate": 0.00014468323991120264, "loss": 1.9865, "step": 290300 }, { "epoch": 1.1049915120696086, "grad_norm": 0.15902507305145264, "learning_rate": 0.00014462461161939382, "loss": 1.9905, "step": 290310 }, { "epoch": 1.1050295745377312, "grad_norm": 0.14775541424751282, "learning_rate": 0.00014456599299823137, "loss": 2.0034, "step": 290320 }, { "epoch": 1.105067637005854, "grad_norm": 0.18092699348926544, "learning_rate": 0.00014450738404293124, "loss": 2.0011, "step": 290330 }, { "epoch": 1.1051056994739767, "grad_norm": 0.14086657762527466, "learning_rate": 0.00014444878474871348, "loss": 1.9985, "step": 290340 }, { "epoch": 1.1051437619420994, "grad_norm": 0.19705578684806824, "learning_rate": 0.0001443901951108022, "loss": 1.9896, "step": 290350 }, { "epoch": 1.105181824410222, "grad_norm": 0.1683788001537323, "learning_rate": 0.00014433161512442523, "loss": 1.9888, "step": 290360 }, { "epoch": 1.1052198868783447, "grad_norm": 0.17867405712604523, "learning_rate": 0.0001442730447848144, "loss": 1.997, "step": 290370 }, { "epoch": 1.1052579493464674, "grad_norm": 0.1499549299478531, "learning_rate": 0.00014421448408720555, "loss": 1.9974, "step": 290380 }, { "epoch": 1.10529601181459, "grad_norm": 0.1693742722272873, "learning_rate": 0.00014415593302683833, "loss": 1.9997, "step": 290390 }, { "epoch": 1.1053340742827127, "grad_norm": 0.14913637936115265, "learning_rate": 0.0001440973915989563, "loss": 1.9935, "step": 290400 }, { "epoch": 1.1053721367508356, "grad_norm": 0.15323811769485474, "learning_rate": 0.00014403885979880705, "loss": 1.9955, "step": 290410 }, { "epoch": 1.1054101992189582, "grad_norm": 0.1560705304145813, "learning_rate": 0.00014398033762164203, "loss": 1.987, "step": 290420 }, { "epoch": 1.1054482616870809, "grad_norm": 0.22040893137454987, "learning_rate": 0.0001439218250627165, "loss": 1.9997, "step": 290430 }, { "epoch": 1.1054863241552035, "grad_norm": 0.2085459977388382, "learning_rate": 0.00014386332211728975, "loss": 1.9904, "step": 290440 }, { "epoch": 1.1055243866233262, "grad_norm": 0.1620468944311142, "learning_rate": 0.00014380482878062484, "loss": 2.003, "step": 290450 }, { "epoch": 1.1055624490914489, "grad_norm": 0.1452278196811676, "learning_rate": 0.0001437463450479888, "loss": 1.9898, "step": 290460 }, { "epoch": 1.1056005115595715, "grad_norm": 0.15971092879772186, "learning_rate": 0.00014368787091465252, "loss": 1.9945, "step": 290470 }, { "epoch": 1.1056385740276942, "grad_norm": 0.15089818835258484, "learning_rate": 0.00014362940637589077, "loss": 2.0007, "step": 290480 }, { "epoch": 1.1056766364958168, "grad_norm": 0.15188033878803253, "learning_rate": 0.00014357095142698224, "loss": 1.9957, "step": 290490 }, { "epoch": 1.1057146989639397, "grad_norm": 0.18442633748054504, "learning_rate": 0.00014351250606320937, "loss": 1.985, "step": 290500 }, { "epoch": 1.1057527614320624, "grad_norm": 0.15648066997528076, "learning_rate": 0.00014345407027985862, "loss": 1.9846, "step": 290510 }, { "epoch": 1.105790823900185, "grad_norm": 0.15993808209896088, "learning_rate": 0.00014339564407222016, "loss": 1.9937, "step": 290520 }, { "epoch": 1.1058288863683077, "grad_norm": 0.1606651097536087, "learning_rate": 0.00014333722743558818, "loss": 2.0042, "step": 290530 }, { "epoch": 1.1058669488364303, "grad_norm": 0.1600220501422882, "learning_rate": 0.00014327882036526063, "loss": 1.9934, "step": 290540 }, { "epoch": 1.105905011304553, "grad_norm": 0.20765568315982819, "learning_rate": 0.00014322042285653925, "loss": 1.9812, "step": 290550 }, { "epoch": 1.1059430737726756, "grad_norm": 0.15059348940849304, "learning_rate": 0.00014316203490472972, "loss": 1.9896, "step": 290560 }, { "epoch": 1.1059811362407983, "grad_norm": 0.20932096242904663, "learning_rate": 0.00014310365650514157, "loss": 2.0081, "step": 290570 }, { "epoch": 1.1060191987089212, "grad_norm": 0.19905489683151245, "learning_rate": 0.000143045287653088, "loss": 1.9983, "step": 290580 }, { "epoch": 1.1060572611770438, "grad_norm": 0.1467469334602356, "learning_rate": 0.00014298692834388639, "loss": 1.9974, "step": 290590 }, { "epoch": 1.1060953236451665, "grad_norm": 0.14193718135356903, "learning_rate": 0.00014292857857285752, "loss": 2.0026, "step": 290600 }, { "epoch": 1.1061333861132892, "grad_norm": 0.1589806228876114, "learning_rate": 0.00014287023833532624, "loss": 1.9906, "step": 290610 }, { "epoch": 1.1061714485814118, "grad_norm": 0.15120485424995422, "learning_rate": 0.00014281190762662128, "loss": 1.9992, "step": 290620 }, { "epoch": 1.1062095110495345, "grad_norm": 0.15628735721111298, "learning_rate": 0.000142753586442075, "loss": 1.9936, "step": 290630 }, { "epoch": 1.1062475735176571, "grad_norm": 0.1633949875831604, "learning_rate": 0.0001426952747770236, "loss": 1.9897, "step": 290640 }, { "epoch": 1.1062856359857798, "grad_norm": 0.1764533966779709, "learning_rate": 0.00014263697262680719, "loss": 1.9891, "step": 290650 }, { "epoch": 1.1063236984539024, "grad_norm": 0.17311392724514008, "learning_rate": 0.00014257867998676967, "loss": 2.0095, "step": 290660 }, { "epoch": 1.1063617609220253, "grad_norm": 0.20260465145111084, "learning_rate": 0.00014252039685225864, "loss": 2.0225, "step": 290670 }, { "epoch": 1.106399823390148, "grad_norm": 0.20680083334445953, "learning_rate": 0.0001424621232186255, "loss": 2.0022, "step": 290680 }, { "epoch": 1.1064378858582706, "grad_norm": 0.16034738719463348, "learning_rate": 0.00014240385908122555, "loss": 1.9941, "step": 290690 }, { "epoch": 1.1064759483263933, "grad_norm": 0.1378408670425415, "learning_rate": 0.00014234560443541773, "loss": 1.995, "step": 290700 }, { "epoch": 1.106514010794516, "grad_norm": 0.18012098968029022, "learning_rate": 0.00014228735927656493, "loss": 2.003, "step": 290710 }, { "epoch": 1.1065520732626386, "grad_norm": 0.18704988062381744, "learning_rate": 0.00014222912360003365, "loss": 2.0084, "step": 290720 }, { "epoch": 1.1065901357307613, "grad_norm": 0.16487978398799896, "learning_rate": 0.0001421708974011942, "loss": 1.9931, "step": 290730 }, { "epoch": 1.106628198198884, "grad_norm": 0.15397684276103973, "learning_rate": 0.00014211268067542077, "loss": 1.9913, "step": 290740 }, { "epoch": 1.1066662606670068, "grad_norm": 0.1769288331270218, "learning_rate": 0.00014205447341809118, "loss": 2.0106, "step": 290750 }, { "epoch": 1.1067043231351295, "grad_norm": 0.1709713190793991, "learning_rate": 0.000141996275624587, "loss": 1.9984, "step": 290760 }, { "epoch": 1.106742385603252, "grad_norm": 0.15498772263526917, "learning_rate": 0.00014193808729029368, "loss": 1.9707, "step": 290770 }, { "epoch": 1.1067804480713748, "grad_norm": 0.15889990329742432, "learning_rate": 0.00014187990841060032, "loss": 1.9981, "step": 290780 }, { "epoch": 1.1068185105394974, "grad_norm": 0.16191764175891876, "learning_rate": 0.00014182173898089984, "loss": 2.0066, "step": 290790 }, { "epoch": 1.10685657300762, "grad_norm": 0.16238361597061157, "learning_rate": 0.0001417635789965887, "loss": 2.01, "step": 290800 }, { "epoch": 1.1068946354757427, "grad_norm": 0.19509507715702057, "learning_rate": 0.00014170542845306743, "loss": 1.9998, "step": 290810 }, { "epoch": 1.1069326979438654, "grad_norm": 0.1603064388036728, "learning_rate": 0.00014164728734573996, "loss": 1.9946, "step": 290820 }, { "epoch": 1.106970760411988, "grad_norm": 0.18165600299835205, "learning_rate": 0.00014158915567001417, "loss": 1.9896, "step": 290830 }, { "epoch": 1.107008822880111, "grad_norm": 0.1503620743751526, "learning_rate": 0.00014153103342130159, "loss": 2.0022, "step": 290840 }, { "epoch": 1.1070468853482336, "grad_norm": 0.14587150514125824, "learning_rate": 0.00014147292059501742, "loss": 1.9836, "step": 290850 }, { "epoch": 1.1070849478163562, "grad_norm": 0.16784952580928802, "learning_rate": 0.00014141481718658072, "loss": 2.0025, "step": 290860 }, { "epoch": 1.107123010284479, "grad_norm": 0.14302490651607513, "learning_rate": 0.00014135672319141407, "loss": 1.9768, "step": 290870 }, { "epoch": 1.1071610727526016, "grad_norm": 0.1688007414340973, "learning_rate": 0.00014129863860494384, "loss": 1.9909, "step": 290880 }, { "epoch": 1.1071991352207242, "grad_norm": 0.1606607288122177, "learning_rate": 0.0001412405634226002, "loss": 1.9986, "step": 290890 }, { "epoch": 1.1072371976888469, "grad_norm": 0.18139497935771942, "learning_rate": 0.0001411824976398169, "loss": 1.9983, "step": 290900 }, { "epoch": 1.1072752601569695, "grad_norm": 0.1669086068868637, "learning_rate": 0.0001411244412520314, "loss": 2.016, "step": 290910 }, { "epoch": 1.1073133226250924, "grad_norm": 0.17165876924991608, "learning_rate": 0.00014106639425468488, "loss": 1.9919, "step": 290920 }, { "epoch": 1.107351385093215, "grad_norm": 0.15280088782310486, "learning_rate": 0.00014100835664322215, "loss": 1.9889, "step": 290930 }, { "epoch": 1.1073894475613377, "grad_norm": 0.14866815507411957, "learning_rate": 0.00014095032841309174, "loss": 1.9949, "step": 290940 }, { "epoch": 1.1074275100294604, "grad_norm": 0.18499290943145752, "learning_rate": 0.0001408923095597459, "loss": 1.9875, "step": 290950 }, { "epoch": 1.107465572497583, "grad_norm": 0.18930111825466156, "learning_rate": 0.00014083430007864057, "loss": 1.9826, "step": 290960 }, { "epoch": 1.1075036349657057, "grad_norm": 0.17787308990955353, "learning_rate": 0.00014077629996523522, "loss": 2.002, "step": 290970 }, { "epoch": 1.1075416974338284, "grad_norm": 0.13603579998016357, "learning_rate": 0.00014071830921499306, "loss": 1.998, "step": 290980 }, { "epoch": 1.107579759901951, "grad_norm": 0.1438482403755188, "learning_rate": 0.00014066032782338102, "loss": 1.9825, "step": 290990 }, { "epoch": 1.1076178223700737, "grad_norm": 0.1974167674779892, "learning_rate": 0.00014060235578586955, "loss": 1.9849, "step": 291000 }, { "epoch": 1.1076558848381965, "grad_norm": 0.15366169810295105, "learning_rate": 0.000140544393097933, "loss": 2.0074, "step": 291010 }, { "epoch": 1.1076939473063192, "grad_norm": 0.1550496369600296, "learning_rate": 0.000140486439755049, "loss": 1.9768, "step": 291020 }, { "epoch": 1.1077320097744419, "grad_norm": 0.15785466134548187, "learning_rate": 0.00014042849575269918, "loss": 1.9828, "step": 291030 }, { "epoch": 1.1077700722425645, "grad_norm": 0.16849170625209808, "learning_rate": 0.00014037056108636865, "loss": 2.0063, "step": 291040 }, { "epoch": 1.1078081347106872, "grad_norm": 0.1854594498872757, "learning_rate": 0.00014031263575154607, "loss": 1.9903, "step": 291050 }, { "epoch": 1.1078461971788098, "grad_norm": 0.14291688799858093, "learning_rate": 0.00014025471974372382, "loss": 2.0002, "step": 291060 }, { "epoch": 1.1078842596469325, "grad_norm": 0.15868712961673737, "learning_rate": 0.0001401968130583981, "loss": 1.989, "step": 291070 }, { "epoch": 1.1079223221150551, "grad_norm": 0.1694360077381134, "learning_rate": 0.00014013891569106835, "loss": 1.9879, "step": 291080 }, { "epoch": 1.107960384583178, "grad_norm": 0.19629625976085663, "learning_rate": 0.00014008102763723795, "loss": 2.007, "step": 291090 }, { "epoch": 1.1079984470513007, "grad_norm": 0.17719154059886932, "learning_rate": 0.00014002314889241375, "loss": 1.9958, "step": 291100 }, { "epoch": 1.1080365095194233, "grad_norm": 0.1574486643075943, "learning_rate": 0.00013996527945210618, "loss": 1.9925, "step": 291110 }, { "epoch": 1.108074571987546, "grad_norm": 0.15465888381004333, "learning_rate": 0.00013990741931182939, "loss": 1.9972, "step": 291120 }, { "epoch": 1.1081126344556687, "grad_norm": 0.18018567562103271, "learning_rate": 0.00013984956846710105, "loss": 2.0014, "step": 291130 }, { "epoch": 1.1081506969237913, "grad_norm": 0.17025281488895416, "learning_rate": 0.0001397917269134425, "loss": 2.0062, "step": 291140 }, { "epoch": 1.108188759391914, "grad_norm": 0.15943843126296997, "learning_rate": 0.00013973389464637853, "loss": 1.9928, "step": 291150 }, { "epoch": 1.1082268218600366, "grad_norm": 0.1920195072889328, "learning_rate": 0.00013967607166143775, "loss": 1.9894, "step": 291160 }, { "epoch": 1.1082648843281593, "grad_norm": 0.19052480161190033, "learning_rate": 0.00013961825795415217, "loss": 1.9869, "step": 291170 }, { "epoch": 1.108302946796282, "grad_norm": 0.2030021846294403, "learning_rate": 0.00013956045352005742, "loss": 1.9906, "step": 291180 }, { "epoch": 1.1083410092644048, "grad_norm": 0.21491935849189758, "learning_rate": 0.0001395026583546928, "loss": 2.0128, "step": 291190 }, { "epoch": 1.1083790717325275, "grad_norm": 0.1512957364320755, "learning_rate": 0.00013944487245360105, "loss": 1.9947, "step": 291200 }, { "epoch": 1.1084171342006501, "grad_norm": 0.18461589515209198, "learning_rate": 0.00013938709581232862, "loss": 1.9996, "step": 291210 }, { "epoch": 1.1084551966687728, "grad_norm": 0.18098539113998413, "learning_rate": 0.00013932932842642537, "loss": 1.9857, "step": 291220 }, { "epoch": 1.1084932591368954, "grad_norm": 0.16633349657058716, "learning_rate": 0.00013927157029144488, "loss": 1.9976, "step": 291230 }, { "epoch": 1.108531321605018, "grad_norm": 0.23348954319953918, "learning_rate": 0.00013921382140294415, "loss": 2.0039, "step": 291240 }, { "epoch": 1.1085693840731408, "grad_norm": 0.1641732156276703, "learning_rate": 0.00013915608175648386, "loss": 1.9816, "step": 291250 }, { "epoch": 1.1086074465412634, "grad_norm": 0.20765918493270874, "learning_rate": 0.00013909835134762823, "loss": 1.993, "step": 291260 }, { "epoch": 1.1086455090093863, "grad_norm": 0.17798057198524475, "learning_rate": 0.0001390406301719449, "loss": 2.0044, "step": 291270 }, { "epoch": 1.108683571477509, "grad_norm": 0.15671156346797943, "learning_rate": 0.00013898291822500515, "loss": 1.9776, "step": 291280 }, { "epoch": 1.1087216339456316, "grad_norm": 0.1654648631811142, "learning_rate": 0.0001389252155023838, "loss": 1.9782, "step": 291290 }, { "epoch": 1.1087596964137543, "grad_norm": 0.15194883942604065, "learning_rate": 0.0001388675219996592, "loss": 1.994, "step": 291300 }, { "epoch": 1.108797758881877, "grad_norm": 0.17421121895313263, "learning_rate": 0.00013880983771241313, "loss": 1.9997, "step": 291310 }, { "epoch": 1.1088358213499996, "grad_norm": 0.1441257894039154, "learning_rate": 0.00013875216263623113, "loss": 1.9798, "step": 291320 }, { "epoch": 1.1088738838181222, "grad_norm": 0.19240529835224152, "learning_rate": 0.00013869449676670204, "loss": 1.9993, "step": 291330 }, { "epoch": 1.108911946286245, "grad_norm": 0.16603481769561768, "learning_rate": 0.00013863684009941834, "loss": 1.9938, "step": 291340 }, { "epoch": 1.1089500087543676, "grad_norm": 0.14293710887432098, "learning_rate": 0.000138579192629976, "loss": 1.9767, "step": 291350 }, { "epoch": 1.1089880712224904, "grad_norm": 0.17526213824748993, "learning_rate": 0.00013852155435397447, "loss": 1.9891, "step": 291360 }, { "epoch": 1.109026133690613, "grad_norm": 0.19786065816879272, "learning_rate": 0.00013846392526701662, "loss": 2.0015, "step": 291370 }, { "epoch": 1.1090641961587357, "grad_norm": 0.16378028690814972, "learning_rate": 0.0001384063053647091, "loss": 2.0019, "step": 291380 }, { "epoch": 1.1091022586268584, "grad_norm": 0.1845664083957672, "learning_rate": 0.0001383486946426618, "loss": 1.995, "step": 291390 }, { "epoch": 1.109140321094981, "grad_norm": 0.18735288083553314, "learning_rate": 0.00013829109309648825, "loss": 1.9832, "step": 291400 }, { "epoch": 1.1091783835631037, "grad_norm": 0.15484029054641724, "learning_rate": 0.0001382335007218054, "loss": 2.0015, "step": 291410 }, { "epoch": 1.1092164460312264, "grad_norm": 0.17933939397335052, "learning_rate": 0.00013817591751423363, "loss": 1.9925, "step": 291420 }, { "epoch": 1.109254508499349, "grad_norm": 0.17813944816589355, "learning_rate": 0.00013811834346939695, "loss": 2.0081, "step": 291430 }, { "epoch": 1.109292570967472, "grad_norm": 0.14827589690685272, "learning_rate": 0.00013806077858292281, "loss": 1.9839, "step": 291440 }, { "epoch": 1.1093306334355946, "grad_norm": 0.1530294567346573, "learning_rate": 0.00013800322285044213, "loss": 1.9957, "step": 291450 }, { "epoch": 1.1093686959037172, "grad_norm": 0.26225045323371887, "learning_rate": 0.0001379456762675892, "loss": 1.9754, "step": 291460 }, { "epoch": 1.1094067583718399, "grad_norm": 0.16842412948608398, "learning_rate": 0.0001378881388300019, "loss": 1.9809, "step": 291470 }, { "epoch": 1.1094448208399625, "grad_norm": 0.16914471983909607, "learning_rate": 0.00013783061053332152, "loss": 1.9976, "step": 291480 }, { "epoch": 1.1094828833080852, "grad_norm": 0.23515167832374573, "learning_rate": 0.00013777309137319278, "loss": 1.9958, "step": 291490 }, { "epoch": 1.1095209457762079, "grad_norm": 0.15831919014453888, "learning_rate": 0.000137715581345264, "loss": 1.9806, "step": 291500 }, { "epoch": 1.1095590082443305, "grad_norm": 0.15088757872581482, "learning_rate": 0.00013765808044518686, "loss": 2.0058, "step": 291510 }, { "epoch": 1.1095970707124532, "grad_norm": 0.14674466848373413, "learning_rate": 0.00013760058866861647, "loss": 1.9826, "step": 291520 }, { "epoch": 1.109635133180576, "grad_norm": 0.152365580201149, "learning_rate": 0.00013754310601121135, "loss": 2.0032, "step": 291530 }, { "epoch": 1.1096731956486987, "grad_norm": 0.18826673924922943, "learning_rate": 0.00013748563246863355, "loss": 2.0001, "step": 291540 }, { "epoch": 1.1097112581168214, "grad_norm": 0.18371078372001648, "learning_rate": 0.00013742816803654845, "loss": 1.9927, "step": 291550 }, { "epoch": 1.109749320584944, "grad_norm": 0.1651403307914734, "learning_rate": 0.00013737071271062508, "loss": 2.0015, "step": 291560 }, { "epoch": 1.1097873830530667, "grad_norm": 0.14440204203128815, "learning_rate": 0.00013731326648653568, "loss": 2.0011, "step": 291570 }, { "epoch": 1.1098254455211893, "grad_norm": 0.24342374503612518, "learning_rate": 0.00013725582935995606, "loss": 1.9962, "step": 291580 }, { "epoch": 1.109863507989312, "grad_norm": 0.1862649917602539, "learning_rate": 0.0001371984013265653, "loss": 2.0055, "step": 291590 }, { "epoch": 1.1099015704574346, "grad_norm": 0.16959112882614136, "learning_rate": 0.00013714098238204597, "loss": 1.991, "step": 291600 }, { "epoch": 1.1099396329255575, "grad_norm": 0.15094879269599915, "learning_rate": 0.00013708357252208413, "loss": 1.9883, "step": 291610 }, { "epoch": 1.1099776953936802, "grad_norm": 0.1410762369632721, "learning_rate": 0.00013702617174236927, "loss": 1.9937, "step": 291620 }, { "epoch": 1.1100157578618028, "grad_norm": 0.16502730548381805, "learning_rate": 0.00013696878003859408, "loss": 1.993, "step": 291630 }, { "epoch": 1.1100538203299255, "grad_norm": 0.17659837007522583, "learning_rate": 0.00013691139740645492, "loss": 2.0023, "step": 291640 }, { "epoch": 1.1100918827980482, "grad_norm": 0.18810971081256866, "learning_rate": 0.00013685402384165123, "loss": 1.9911, "step": 291650 }, { "epoch": 1.1101299452661708, "grad_norm": 0.21112871170043945, "learning_rate": 0.00013679665933988622, "loss": 1.9886, "step": 291660 }, { "epoch": 1.1101680077342935, "grad_norm": 0.16219669580459595, "learning_rate": 0.00013673930389686617, "loss": 1.9913, "step": 291670 }, { "epoch": 1.1102060702024161, "grad_norm": 0.15710720419883728, "learning_rate": 0.000136681957508301, "loss": 1.9897, "step": 291680 }, { "epoch": 1.1102441326705388, "grad_norm": 0.19102413952350616, "learning_rate": 0.00013662462016990383, "loss": 1.9864, "step": 291690 }, { "epoch": 1.1102821951386617, "grad_norm": 0.15526530146598816, "learning_rate": 0.00013656729187739124, "loss": 1.9877, "step": 291700 }, { "epoch": 1.1103202576067843, "grad_norm": 0.19195884466171265, "learning_rate": 0.00013650997262648317, "loss": 1.9923, "step": 291710 }, { "epoch": 1.110358320074907, "grad_norm": 0.1689821034669876, "learning_rate": 0.00013645266241290305, "loss": 1.9934, "step": 291720 }, { "epoch": 1.1103963825430296, "grad_norm": 0.17311668395996094, "learning_rate": 0.00013639536123237738, "loss": 2.0037, "step": 291730 }, { "epoch": 1.1104344450111523, "grad_norm": 0.1563536375761032, "learning_rate": 0.0001363380690806364, "loss": 1.9901, "step": 291740 }, { "epoch": 1.110472507479275, "grad_norm": 0.14188843965530396, "learning_rate": 0.00013628078595341342, "loss": 1.9851, "step": 291750 }, { "epoch": 1.1105105699473976, "grad_norm": 0.16506977379322052, "learning_rate": 0.00013622351184644526, "loss": 1.9953, "step": 291760 }, { "epoch": 1.1105486324155203, "grad_norm": 0.15015274286270142, "learning_rate": 0.00013616624675547213, "loss": 2.0002, "step": 291770 }, { "epoch": 1.1105866948836431, "grad_norm": 0.14491043984889984, "learning_rate": 0.00013610899067623743, "loss": 1.9963, "step": 291780 }, { "epoch": 1.1106247573517658, "grad_norm": 0.14073286950588226, "learning_rate": 0.00013605174360448803, "loss": 1.9929, "step": 291790 }, { "epoch": 1.1106628198198885, "grad_norm": 0.14233963191509247, "learning_rate": 0.0001359945055359741, "loss": 1.9945, "step": 291800 }, { "epoch": 1.1107008822880111, "grad_norm": 0.1646268367767334, "learning_rate": 0.00013593727646644916, "loss": 1.9905, "step": 291810 }, { "epoch": 1.1107389447561338, "grad_norm": 0.18890641629695892, "learning_rate": 0.00013588005639167012, "loss": 1.9846, "step": 291820 }, { "epoch": 1.1107770072242564, "grad_norm": 0.16375824809074402, "learning_rate": 0.00013582284530739715, "loss": 1.9948, "step": 291830 }, { "epoch": 1.110815069692379, "grad_norm": 0.15013252198696136, "learning_rate": 0.00013576564320939377, "loss": 1.9855, "step": 291840 }, { "epoch": 1.1108531321605017, "grad_norm": 0.18155845999717712, "learning_rate": 0.00013570845009342676, "loss": 1.9734, "step": 291850 }, { "epoch": 1.1108911946286244, "grad_norm": 0.1454450935125351, "learning_rate": 0.0001356512659552664, "loss": 1.9903, "step": 291860 }, { "epoch": 1.1109292570967473, "grad_norm": 0.14277590811252594, "learning_rate": 0.0001355940907906862, "loss": 1.9927, "step": 291870 }, { "epoch": 1.11096731956487, "grad_norm": 0.16399109363555908, "learning_rate": 0.00013553692459546284, "loss": 1.9982, "step": 291880 }, { "epoch": 1.1110053820329926, "grad_norm": 0.16129136085510254, "learning_rate": 0.00013547976736537655, "loss": 1.9994, "step": 291890 }, { "epoch": 1.1110434445011153, "grad_norm": 0.1648087501525879, "learning_rate": 0.00013542261909621074, "loss": 1.9908, "step": 291900 }, { "epoch": 1.111081506969238, "grad_norm": 0.1906840205192566, "learning_rate": 0.000135365479783752, "loss": 1.9899, "step": 291910 }, { "epoch": 1.1111195694373606, "grad_norm": 0.17575401067733765, "learning_rate": 0.0001353083494237906, "loss": 2.015, "step": 291920 }, { "epoch": 1.1111576319054832, "grad_norm": 0.17227664589881897, "learning_rate": 0.00013525122801211976, "loss": 1.9862, "step": 291930 }, { "epoch": 1.1111956943736059, "grad_norm": 0.19471804797649384, "learning_rate": 0.00013519411554453604, "loss": 1.9961, "step": 291940 }, { "epoch": 1.1112337568417288, "grad_norm": 0.1781400740146637, "learning_rate": 0.00013513701201683942, "loss": 1.984, "step": 291950 }, { "epoch": 1.1112718193098514, "grad_norm": 0.14845864474773407, "learning_rate": 0.00013507991742483304, "loss": 1.9821, "step": 291960 }, { "epoch": 1.111309881777974, "grad_norm": 0.15103501081466675, "learning_rate": 0.00013502283176432346, "loss": 1.9982, "step": 291970 }, { "epoch": 1.1113479442460967, "grad_norm": 0.1785365641117096, "learning_rate": 0.0001349657550311204, "loss": 1.9979, "step": 291980 }, { "epoch": 1.1113860067142194, "grad_norm": 0.2228976935148239, "learning_rate": 0.00013490868722103688, "loss": 1.9898, "step": 291990 }, { "epoch": 1.111424069182342, "grad_norm": 0.15446533262729645, "learning_rate": 0.0001348516283298893, "loss": 1.9875, "step": 292000 }, { "epoch": 1.1114621316504647, "grad_norm": 0.16202174127101898, "learning_rate": 0.00013479457835349708, "loss": 2.002, "step": 292010 }, { "epoch": 1.1115001941185874, "grad_norm": 0.1793016642332077, "learning_rate": 0.00013473753728768318, "loss": 1.9883, "step": 292020 }, { "epoch": 1.11153825658671, "grad_norm": 0.1564231514930725, "learning_rate": 0.00013468050512827358, "loss": 2.0075, "step": 292030 }, { "epoch": 1.111576319054833, "grad_norm": 0.15224598348140717, "learning_rate": 0.0001346234818710978, "loss": 1.9923, "step": 292040 }, { "epoch": 1.1116143815229556, "grad_norm": 0.17988833785057068, "learning_rate": 0.00013456646751198838, "loss": 2.0062, "step": 292050 }, { "epoch": 1.1116524439910782, "grad_norm": 0.15105636417865753, "learning_rate": 0.0001345094620467811, "loss": 2.008, "step": 292060 }, { "epoch": 1.1116905064592009, "grad_norm": 0.18763117492198944, "learning_rate": 0.00013445246547131524, "loss": 1.9823, "step": 292070 }, { "epoch": 1.1117285689273235, "grad_norm": 0.16480746865272522, "learning_rate": 0.000134395477781433, "loss": 1.9947, "step": 292080 }, { "epoch": 1.1117666313954462, "grad_norm": 0.21038369834423065, "learning_rate": 0.00013433849897298, "loss": 1.9862, "step": 292090 }, { "epoch": 1.1118046938635688, "grad_norm": 0.16916730999946594, "learning_rate": 0.00013428152904180514, "loss": 1.9857, "step": 292100 }, { "epoch": 1.1118427563316915, "grad_norm": 0.16177931427955627, "learning_rate": 0.00013422456798376048, "loss": 1.9756, "step": 292110 }, { "epoch": 1.1118808187998142, "grad_norm": 0.20139986276626587, "learning_rate": 0.0001341676157947012, "loss": 1.9941, "step": 292120 }, { "epoch": 1.111918881267937, "grad_norm": 0.16537266969680786, "learning_rate": 0.00013411067247048598, "loss": 1.9977, "step": 292130 }, { "epoch": 1.1119569437360597, "grad_norm": 0.18192315101623535, "learning_rate": 0.0001340537380069764, "loss": 1.9959, "step": 292140 }, { "epoch": 1.1119950062041823, "grad_norm": 0.16280584037303925, "learning_rate": 0.00013399681240003754, "loss": 1.9868, "step": 292150 }, { "epoch": 1.112033068672305, "grad_norm": 0.1430111676454544, "learning_rate": 0.00013393989564553744, "loss": 1.9901, "step": 292160 }, { "epoch": 1.1120711311404277, "grad_norm": 0.14697347581386566, "learning_rate": 0.00013388298773934766, "loss": 1.9884, "step": 292170 }, { "epoch": 1.1121091936085503, "grad_norm": 0.16174790263175964, "learning_rate": 0.00013382608867734265, "loss": 2.0107, "step": 292180 }, { "epoch": 1.112147256076673, "grad_norm": 0.17751865088939667, "learning_rate": 0.00013376919845540025, "loss": 1.9975, "step": 292190 }, { "epoch": 1.1121853185447956, "grad_norm": 0.2055535614490509, "learning_rate": 0.00013371231706940152, "loss": 1.9957, "step": 292200 }, { "epoch": 1.1122233810129183, "grad_norm": 0.16524241864681244, "learning_rate": 0.00013365544451523055, "loss": 2.0028, "step": 292210 }, { "epoch": 1.1122614434810412, "grad_norm": 0.17955149710178375, "learning_rate": 0.00013359858078877478, "loss": 1.9782, "step": 292220 }, { "epoch": 1.1122995059491638, "grad_norm": 0.1631380319595337, "learning_rate": 0.00013354172588592484, "loss": 1.9813, "step": 292230 }, { "epoch": 1.1123375684172865, "grad_norm": 0.1502358317375183, "learning_rate": 0.0001334848798025744, "loss": 1.9934, "step": 292240 }, { "epoch": 1.1123756308854091, "grad_norm": 0.20850756764411926, "learning_rate": 0.0001334280425346205, "loss": 1.9876, "step": 292250 }, { "epoch": 1.1124136933535318, "grad_norm": 0.1623973846435547, "learning_rate": 0.00013337121407796328, "loss": 1.9928, "step": 292260 }, { "epoch": 1.1124517558216545, "grad_norm": 0.23397907614707947, "learning_rate": 0.0001333143944285059, "loss": 1.9842, "step": 292270 }, { "epoch": 1.112489818289777, "grad_norm": 0.21935118734836578, "learning_rate": 0.000133257583582155, "loss": 1.9768, "step": 292280 }, { "epoch": 1.1125278807578998, "grad_norm": 0.1885177344083786, "learning_rate": 0.00013320078153482024, "loss": 1.9962, "step": 292290 }, { "epoch": 1.1125659432260226, "grad_norm": 0.16666430234909058, "learning_rate": 0.00013314398828241436, "loss": 1.9879, "step": 292300 }, { "epoch": 1.1126040056941453, "grad_norm": 0.14934664964675903, "learning_rate": 0.00013308720382085336, "loss": 1.9862, "step": 292310 }, { "epoch": 1.112642068162268, "grad_norm": 0.18101535737514496, "learning_rate": 0.00013303042814605641, "loss": 1.9954, "step": 292320 }, { "epoch": 1.1126801306303906, "grad_norm": 0.17227186262607574, "learning_rate": 0.00013297366125394577, "loss": 1.9845, "step": 292330 }, { "epoch": 1.1127181930985133, "grad_norm": 0.2443477064371109, "learning_rate": 0.00013291690314044696, "loss": 1.9926, "step": 292340 }, { "epoch": 1.112756255566636, "grad_norm": 0.15813399851322174, "learning_rate": 0.00013286015380148852, "loss": 1.9914, "step": 292350 }, { "epoch": 1.1127943180347586, "grad_norm": 0.1505991667509079, "learning_rate": 0.0001328034132330022, "loss": 1.9996, "step": 292360 }, { "epoch": 1.1128323805028812, "grad_norm": 0.18855006992816925, "learning_rate": 0.00013274668143092294, "loss": 1.9945, "step": 292370 }, { "epoch": 1.112870442971004, "grad_norm": 0.1625203788280487, "learning_rate": 0.00013268995839118875, "loss": 1.9891, "step": 292380 }, { "epoch": 1.1129085054391268, "grad_norm": 0.1641480177640915, "learning_rate": 0.0001326332441097407, "loss": 1.9807, "step": 292390 }, { "epoch": 1.1129465679072494, "grad_norm": 0.16145209968090057, "learning_rate": 0.00013257653858252328, "loss": 1.9977, "step": 292400 }, { "epoch": 1.112984630375372, "grad_norm": 0.1449916511774063, "learning_rate": 0.00013251984180548377, "loss": 1.9924, "step": 292410 }, { "epoch": 1.1130226928434948, "grad_norm": 0.18777312338352203, "learning_rate": 0.0001324631537745728, "loss": 1.9868, "step": 292420 }, { "epoch": 1.1130607553116174, "grad_norm": 0.14995765686035156, "learning_rate": 0.000132406474485744, "loss": 1.9997, "step": 292430 }, { "epoch": 1.11309881777974, "grad_norm": 0.1728399097919464, "learning_rate": 0.00013234980393495417, "loss": 2.0076, "step": 292440 }, { "epoch": 1.1131368802478627, "grad_norm": 0.157765731215477, "learning_rate": 0.00013229314211816317, "loss": 1.9942, "step": 292450 }, { "epoch": 1.1131749427159854, "grad_norm": 0.20687264204025269, "learning_rate": 0.00013223648903133418, "loss": 1.9799, "step": 292460 }, { "epoch": 1.1132130051841083, "grad_norm": 0.1790127158164978, "learning_rate": 0.00013217984467043314, "loss": 2.001, "step": 292470 }, { "epoch": 1.113251067652231, "grad_norm": 0.14681363105773926, "learning_rate": 0.0001321232090314295, "loss": 1.9994, "step": 292480 }, { "epoch": 1.1132891301203536, "grad_norm": 0.17148324847221375, "learning_rate": 0.00013206658211029542, "loss": 2.0021, "step": 292490 }, { "epoch": 1.1133271925884762, "grad_norm": 0.1445014625787735, "learning_rate": 0.0001320099639030064, "loss": 1.9919, "step": 292500 }, { "epoch": 1.113365255056599, "grad_norm": 0.16965574026107788, "learning_rate": 0.00013195335440554097, "loss": 2.0039, "step": 292510 }, { "epoch": 1.1134033175247215, "grad_norm": 0.17414280772209167, "learning_rate": 0.00013189675361388077, "loss": 1.9966, "step": 292520 }, { "epoch": 1.1134413799928442, "grad_norm": 0.18921469151973724, "learning_rate": 0.00013184016152401051, "loss": 1.994, "step": 292530 }, { "epoch": 1.1134794424609669, "grad_norm": 0.19784805178642273, "learning_rate": 0.000131783578131918, "loss": 2.0034, "step": 292540 }, { "epoch": 1.1135175049290895, "grad_norm": 0.19768598675727844, "learning_rate": 0.00013172700343359412, "loss": 1.9839, "step": 292550 }, { "epoch": 1.1135555673972124, "grad_norm": 0.16530479490756989, "learning_rate": 0.00013167043742503282, "loss": 1.9888, "step": 292560 }, { "epoch": 1.113593629865335, "grad_norm": 0.15673546493053436, "learning_rate": 0.00013161388010223118, "loss": 1.9941, "step": 292570 }, { "epoch": 1.1136316923334577, "grad_norm": 0.15796087682247162, "learning_rate": 0.0001315573314611892, "loss": 2.0009, "step": 292580 }, { "epoch": 1.1136697548015804, "grad_norm": 0.1805214136838913, "learning_rate": 0.00013150079149791015, "loss": 1.9993, "step": 292590 }, { "epoch": 1.113707817269703, "grad_norm": 0.15272203087806702, "learning_rate": 0.00013144426020840033, "loss": 1.9801, "step": 292600 }, { "epoch": 1.1137458797378257, "grad_norm": 0.15740281343460083, "learning_rate": 0.00013138773758866894, "loss": 1.9852, "step": 292610 }, { "epoch": 1.1137839422059483, "grad_norm": 0.17293420433998108, "learning_rate": 0.00013133122363472838, "loss": 1.9922, "step": 292620 }, { "epoch": 1.113822004674071, "grad_norm": 0.1495877504348755, "learning_rate": 0.00013127471834259408, "loss": 1.9977, "step": 292630 }, { "epoch": 1.1138600671421939, "grad_norm": 0.14915607869625092, "learning_rate": 0.00013121822170828452, "loss": 1.9756, "step": 292640 }, { "epoch": 1.1138981296103165, "grad_norm": 0.15841448307037354, "learning_rate": 0.0001311617337278212, "loss": 1.9795, "step": 292650 }, { "epoch": 1.1139361920784392, "grad_norm": 0.15663465857505798, "learning_rate": 0.00013110525439722875, "loss": 1.9907, "step": 292660 }, { "epoch": 1.1139742545465618, "grad_norm": 0.14949707686901093, "learning_rate": 0.0001310487837125347, "loss": 1.9947, "step": 292670 }, { "epoch": 1.1140123170146845, "grad_norm": 0.16750098764896393, "learning_rate": 0.00013099232166976975, "loss": 2.0036, "step": 292680 }, { "epoch": 1.1140503794828072, "grad_norm": 0.23145738244056702, "learning_rate": 0.00013093586826496762, "loss": 1.9838, "step": 292690 }, { "epoch": 1.1140884419509298, "grad_norm": 0.1893061250448227, "learning_rate": 0.00013087942349416498, "loss": 1.9821, "step": 292700 }, { "epoch": 1.1141265044190525, "grad_norm": 0.16750651597976685, "learning_rate": 0.0001308229873534016, "loss": 1.9836, "step": 292710 }, { "epoch": 1.1141645668871751, "grad_norm": 0.14061546325683594, "learning_rate": 0.00013076655983872026, "loss": 1.9814, "step": 292720 }, { "epoch": 1.114202629355298, "grad_norm": 0.15702416002750397, "learning_rate": 0.0001307101409461668, "loss": 1.9929, "step": 292730 }, { "epoch": 1.1142406918234207, "grad_norm": 0.1511785238981247, "learning_rate": 0.00013065373067178997, "loss": 1.975, "step": 292740 }, { "epoch": 1.1142787542915433, "grad_norm": 0.17471915483474731, "learning_rate": 0.00013059732901164167, "loss": 1.9757, "step": 292750 }, { "epoch": 1.114316816759666, "grad_norm": 0.17572665214538574, "learning_rate": 0.00013054093596177668, "loss": 1.9844, "step": 292760 }, { "epoch": 1.1143548792277886, "grad_norm": 0.17547960579395294, "learning_rate": 0.00013048455151825293, "loss": 1.9892, "step": 292770 }, { "epoch": 1.1143929416959113, "grad_norm": 0.20557276904582977, "learning_rate": 0.00013042817567713127, "loss": 2.0015, "step": 292780 }, { "epoch": 1.114431004164034, "grad_norm": 0.16719205677509308, "learning_rate": 0.00013037180843447562, "loss": 1.9891, "step": 292790 }, { "epoch": 1.1144690666321566, "grad_norm": 0.17433960735797882, "learning_rate": 0.0001303154497863528, "loss": 1.9981, "step": 292800 }, { "epoch": 1.1145071291002795, "grad_norm": 0.15616962313652039, "learning_rate": 0.00013025909972883264, "loss": 2.0043, "step": 292810 }, { "epoch": 1.1145451915684021, "grad_norm": 0.23012442886829376, "learning_rate": 0.00013020275825798805, "loss": 1.9873, "step": 292820 }, { "epoch": 1.1145832540365248, "grad_norm": 0.17314907908439636, "learning_rate": 0.00013014642536989497, "loss": 1.9949, "step": 292830 }, { "epoch": 1.1146213165046475, "grad_norm": 0.14603310823440552, "learning_rate": 0.0001300901010606322, "loss": 1.9841, "step": 292840 }, { "epoch": 1.1146593789727701, "grad_norm": 0.16606785356998444, "learning_rate": 0.00013003378532628146, "loss": 1.9914, "step": 292850 }, { "epoch": 1.1146974414408928, "grad_norm": 0.14196555316448212, "learning_rate": 0.00012997747816292772, "loss": 1.9911, "step": 292860 }, { "epoch": 1.1147355039090154, "grad_norm": 0.16460765898227692, "learning_rate": 0.00012992117956665865, "loss": 1.9963, "step": 292870 }, { "epoch": 1.114773566377138, "grad_norm": 0.1715785413980484, "learning_rate": 0.00012986488953356507, "loss": 1.9846, "step": 292880 }, { "epoch": 1.1148116288452607, "grad_norm": 0.1690293848514557, "learning_rate": 0.00012980860805974072, "loss": 1.9839, "step": 292890 }, { "epoch": 1.1148496913133836, "grad_norm": 0.1981569081544876, "learning_rate": 0.0001297523351412823, "loss": 1.9965, "step": 292900 }, { "epoch": 1.1148877537815063, "grad_norm": 0.14672847092151642, "learning_rate": 0.00012969607077428953, "loss": 1.9905, "step": 292910 }, { "epoch": 1.114925816249629, "grad_norm": 0.20228272676467896, "learning_rate": 0.00012963981495486498, "loss": 2.002, "step": 292920 }, { "epoch": 1.1149638787177516, "grad_norm": 0.2107199728488922, "learning_rate": 0.00012958356767911428, "loss": 1.9812, "step": 292930 }, { "epoch": 1.1150019411858743, "grad_norm": 0.15910865366458893, "learning_rate": 0.00012952732894314588, "loss": 1.9941, "step": 292940 }, { "epoch": 1.115040003653997, "grad_norm": 0.18070292472839355, "learning_rate": 0.0001294710987430715, "loss": 1.9769, "step": 292950 }, { "epoch": 1.1150780661221196, "grad_norm": 0.17187024652957916, "learning_rate": 0.00012941487707500543, "loss": 1.9875, "step": 292960 }, { "epoch": 1.1151161285902422, "grad_norm": 0.1531585156917572, "learning_rate": 0.00012935866393506512, "loss": 1.9652, "step": 292970 }, { "epoch": 1.1151541910583649, "grad_norm": 0.17606285214424133, "learning_rate": 0.0001293024593193709, "loss": 1.9983, "step": 292980 }, { "epoch": 1.1151922535264878, "grad_norm": 0.15632694959640503, "learning_rate": 0.0001292462632240461, "loss": 1.9985, "step": 292990 }, { "epoch": 1.1152303159946104, "grad_norm": 0.1512540876865387, "learning_rate": 0.00012919007564521684, "loss": 2.0103, "step": 293000 }, { "epoch": 1.115268378462733, "grad_norm": 0.15779423713684082, "learning_rate": 0.00012913389657901242, "loss": 1.9961, "step": 293010 }, { "epoch": 1.1153064409308557, "grad_norm": 0.14625298976898193, "learning_rate": 0.00012907772602156482, "loss": 1.9751, "step": 293020 }, { "epoch": 1.1153445033989784, "grad_norm": 0.19319714605808258, "learning_rate": 0.0001290215639690091, "loss": 2.0026, "step": 293030 }, { "epoch": 1.115382565867101, "grad_norm": 0.16166873276233673, "learning_rate": 0.0001289654104174832, "loss": 1.9846, "step": 293040 }, { "epoch": 1.1154206283352237, "grad_norm": 0.1930776983499527, "learning_rate": 0.00012890926536312803, "loss": 1.9897, "step": 293050 }, { "epoch": 1.1154586908033464, "grad_norm": 0.1487596333026886, "learning_rate": 0.00012885312880208728, "loss": 1.9907, "step": 293060 }, { "epoch": 1.115496753271469, "grad_norm": 0.22580161690711975, "learning_rate": 0.00012879700073050777, "loss": 1.9888, "step": 293070 }, { "epoch": 1.115534815739592, "grad_norm": 0.18477199971675873, "learning_rate": 0.00012874088114453897, "loss": 1.9819, "step": 293080 }, { "epoch": 1.1155728782077146, "grad_norm": 0.2038331925868988, "learning_rate": 0.00012868477004033353, "loss": 1.9914, "step": 293090 }, { "epoch": 1.1156109406758372, "grad_norm": 0.21437828242778778, "learning_rate": 0.00012862866741404684, "loss": 2.0052, "step": 293100 }, { "epoch": 1.1156490031439599, "grad_norm": 0.16125749051570892, "learning_rate": 0.0001285725732618372, "loss": 1.995, "step": 293110 }, { "epoch": 1.1156870656120825, "grad_norm": 0.18511207401752472, "learning_rate": 0.0001285164875798658, "loss": 1.9737, "step": 293120 }, { "epoch": 1.1157251280802052, "grad_norm": 0.20369894802570343, "learning_rate": 0.0001284604103642969, "loss": 1.9757, "step": 293130 }, { "epoch": 1.1157631905483278, "grad_norm": 0.1853613555431366, "learning_rate": 0.00012840434161129745, "loss": 1.9815, "step": 293140 }, { "epoch": 1.1158012530164505, "grad_norm": 0.16906699538230896, "learning_rate": 0.00012834828131703734, "loss": 1.9748, "step": 293150 }, { "epoch": 1.1158393154845734, "grad_norm": 0.1837167739868164, "learning_rate": 0.0001282922294776895, "loss": 1.9982, "step": 293160 }, { "epoch": 1.115877377952696, "grad_norm": 0.16122731566429138, "learning_rate": 0.00012823618608942945, "loss": 1.9906, "step": 293170 }, { "epoch": 1.1159154404208187, "grad_norm": 0.16960924863815308, "learning_rate": 0.00012818015114843584, "loss": 1.9878, "step": 293180 }, { "epoch": 1.1159535028889414, "grad_norm": 0.18560199439525604, "learning_rate": 0.0001281241246508902, "loss": 2.0013, "step": 293190 }, { "epoch": 1.115991565357064, "grad_norm": 0.20879821479320526, "learning_rate": 0.0001280681065929767, "loss": 1.9916, "step": 293200 }, { "epoch": 1.1160296278251867, "grad_norm": 0.1578042060136795, "learning_rate": 0.00012801209697088268, "loss": 1.9951, "step": 293210 }, { "epoch": 1.1160676902933093, "grad_norm": 0.1498941034078598, "learning_rate": 0.00012795609578079815, "loss": 1.9811, "step": 293220 }, { "epoch": 1.116105752761432, "grad_norm": 0.156406968832016, "learning_rate": 0.00012790010301891603, "loss": 1.9818, "step": 293230 }, { "epoch": 1.1161438152295546, "grad_norm": 0.1441483199596405, "learning_rate": 0.0001278441186814321, "loss": 1.9711, "step": 293240 }, { "epoch": 1.1161818776976775, "grad_norm": 0.15797194838523865, "learning_rate": 0.0001277881427645451, "loss": 1.992, "step": 293250 }, { "epoch": 1.1162199401658002, "grad_norm": 0.17018261551856995, "learning_rate": 0.00012773217526445652, "loss": 1.9726, "step": 293260 }, { "epoch": 1.1162580026339228, "grad_norm": 0.1910189986228943, "learning_rate": 0.0001276762161773707, "loss": 1.9828, "step": 293270 }, { "epoch": 1.1162960651020455, "grad_norm": 0.15563057363033295, "learning_rate": 0.0001276202654994949, "loss": 1.9681, "step": 293280 }, { "epoch": 1.1163341275701681, "grad_norm": 0.19308650493621826, "learning_rate": 0.0001275643232270392, "loss": 1.9683, "step": 293290 }, { "epoch": 1.1163721900382908, "grad_norm": 0.15804284811019897, "learning_rate": 0.0001275083893562165, "loss": 2.002, "step": 293300 }, { "epoch": 1.1164102525064135, "grad_norm": 0.1859627366065979, "learning_rate": 0.00012745246388324262, "loss": 1.9815, "step": 293310 }, { "epoch": 1.1164483149745361, "grad_norm": 0.20299947261810303, "learning_rate": 0.00012739654680433605, "loss": 1.9811, "step": 293320 }, { "epoch": 1.116486377442659, "grad_norm": 0.17057934403419495, "learning_rate": 0.00012734063811571834, "loss": 1.9893, "step": 293330 }, { "epoch": 1.1165244399107817, "grad_norm": 0.1744338870048523, "learning_rate": 0.00012728473781361372, "loss": 1.9985, "step": 293340 }, { "epoch": 1.1165625023789043, "grad_norm": 0.2139360010623932, "learning_rate": 0.00012722884589424932, "loss": 1.9871, "step": 293350 }, { "epoch": 1.116600564847027, "grad_norm": 0.20919451117515564, "learning_rate": 0.000127172962353855, "loss": 1.9833, "step": 293360 }, { "epoch": 1.1166386273151496, "grad_norm": 0.16842421889305115, "learning_rate": 0.00012711708718866365, "loss": 1.9971, "step": 293370 }, { "epoch": 1.1166766897832723, "grad_norm": 0.19921022653579712, "learning_rate": 0.00012706122039491075, "loss": 1.9929, "step": 293380 }, { "epoch": 1.116714752251395, "grad_norm": 0.1602468341588974, "learning_rate": 0.00012700536196883473, "loss": 1.9998, "step": 293390 }, { "epoch": 1.1167528147195176, "grad_norm": 0.16518428921699524, "learning_rate": 0.0001269495119066768, "loss": 1.9892, "step": 293400 }, { "epoch": 1.1167908771876403, "grad_norm": 0.15834908187389374, "learning_rate": 0.000126893670204681, "loss": 1.9885, "step": 293410 }, { "epoch": 1.1168289396557631, "grad_norm": 0.1923481523990631, "learning_rate": 0.00012683783685909418, "loss": 1.9807, "step": 293420 }, { "epoch": 1.1168670021238858, "grad_norm": 0.18503503501415253, "learning_rate": 0.00012678201186616602, "loss": 1.9882, "step": 293430 }, { "epoch": 1.1169050645920084, "grad_norm": 0.1797768622636795, "learning_rate": 0.00012672619522214885, "loss": 1.9876, "step": 293440 }, { "epoch": 1.116943127060131, "grad_norm": 0.19215306639671326, "learning_rate": 0.000126670386923298, "loss": 1.9717, "step": 293450 }, { "epoch": 1.1169811895282538, "grad_norm": 0.16550879180431366, "learning_rate": 0.00012661458696587157, "loss": 1.9926, "step": 293460 }, { "epoch": 1.1170192519963764, "grad_norm": 0.18938034772872925, "learning_rate": 0.0001265587953461304, "loss": 2.0031, "step": 293470 }, { "epoch": 1.117057314464499, "grad_norm": 0.1606336086988449, "learning_rate": 0.00012650301206033803, "loss": 1.9919, "step": 293480 }, { "epoch": 1.1170953769326217, "grad_norm": 0.18568719923496246, "learning_rate": 0.00012644723710476097, "loss": 1.9744, "step": 293490 }, { "epoch": 1.1171334394007446, "grad_norm": 0.1861104965209961, "learning_rate": 0.0001263914704756684, "loss": 1.9889, "step": 293500 }, { "epoch": 1.1171715018688673, "grad_norm": 0.18612819910049438, "learning_rate": 0.00012633571216933242, "loss": 2.0001, "step": 293510 }, { "epoch": 1.11720956433699, "grad_norm": 0.1559780389070511, "learning_rate": 0.00012627996218202769, "loss": 1.9946, "step": 293520 }, { "epoch": 1.1172476268051126, "grad_norm": 0.1587904393672943, "learning_rate": 0.0001262242205100318, "loss": 1.9833, "step": 293530 }, { "epoch": 1.1172856892732352, "grad_norm": 0.20348505675792694, "learning_rate": 0.0001261684871496251, "loss": 1.9875, "step": 293540 }, { "epoch": 1.117323751741358, "grad_norm": 0.15221315622329712, "learning_rate": 0.0001261127620970907, "loss": 1.9776, "step": 293550 }, { "epoch": 1.1173618142094806, "grad_norm": 0.17232543230056763, "learning_rate": 0.00012605704534871453, "loss": 1.9885, "step": 293560 }, { "epoch": 1.1173998766776032, "grad_norm": 0.18388041853904724, "learning_rate": 0.0001260013369007852, "loss": 1.995, "step": 293570 }, { "epoch": 1.1174379391457259, "grad_norm": 0.1528346687555313, "learning_rate": 0.00012594563674959403, "loss": 1.9756, "step": 293580 }, { "epoch": 1.1174760016138487, "grad_norm": 0.16484560072422028, "learning_rate": 0.0001258899448914353, "loss": 1.9857, "step": 293590 }, { "epoch": 1.1175140640819714, "grad_norm": 0.14479511976242065, "learning_rate": 0.00012583426132260585, "loss": 1.9906, "step": 293600 }, { "epoch": 1.117552126550094, "grad_norm": 0.16426406800746918, "learning_rate": 0.00012577858603940545, "loss": 1.9831, "step": 293610 }, { "epoch": 1.1175901890182167, "grad_norm": 0.1525418609380722, "learning_rate": 0.0001257229190381365, "loss": 1.9849, "step": 293620 }, { "epoch": 1.1176282514863394, "grad_norm": 0.14647795259952545, "learning_rate": 0.00012566726031510418, "loss": 1.9824, "step": 293630 }, { "epoch": 1.117666313954462, "grad_norm": 0.16383711993694305, "learning_rate": 0.00012561160986661636, "loss": 1.9901, "step": 293640 }, { "epoch": 1.1177043764225847, "grad_norm": 0.1702844202518463, "learning_rate": 0.0001255559676889838, "loss": 1.9863, "step": 293650 }, { "epoch": 1.1177424388907073, "grad_norm": 0.22296492755413055, "learning_rate": 0.0001255003337785199, "loss": 1.9959, "step": 293660 }, { "epoch": 1.1177805013588302, "grad_norm": 0.23798371851444244, "learning_rate": 0.0001254447081315408, "loss": 1.9828, "step": 293670 }, { "epoch": 1.1178185638269529, "grad_norm": 0.18219825625419617, "learning_rate": 0.00012538909074436534, "loss": 1.9906, "step": 293680 }, { "epoch": 1.1178566262950755, "grad_norm": 0.2241986244916916, "learning_rate": 0.00012533348161331522, "loss": 1.9942, "step": 293690 }, { "epoch": 1.1178946887631982, "grad_norm": 0.15364870429039001, "learning_rate": 0.00012527788073471473, "loss": 1.9898, "step": 293700 }, { "epoch": 1.1179327512313209, "grad_norm": 0.20049087703227997, "learning_rate": 0.000125222288104891, "loss": 1.9918, "step": 293710 }, { "epoch": 1.1179708136994435, "grad_norm": 0.1605052649974823, "learning_rate": 0.00012516670372017375, "loss": 1.9906, "step": 293720 }, { "epoch": 1.1180088761675662, "grad_norm": 0.16484107077121735, "learning_rate": 0.00012511112757689557, "loss": 1.982, "step": 293730 }, { "epoch": 1.1180469386356888, "grad_norm": 0.16114190220832825, "learning_rate": 0.00012505555967139164, "loss": 1.9787, "step": 293740 }, { "epoch": 1.1180850011038115, "grad_norm": 0.17148414254188538, "learning_rate": 0.000125, "loss": 1.987, "step": 293750 }, { "epoch": 1.1181230635719344, "grad_norm": 0.17402517795562744, "learning_rate": 0.00012494444855906122, "loss": 1.9793, "step": 293760 }, { "epoch": 1.118161126040057, "grad_norm": 0.1686442792415619, "learning_rate": 0.00012488890534491874, "loss": 1.9868, "step": 293770 }, { "epoch": 1.1181991885081797, "grad_norm": 0.17639507353305817, "learning_rate": 0.00012483337035391862, "loss": 1.973, "step": 293780 }, { "epoch": 1.1182372509763023, "grad_norm": 0.25353384017944336, "learning_rate": 0.00012477784358240962, "loss": 1.9817, "step": 293790 }, { "epoch": 1.118275313444425, "grad_norm": 0.14957685768604279, "learning_rate": 0.00012472232502674325, "loss": 1.9882, "step": 293800 }, { "epoch": 1.1183133759125476, "grad_norm": 0.1678726077079773, "learning_rate": 0.00012466681468327373, "loss": 1.9836, "step": 293810 }, { "epoch": 1.1183514383806703, "grad_norm": 0.17077717185020447, "learning_rate": 0.00012461131254835789, "loss": 1.9959, "step": 293820 }, { "epoch": 1.118389500848793, "grad_norm": 0.2349521964788437, "learning_rate": 0.0001245558186183553, "loss": 1.9822, "step": 293830 }, { "epoch": 1.1184275633169156, "grad_norm": 0.18050359189510345, "learning_rate": 0.00012450033288962826, "loss": 1.9847, "step": 293840 }, { "epoch": 1.1184656257850385, "grad_norm": 0.15957248210906982, "learning_rate": 0.00012444485535854168, "loss": 1.9902, "step": 293850 }, { "epoch": 1.1185036882531612, "grad_norm": 0.16633108258247375, "learning_rate": 0.00012438938602146328, "loss": 1.9826, "step": 293860 }, { "epoch": 1.1185417507212838, "grad_norm": 0.1867218166589737, "learning_rate": 0.0001243339248747633, "loss": 1.9942, "step": 293870 }, { "epoch": 1.1185798131894065, "grad_norm": 0.1948414444923401, "learning_rate": 0.00012427847191481473, "loss": 1.9838, "step": 293880 }, { "epoch": 1.1186178756575291, "grad_norm": 0.16936980187892914, "learning_rate": 0.00012422302713799326, "loss": 2.009, "step": 293890 }, { "epoch": 1.1186559381256518, "grad_norm": 0.1969858705997467, "learning_rate": 0.00012416759054067735, "loss": 1.9868, "step": 293900 }, { "epoch": 1.1186940005937744, "grad_norm": 0.17792664468288422, "learning_rate": 0.00012411216211924775, "loss": 1.9882, "step": 293910 }, { "epoch": 1.118732063061897, "grad_norm": 0.17175708711147308, "learning_rate": 0.0001240567418700884, "loss": 1.9842, "step": 293920 }, { "epoch": 1.1187701255300198, "grad_norm": 0.1614762544631958, "learning_rate": 0.00012400132978958555, "loss": 1.9829, "step": 293930 }, { "epoch": 1.1188081879981426, "grad_norm": 0.17596665024757385, "learning_rate": 0.00012394592587412824, "loss": 1.9943, "step": 293940 }, { "epoch": 1.1188462504662653, "grad_norm": 0.18127337098121643, "learning_rate": 0.00012389053012010808, "loss": 2.001, "step": 293950 }, { "epoch": 1.118884312934388, "grad_norm": 0.1930181235074997, "learning_rate": 0.0001238351425239195, "loss": 1.9824, "step": 293960 }, { "epoch": 1.1189223754025106, "grad_norm": 0.15906044840812683, "learning_rate": 0.00012377976308195932, "loss": 1.9944, "step": 293970 }, { "epoch": 1.1189604378706333, "grad_norm": 0.1689421683549881, "learning_rate": 0.00012372439179062734, "loss": 1.9806, "step": 293980 }, { "epoch": 1.118998500338756, "grad_norm": 0.17643670737743378, "learning_rate": 0.00012366902864632573, "loss": 1.9793, "step": 293990 }, { "epoch": 1.1190365628068786, "grad_norm": 0.19098548591136932, "learning_rate": 0.0001236136736454595, "loss": 1.9854, "step": 294000 }, { "epoch": 1.1190746252750012, "grad_norm": 0.16189756989479065, "learning_rate": 0.0001235583267844362, "loss": 1.9983, "step": 294010 }, { "epoch": 1.1191126877431241, "grad_norm": 0.17617790400981903, "learning_rate": 0.00012350298805966604, "loss": 1.9805, "step": 294020 }, { "epoch": 1.1191507502112468, "grad_norm": 0.16275504231452942, "learning_rate": 0.0001234476574675618, "loss": 1.978, "step": 294030 }, { "epoch": 1.1191888126793694, "grad_norm": 0.17957115173339844, "learning_rate": 0.00012339233500453906, "loss": 1.996, "step": 294040 }, { "epoch": 1.119226875147492, "grad_norm": 0.1702452450990677, "learning_rate": 0.0001233370206670159, "loss": 1.9915, "step": 294050 }, { "epoch": 1.1192649376156147, "grad_norm": 0.17218199372291565, "learning_rate": 0.0001232817144514131, "loss": 1.9914, "step": 294060 }, { "epoch": 1.1193030000837374, "grad_norm": 0.15313132107257843, "learning_rate": 0.000123226416354154, "loss": 1.9918, "step": 294070 }, { "epoch": 1.11934106255186, "grad_norm": 0.15285362303256989, "learning_rate": 0.00012317112637166456, "loss": 1.9765, "step": 294080 }, { "epoch": 1.1193791250199827, "grad_norm": 0.22781339287757874, "learning_rate": 0.00012311584450037344, "loss": 1.9786, "step": 294090 }, { "epoch": 1.1194171874881054, "grad_norm": 0.18625149130821228, "learning_rate": 0.00012306057073671196, "loss": 1.9889, "step": 294100 }, { "epoch": 1.1194552499562282, "grad_norm": 0.1693805605173111, "learning_rate": 0.00012300530507711383, "loss": 1.9917, "step": 294110 }, { "epoch": 1.119493312424351, "grad_norm": 0.1493048220872879, "learning_rate": 0.00012295004751801565, "loss": 1.9787, "step": 294120 }, { "epoch": 1.1195313748924736, "grad_norm": 0.23088662326335907, "learning_rate": 0.00012289479805585642, "loss": 1.9755, "step": 294130 }, { "epoch": 1.1195694373605962, "grad_norm": 0.17690658569335938, "learning_rate": 0.0001228395566870778, "loss": 1.9939, "step": 294140 }, { "epoch": 1.1196074998287189, "grad_norm": 0.16235333681106567, "learning_rate": 0.00012278432340812412, "loss": 1.9822, "step": 294150 }, { "epoch": 1.1196455622968415, "grad_norm": 0.17188286781311035, "learning_rate": 0.00012272909821544237, "loss": 1.9945, "step": 294160 }, { "epoch": 1.1196836247649642, "grad_norm": 0.21481271088123322, "learning_rate": 0.00012267388110548188, "loss": 1.9845, "step": 294170 }, { "epoch": 1.1197216872330868, "grad_norm": 0.15324686467647552, "learning_rate": 0.0001226186720746949, "loss": 1.979, "step": 294180 }, { "epoch": 1.1197597497012097, "grad_norm": 0.17416979372501373, "learning_rate": 0.00012256347111953603, "loss": 1.983, "step": 294190 }, { "epoch": 1.1197978121693324, "grad_norm": 0.16954316198825836, "learning_rate": 0.00012250827823646256, "loss": 1.9834, "step": 294200 }, { "epoch": 1.119835874637455, "grad_norm": 0.1516610085964203, "learning_rate": 0.0001224530934219343, "loss": 1.9998, "step": 294210 }, { "epoch": 1.1198739371055777, "grad_norm": 0.1607905924320221, "learning_rate": 0.00012239791667241384, "loss": 1.9864, "step": 294220 }, { "epoch": 1.1199119995737004, "grad_norm": 0.1633176952600479, "learning_rate": 0.00012234274798436612, "loss": 1.9747, "step": 294230 }, { "epoch": 1.119950062041823, "grad_norm": 0.16344870626926422, "learning_rate": 0.00012228758735425882, "loss": 1.9829, "step": 294240 }, { "epoch": 1.1199881245099457, "grad_norm": 0.14256510138511658, "learning_rate": 0.00012223243477856212, "loss": 1.9633, "step": 294250 }, { "epoch": 1.1200261869780683, "grad_norm": 0.2190176248550415, "learning_rate": 0.0001221772902537488, "loss": 1.9703, "step": 294260 }, { "epoch": 1.120064249446191, "grad_norm": 0.1784692406654358, "learning_rate": 0.00012212215377629414, "loss": 1.9881, "step": 294270 }, { "epoch": 1.1201023119143139, "grad_norm": 0.19331470131874084, "learning_rate": 0.00012206702534267622, "loss": 1.9846, "step": 294280 }, { "epoch": 1.1201403743824365, "grad_norm": 0.19215671718120575, "learning_rate": 0.00012201190494937541, "loss": 1.9854, "step": 294290 }, { "epoch": 1.1201784368505592, "grad_norm": 0.17129290103912354, "learning_rate": 0.00012195679259287485, "loss": 1.9964, "step": 294300 }, { "epoch": 1.1202164993186818, "grad_norm": 0.247706338763237, "learning_rate": 0.00012190168826966003, "loss": 1.9791, "step": 294310 }, { "epoch": 1.1202545617868045, "grad_norm": 0.23695670068264008, "learning_rate": 0.00012184659197621928, "loss": 1.9807, "step": 294320 }, { "epoch": 1.1202926242549271, "grad_norm": 0.202765554189682, "learning_rate": 0.00012179150370904319, "loss": 1.9836, "step": 294330 }, { "epoch": 1.1203306867230498, "grad_norm": 0.15483033657073975, "learning_rate": 0.00012173642346462521, "loss": 1.9949, "step": 294340 }, { "epoch": 1.1203687491911725, "grad_norm": 0.1478613317012787, "learning_rate": 0.00012168135123946112, "loss": 1.9832, "step": 294350 }, { "epoch": 1.1204068116592953, "grad_norm": 0.19954794645309448, "learning_rate": 0.00012162628703004924, "loss": 1.9974, "step": 294360 }, { "epoch": 1.120444874127418, "grad_norm": 0.20727083086967468, "learning_rate": 0.00012157123083289068, "loss": 1.9894, "step": 294370 }, { "epoch": 1.1204829365955407, "grad_norm": 0.22470323741436005, "learning_rate": 0.00012151618264448877, "loss": 1.9849, "step": 294380 }, { "epoch": 1.1205209990636633, "grad_norm": 0.17775776982307434, "learning_rate": 0.0001214611424613496, "loss": 1.9869, "step": 294390 }, { "epoch": 1.120559061531786, "grad_norm": 0.19694171845912933, "learning_rate": 0.00012140611027998177, "loss": 1.9774, "step": 294400 }, { "epoch": 1.1205971239999086, "grad_norm": 0.159882590174675, "learning_rate": 0.00012135108609689632, "loss": 1.9881, "step": 294410 }, { "epoch": 1.1206351864680313, "grad_norm": 0.17960825562477112, "learning_rate": 0.00012129606990860703, "loss": 1.9956, "step": 294420 }, { "epoch": 1.120673248936154, "grad_norm": 0.1673920601606369, "learning_rate": 0.0001212410617116299, "loss": 1.9874, "step": 294430 }, { "epoch": 1.1207113114042766, "grad_norm": 0.1664450317621231, "learning_rate": 0.00012118606150248378, "loss": 1.9838, "step": 294440 }, { "epoch": 1.1207493738723995, "grad_norm": 0.18804626166820526, "learning_rate": 0.0001211310692776898, "loss": 1.9796, "step": 294450 }, { "epoch": 1.1207874363405221, "grad_norm": 0.16094692051410675, "learning_rate": 0.00012107608503377182, "loss": 1.9809, "step": 294460 }, { "epoch": 1.1208254988086448, "grad_norm": 0.2077358514070511, "learning_rate": 0.00012102110876725603, "loss": 1.9958, "step": 294470 }, { "epoch": 1.1208635612767675, "grad_norm": 0.1666060984134674, "learning_rate": 0.00012096614047467125, "loss": 1.9916, "step": 294480 }, { "epoch": 1.12090162374489, "grad_norm": 0.16506783664226532, "learning_rate": 0.00012091118015254876, "loss": 1.9917, "step": 294490 }, { "epoch": 1.1209396862130128, "grad_norm": 0.156228706240654, "learning_rate": 0.0001208562277974225, "loss": 1.9862, "step": 294500 }, { "epoch": 1.1209777486811354, "grad_norm": 0.2048158347606659, "learning_rate": 0.00012080128340582863, "loss": 1.9775, "step": 294510 }, { "epoch": 1.121015811149258, "grad_norm": 0.15326504409313202, "learning_rate": 0.00012074634697430619, "loss": 1.9846, "step": 294520 }, { "epoch": 1.121053873617381, "grad_norm": 0.15472504496574402, "learning_rate": 0.00012069141849939646, "loss": 1.9914, "step": 294530 }, { "epoch": 1.1210919360855036, "grad_norm": 0.15494997799396515, "learning_rate": 0.00012063649797764325, "loss": 1.9635, "step": 294540 }, { "epoch": 1.1211299985536263, "grad_norm": 0.1556081771850586, "learning_rate": 0.000120581585405593, "loss": 2.0011, "step": 294550 }, { "epoch": 1.121168061021749, "grad_norm": 0.15699365735054016, "learning_rate": 0.00012052668077979451, "loss": 1.9982, "step": 294560 }, { "epoch": 1.1212061234898716, "grad_norm": 0.21263249218463898, "learning_rate": 0.00012047178409679909, "loss": 1.9909, "step": 294570 }, { "epoch": 1.1212441859579942, "grad_norm": 0.22214330732822418, "learning_rate": 0.00012041689535316069, "loss": 1.984, "step": 294580 }, { "epoch": 1.121282248426117, "grad_norm": 0.19332750141620636, "learning_rate": 0.00012036201454543567, "loss": 1.9856, "step": 294590 }, { "epoch": 1.1213203108942396, "grad_norm": 0.16281723976135254, "learning_rate": 0.00012030714167018275, "loss": 1.983, "step": 294600 }, { "epoch": 1.1213583733623622, "grad_norm": 0.1823878437280655, "learning_rate": 0.00012025227672396332, "loss": 1.9874, "step": 294610 }, { "epoch": 1.121396435830485, "grad_norm": 0.150102898478508, "learning_rate": 0.00012019741970334113, "loss": 1.9854, "step": 294620 }, { "epoch": 1.1214344982986078, "grad_norm": 0.15728197991847992, "learning_rate": 0.00012014257060488248, "loss": 1.9612, "step": 294630 }, { "epoch": 1.1214725607667304, "grad_norm": 0.19842971861362457, "learning_rate": 0.00012008772942515617, "loss": 1.9752, "step": 294640 }, { "epoch": 1.121510623234853, "grad_norm": 0.16742175817489624, "learning_rate": 0.00012003289616073342, "loss": 1.9868, "step": 294650 }, { "epoch": 1.1215486857029757, "grad_norm": 0.17433230578899384, "learning_rate": 0.0001199780708081879, "loss": 1.9827, "step": 294660 }, { "epoch": 1.1215867481710984, "grad_norm": 0.16636930406093597, "learning_rate": 0.00011992325336409576, "loss": 1.9903, "step": 294670 }, { "epoch": 1.121624810639221, "grad_norm": 0.17104153335094452, "learning_rate": 0.00011986844382503575, "loss": 1.9881, "step": 294680 }, { "epoch": 1.1216628731073437, "grad_norm": 0.18843293190002441, "learning_rate": 0.00011981364218758889, "loss": 1.9935, "step": 294690 }, { "epoch": 1.1217009355754666, "grad_norm": 0.18074916303157806, "learning_rate": 0.00011975884844833884, "loss": 1.9796, "step": 294700 }, { "epoch": 1.1217389980435892, "grad_norm": 0.16955699026584625, "learning_rate": 0.00011970406260387162, "loss": 1.9886, "step": 294710 }, { "epoch": 1.1217770605117119, "grad_norm": 0.15711715817451477, "learning_rate": 0.00011964928465077568, "loss": 1.9702, "step": 294720 }, { "epoch": 1.1218151229798345, "grad_norm": 0.20470353960990906, "learning_rate": 0.00011959451458564202, "loss": 1.984, "step": 294730 }, { "epoch": 1.1218531854479572, "grad_norm": 0.19052280485630035, "learning_rate": 0.00011953975240506404, "loss": 1.9768, "step": 294740 }, { "epoch": 1.1218912479160799, "grad_norm": 0.2238382250070572, "learning_rate": 0.00011948499810563751, "loss": 1.9749, "step": 294750 }, { "epoch": 1.1219293103842025, "grad_norm": 0.20275108516216278, "learning_rate": 0.00011943025168396093, "loss": 1.9908, "step": 294760 }, { "epoch": 1.1219673728523252, "grad_norm": 0.1481522172689438, "learning_rate": 0.00011937551313663486, "loss": 1.9866, "step": 294770 }, { "epoch": 1.1220054353204478, "grad_norm": 0.1640080064535141, "learning_rate": 0.00011932078246026268, "loss": 1.978, "step": 294780 }, { "epoch": 1.1220434977885705, "grad_norm": 0.15450188517570496, "learning_rate": 0.00011926605965144988, "loss": 1.9854, "step": 294790 }, { "epoch": 1.1220815602566934, "grad_norm": 0.1643836796283722, "learning_rate": 0.00011921134470680461, "loss": 1.9925, "step": 294800 }, { "epoch": 1.122119622724816, "grad_norm": 0.1798993945121765, "learning_rate": 0.00011915663762293733, "loss": 1.9843, "step": 294810 }, { "epoch": 1.1221576851929387, "grad_norm": 0.19793778657913208, "learning_rate": 0.00011910193839646105, "loss": 1.9928, "step": 294820 }, { "epoch": 1.1221957476610613, "grad_norm": 0.16370028257369995, "learning_rate": 0.00011904724702399117, "loss": 1.9861, "step": 294830 }, { "epoch": 1.122233810129184, "grad_norm": 0.14759381115436554, "learning_rate": 0.00011899256350214543, "loss": 1.9916, "step": 294840 }, { "epoch": 1.1222718725973067, "grad_norm": 0.17767883837223053, "learning_rate": 0.00011893788782754417, "loss": 1.9853, "step": 294850 }, { "epoch": 1.1223099350654293, "grad_norm": 0.14556005597114563, "learning_rate": 0.00011888321999680989, "loss": 1.9898, "step": 294860 }, { "epoch": 1.122347997533552, "grad_norm": 0.15929433703422546, "learning_rate": 0.00011882856000656783, "loss": 1.9894, "step": 294870 }, { "epoch": 1.1223860600016748, "grad_norm": 0.1680683046579361, "learning_rate": 0.00011877390785344538, "loss": 1.9915, "step": 294880 }, { "epoch": 1.1224241224697975, "grad_norm": 0.1863175928592682, "learning_rate": 0.00011871926353407259, "loss": 1.9944, "step": 294890 }, { "epoch": 1.1224621849379202, "grad_norm": 0.18595843017101288, "learning_rate": 0.00011866462704508168, "loss": 1.9977, "step": 294900 }, { "epoch": 1.1225002474060428, "grad_norm": 0.1734783798456192, "learning_rate": 0.00011860999838310743, "loss": 1.9636, "step": 294910 }, { "epoch": 1.1225383098741655, "grad_norm": 0.15593071281909943, "learning_rate": 0.00011855537754478701, "loss": 1.9946, "step": 294920 }, { "epoch": 1.1225763723422881, "grad_norm": 0.1951831728219986, "learning_rate": 0.00011850076452675995, "loss": 1.9852, "step": 294930 }, { "epoch": 1.1226144348104108, "grad_norm": 0.1786825805902481, "learning_rate": 0.00011844615932566827, "loss": 1.9759, "step": 294940 }, { "epoch": 1.1226524972785334, "grad_norm": 0.18678231537342072, "learning_rate": 0.00011839156193815631, "loss": 1.9857, "step": 294950 }, { "epoch": 1.122690559746656, "grad_norm": 0.18317410349845886, "learning_rate": 0.00011833697236087082, "loss": 2.0051, "step": 294960 }, { "epoch": 1.122728622214779, "grad_norm": 0.16210854053497314, "learning_rate": 0.00011828239059046109, "loss": 1.9848, "step": 294970 }, { "epoch": 1.1227666846829016, "grad_norm": 0.1885615587234497, "learning_rate": 0.00011822781662357851, "loss": 1.9832, "step": 294980 }, { "epoch": 1.1228047471510243, "grad_norm": 0.18907485902309418, "learning_rate": 0.00011817325045687716, "loss": 1.988, "step": 294990 }, { "epoch": 1.122842809619147, "grad_norm": 0.14906156063079834, "learning_rate": 0.00011811869208701332, "loss": 1.9745, "step": 295000 }, { "epoch": 1.1228808720872696, "grad_norm": 0.1860986202955246, "learning_rate": 0.00011806414151064581, "loss": 1.9751, "step": 295010 }, { "epoch": 1.1229189345553923, "grad_norm": 0.20608893036842346, "learning_rate": 0.00011800959872443567, "loss": 1.9909, "step": 295020 }, { "epoch": 1.122956997023515, "grad_norm": 0.17302121222019196, "learning_rate": 0.00011795506372504644, "loss": 1.9879, "step": 295030 }, { "epoch": 1.1229950594916376, "grad_norm": 0.1496732234954834, "learning_rate": 0.000117900536509144, "loss": 1.9784, "step": 295040 }, { "epoch": 1.1230331219597605, "grad_norm": 0.2081655114889145, "learning_rate": 0.00011784601707339665, "loss": 1.9958, "step": 295050 }, { "epoch": 1.1230711844278831, "grad_norm": 0.2021922916173935, "learning_rate": 0.00011779150541447492, "loss": 1.9986, "step": 295060 }, { "epoch": 1.1231092468960058, "grad_norm": 0.23316898941993713, "learning_rate": 0.00011773700152905198, "loss": 1.9816, "step": 295070 }, { "epoch": 1.1231473093641284, "grad_norm": 0.2011638879776001, "learning_rate": 0.00011768250541380315, "loss": 1.9981, "step": 295080 }, { "epoch": 1.123185371832251, "grad_norm": 0.1774028092622757, "learning_rate": 0.00011762801706540616, "loss": 1.9899, "step": 295090 }, { "epoch": 1.1232234343003737, "grad_norm": 0.181764155626297, "learning_rate": 0.00011757353648054114, "loss": 1.9833, "step": 295100 }, { "epoch": 1.1232614967684964, "grad_norm": 0.1900632381439209, "learning_rate": 0.0001175190636558906, "loss": 1.9869, "step": 295110 }, { "epoch": 1.123299559236619, "grad_norm": 0.15032033622264862, "learning_rate": 0.00011746459858813935, "loss": 1.9868, "step": 295120 }, { "epoch": 1.1233376217047417, "grad_norm": 0.25727778673171997, "learning_rate": 0.0001174101412739747, "loss": 1.9942, "step": 295130 }, { "epoch": 1.1233756841728646, "grad_norm": 0.17406918108463287, "learning_rate": 0.00011735569171008609, "loss": 1.9872, "step": 295140 }, { "epoch": 1.1234137466409873, "grad_norm": 0.19947175681591034, "learning_rate": 0.00011730124989316553, "loss": 1.9611, "step": 295150 }, { "epoch": 1.12345180910911, "grad_norm": 0.23555511236190796, "learning_rate": 0.00011724681581990726, "loss": 1.9822, "step": 295160 }, { "epoch": 1.1234898715772326, "grad_norm": 0.15338340401649475, "learning_rate": 0.00011719238948700788, "loss": 1.9855, "step": 295170 }, { "epoch": 1.1235279340453552, "grad_norm": 0.14458732306957245, "learning_rate": 0.0001171379708911664, "loss": 1.9884, "step": 295180 }, { "epoch": 1.1235659965134779, "grad_norm": 0.16276195645332336, "learning_rate": 0.00011708356002908416, "loss": 1.9876, "step": 295190 }, { "epoch": 1.1236040589816005, "grad_norm": 0.1536705493927002, "learning_rate": 0.00011702915689746474, "loss": 1.9978, "step": 295200 }, { "epoch": 1.1236421214497232, "grad_norm": 0.2029026299715042, "learning_rate": 0.00011697476149301423, "loss": 1.9782, "step": 295210 }, { "epoch": 1.123680183917846, "grad_norm": 0.15842191874980927, "learning_rate": 0.00011692037381244092, "loss": 1.987, "step": 295220 }, { "epoch": 1.1237182463859687, "grad_norm": 0.17644508183002472, "learning_rate": 0.00011686599385245549, "loss": 1.9821, "step": 295230 }, { "epoch": 1.1237563088540914, "grad_norm": 0.17277035117149353, "learning_rate": 0.00011681162160977098, "loss": 1.9751, "step": 295240 }, { "epoch": 1.123794371322214, "grad_norm": 0.1563599556684494, "learning_rate": 0.00011675725708110268, "loss": 1.9903, "step": 295250 }, { "epoch": 1.1238324337903367, "grad_norm": 0.20365029573440552, "learning_rate": 0.00011670290026316837, "loss": 1.9731, "step": 295260 }, { "epoch": 1.1238704962584594, "grad_norm": 0.15751436352729797, "learning_rate": 0.00011664855115268791, "loss": 1.9847, "step": 295270 }, { "epoch": 1.123908558726582, "grad_norm": 0.2296774536371231, "learning_rate": 0.00011659420974638374, "loss": 1.9644, "step": 295280 }, { "epoch": 1.1239466211947047, "grad_norm": 0.1706577092409134, "learning_rate": 0.00011653987604098043, "loss": 1.9926, "step": 295290 }, { "epoch": 1.1239846836628273, "grad_norm": 0.16438205540180206, "learning_rate": 0.00011648555003320493, "loss": 1.9909, "step": 295300 }, { "epoch": 1.1240227461309502, "grad_norm": 0.15377695858478546, "learning_rate": 0.00011643123171978664, "loss": 1.9713, "step": 295310 }, { "epoch": 1.1240608085990729, "grad_norm": 0.17905206978321075, "learning_rate": 0.00011637692109745707, "loss": 1.9679, "step": 295320 }, { "epoch": 1.1240988710671955, "grad_norm": 0.17283768951892853, "learning_rate": 0.0001163226181629502, "loss": 1.988, "step": 295330 }, { "epoch": 1.1241369335353182, "grad_norm": 0.17996124923229218, "learning_rate": 0.00011626832291300216, "loss": 1.9979, "step": 295340 }, { "epoch": 1.1241749960034408, "grad_norm": 0.21404337882995605, "learning_rate": 0.00011621403534435155, "loss": 2.0035, "step": 295350 }, { "epoch": 1.1242130584715635, "grad_norm": 0.1598508507013321, "learning_rate": 0.00011615975545373914, "loss": 1.9755, "step": 295360 }, { "epoch": 1.1242511209396862, "grad_norm": 0.17142625153064728, "learning_rate": 0.00011610548323790814, "loss": 1.9922, "step": 295370 }, { "epoch": 1.1242891834078088, "grad_norm": 0.15926562249660492, "learning_rate": 0.00011605121869360402, "loss": 1.9932, "step": 295380 }, { "epoch": 1.1243272458759317, "grad_norm": 0.18492233753204346, "learning_rate": 0.00011599696181757452, "loss": 1.9789, "step": 295390 }, { "epoch": 1.1243653083440543, "grad_norm": 0.1605922430753708, "learning_rate": 0.00011594271260656957, "loss": 1.9902, "step": 295400 }, { "epoch": 1.124403370812177, "grad_norm": 0.16567596793174744, "learning_rate": 0.00011588847105734168, "loss": 1.9772, "step": 295410 }, { "epoch": 1.1244414332802997, "grad_norm": 0.18696080148220062, "learning_rate": 0.00011583423716664527, "loss": 1.9932, "step": 295420 }, { "epoch": 1.1244794957484223, "grad_norm": 0.16154401004314423, "learning_rate": 0.00011578001093123746, "loss": 1.9821, "step": 295430 }, { "epoch": 1.124517558216545, "grad_norm": 0.20887313783168793, "learning_rate": 0.0001157257923478774, "loss": 1.9735, "step": 295440 }, { "epoch": 1.1245556206846676, "grad_norm": 0.23249229788780212, "learning_rate": 0.00011567158141332651, "loss": 1.9901, "step": 295450 }, { "epoch": 1.1245936831527903, "grad_norm": 0.16391891241073608, "learning_rate": 0.00011561737812434863, "loss": 1.9923, "step": 295460 }, { "epoch": 1.124631745620913, "grad_norm": 0.1754762977361679, "learning_rate": 0.0001155631824777098, "loss": 1.9943, "step": 295470 }, { "epoch": 1.1246698080890358, "grad_norm": 0.1715954691171646, "learning_rate": 0.00011550899447017837, "loss": 1.968, "step": 295480 }, { "epoch": 1.1247078705571585, "grad_norm": 0.15342910587787628, "learning_rate": 0.00011545481409852493, "loss": 1.9897, "step": 295490 }, { "epoch": 1.1247459330252811, "grad_norm": 0.15989144146442413, "learning_rate": 0.00011540064135952244, "loss": 1.9736, "step": 295500 }, { "epoch": 1.1247839954934038, "grad_norm": 0.15441451966762543, "learning_rate": 0.00011534647624994604, "loss": 1.9855, "step": 295510 }, { "epoch": 1.1248220579615265, "grad_norm": 0.1686556190252304, "learning_rate": 0.0001152923187665731, "loss": 1.9804, "step": 295520 }, { "epoch": 1.1248601204296491, "grad_norm": 0.1646524965763092, "learning_rate": 0.00011523816890618339, "loss": 1.9759, "step": 295530 }, { "epoch": 1.1248981828977718, "grad_norm": 0.19196270406246185, "learning_rate": 0.00011518402666555883, "loss": 1.9846, "step": 295540 }, { "epoch": 1.1249362453658944, "grad_norm": 0.15963903069496155, "learning_rate": 0.00011512989204148366, "loss": 1.97, "step": 295550 }, { "epoch": 1.1249743078340173, "grad_norm": 0.2054601013660431, "learning_rate": 0.00011507576503074446, "loss": 1.9729, "step": 295560 }, { "epoch": 1.12501237030214, "grad_norm": 0.24047240614891052, "learning_rate": 0.00011502164563012984, "loss": 1.9889, "step": 295570 }, { "epoch": 1.1250504327702626, "grad_norm": 0.15630276501178741, "learning_rate": 0.00011496753383643088, "loss": 1.9876, "step": 295580 }, { "epoch": 1.1250884952383853, "grad_norm": 0.17492572963237762, "learning_rate": 0.00011491342964644086, "loss": 1.9782, "step": 295590 }, { "epoch": 1.125126557706508, "grad_norm": 0.16902922093868256, "learning_rate": 0.00011485933305695523, "loss": 1.9842, "step": 295600 }, { "epoch": 1.1251646201746306, "grad_norm": 0.22221390902996063, "learning_rate": 0.00011480524406477183, "loss": 1.9568, "step": 295610 }, { "epoch": 1.1252026826427532, "grad_norm": 0.17057080566883087, "learning_rate": 0.00011475116266669061, "loss": 2.005, "step": 295620 }, { "epoch": 1.125240745110876, "grad_norm": 0.1738719493150711, "learning_rate": 0.00011469708885951391, "loss": 1.9981, "step": 295630 }, { "epoch": 1.1252788075789986, "grad_norm": 0.19181248545646667, "learning_rate": 0.00011464302264004616, "loss": 1.9821, "step": 295640 }, { "epoch": 1.1253168700471212, "grad_norm": 0.1625184714794159, "learning_rate": 0.00011458896400509412, "loss": 1.9852, "step": 295650 }, { "epoch": 1.125354932515244, "grad_norm": 0.16737158596515656, "learning_rate": 0.00011453491295146673, "loss": 1.9703, "step": 295660 }, { "epoch": 1.1253929949833668, "grad_norm": 0.1710674911737442, "learning_rate": 0.0001144808694759753, "loss": 1.9956, "step": 295670 }, { "epoch": 1.1254310574514894, "grad_norm": 0.19023360311985016, "learning_rate": 0.00011442683357543321, "loss": 1.981, "step": 295680 }, { "epoch": 1.125469119919612, "grad_norm": 0.15628500282764435, "learning_rate": 0.00011437280524665622, "loss": 1.9769, "step": 295690 }, { "epoch": 1.1255071823877347, "grad_norm": 0.20175419747829437, "learning_rate": 0.00011431878448646221, "loss": 1.9766, "step": 295700 }, { "epoch": 1.1255452448558574, "grad_norm": 0.24420325458049774, "learning_rate": 0.0001142647712916713, "loss": 1.9911, "step": 295710 }, { "epoch": 1.12558330732398, "grad_norm": 0.16868871450424194, "learning_rate": 0.00011421076565910588, "loss": 1.9839, "step": 295720 }, { "epoch": 1.125621369792103, "grad_norm": 0.20664623379707336, "learning_rate": 0.00011415676758559052, "loss": 1.9715, "step": 295730 }, { "epoch": 1.1256594322602256, "grad_norm": 0.16074231266975403, "learning_rate": 0.00011410277706795214, "loss": 1.9817, "step": 295740 }, { "epoch": 1.1256974947283482, "grad_norm": 0.16438347101211548, "learning_rate": 0.00011404879410301966, "loss": 1.9852, "step": 295750 }, { "epoch": 1.125735557196471, "grad_norm": 0.16818535327911377, "learning_rate": 0.00011399481868762435, "loss": 1.9846, "step": 295760 }, { "epoch": 1.1257736196645936, "grad_norm": 0.1612391471862793, "learning_rate": 0.00011394085081859973, "loss": 1.9794, "step": 295770 }, { "epoch": 1.1258116821327162, "grad_norm": 0.16969744861125946, "learning_rate": 0.00011388689049278144, "loss": 1.9829, "step": 295780 }, { "epoch": 1.1258497446008389, "grad_norm": 0.1831132024526596, "learning_rate": 0.00011383293770700742, "loss": 2.0053, "step": 295790 }, { "epoch": 1.1258878070689615, "grad_norm": 0.17153841257095337, "learning_rate": 0.00011377899245811773, "loss": 1.9794, "step": 295800 }, { "epoch": 1.1259258695370842, "grad_norm": 0.18936266005039215, "learning_rate": 0.00011372505474295469, "loss": 1.974, "step": 295810 }, { "epoch": 1.1259639320052068, "grad_norm": 0.173858642578125, "learning_rate": 0.00011367112455836287, "loss": 1.9904, "step": 295820 }, { "epoch": 1.1260019944733297, "grad_norm": 0.19076798856258392, "learning_rate": 0.00011361720190118885, "loss": 1.981, "step": 295830 }, { "epoch": 1.1260400569414524, "grad_norm": 0.1642879694700241, "learning_rate": 0.00011356328676828164, "loss": 1.9907, "step": 295840 }, { "epoch": 1.126078119409575, "grad_norm": 0.21258746087551117, "learning_rate": 0.00011350937915649235, "loss": 1.9874, "step": 295850 }, { "epoch": 1.1261161818776977, "grad_norm": 0.18977473676204681, "learning_rate": 0.00011345547906267423, "loss": 1.9859, "step": 295860 }, { "epoch": 1.1261542443458203, "grad_norm": 0.15967296063899994, "learning_rate": 0.00011340158648368287, "loss": 1.9887, "step": 295870 }, { "epoch": 1.126192306813943, "grad_norm": 0.19567842781543732, "learning_rate": 0.0001133477014163759, "loss": 1.9831, "step": 295880 }, { "epoch": 1.1262303692820657, "grad_norm": 0.1792905181646347, "learning_rate": 0.00011329382385761322, "loss": 1.979, "step": 295890 }, { "epoch": 1.1262684317501883, "grad_norm": 0.2360984981060028, "learning_rate": 0.0001132399538042569, "loss": 1.9905, "step": 295900 }, { "epoch": 1.1263064942183112, "grad_norm": 0.2607705891132355, "learning_rate": 0.0001131860912531712, "loss": 1.9877, "step": 295910 }, { "epoch": 1.1263445566864339, "grad_norm": 0.19371066987514496, "learning_rate": 0.00011313223620122254, "loss": 1.9767, "step": 295920 }, { "epoch": 1.1263826191545565, "grad_norm": 0.20442219078540802, "learning_rate": 0.00011307838864527958, "loss": 1.993, "step": 295930 }, { "epoch": 1.1264206816226792, "grad_norm": 0.1811714768409729, "learning_rate": 0.00011302454858221306, "loss": 1.9741, "step": 295940 }, { "epoch": 1.1264587440908018, "grad_norm": 0.18447086215019226, "learning_rate": 0.00011297071600889597, "loss": 1.9793, "step": 295950 }, { "epoch": 1.1264968065589245, "grad_norm": 0.2133498638868332, "learning_rate": 0.00011291689092220347, "loss": 1.9918, "step": 295960 }, { "epoch": 1.1265348690270471, "grad_norm": 0.18323472142219543, "learning_rate": 0.00011286307331901286, "loss": 1.9766, "step": 295970 }, { "epoch": 1.1265729314951698, "grad_norm": 0.19975514709949493, "learning_rate": 0.00011280926319620367, "loss": 1.9853, "step": 295980 }, { "epoch": 1.1266109939632925, "grad_norm": 0.1980314552783966, "learning_rate": 0.00011275546055065755, "loss": 1.9823, "step": 295990 }, { "epoch": 1.1266490564314153, "grad_norm": 0.1543724536895752, "learning_rate": 0.0001127016653792583, "loss": 1.9694, "step": 296000 }, { "epoch": 1.126687118899538, "grad_norm": 0.2997741997241974, "learning_rate": 0.00011264787767889195, "loss": 1.9968, "step": 296010 }, { "epoch": 1.1267251813676606, "grad_norm": 0.1481897085905075, "learning_rate": 0.00011259409744644656, "loss": 1.9892, "step": 296020 }, { "epoch": 1.1267632438357833, "grad_norm": 0.1712397038936615, "learning_rate": 0.00011254032467881254, "loss": 1.9968, "step": 296030 }, { "epoch": 1.126801306303906, "grad_norm": 0.18674853444099426, "learning_rate": 0.00011248655937288233, "loss": 1.9717, "step": 296040 }, { "epoch": 1.1268393687720286, "grad_norm": 0.15560908615589142, "learning_rate": 0.00011243280152555052, "loss": 1.9718, "step": 296050 }, { "epoch": 1.1268774312401513, "grad_norm": 0.1548231691122055, "learning_rate": 0.00011237905113371388, "loss": 1.9717, "step": 296060 }, { "epoch": 1.126915493708274, "grad_norm": 0.17277362942695618, "learning_rate": 0.00011232530819427139, "loss": 1.9809, "step": 296070 }, { "epoch": 1.1269535561763968, "grad_norm": 0.21410466730594635, "learning_rate": 0.00011227157270412408, "loss": 1.9721, "step": 296080 }, { "epoch": 1.1269916186445195, "grad_norm": 0.1602579653263092, "learning_rate": 0.0001122178446601752, "loss": 1.996, "step": 296090 }, { "epoch": 1.1270296811126421, "grad_norm": 0.1896025538444519, "learning_rate": 0.00011216412405933013, "loss": 1.9844, "step": 296100 }, { "epoch": 1.1270677435807648, "grad_norm": 0.17863158881664276, "learning_rate": 0.00011211041089849639, "loss": 1.976, "step": 296110 }, { "epoch": 1.1271058060488874, "grad_norm": 0.16004763543605804, "learning_rate": 0.00011205670517458355, "loss": 1.9752, "step": 296120 }, { "epoch": 1.12714386851701, "grad_norm": 0.18769557774066925, "learning_rate": 0.00011200300688450348, "loss": 1.9684, "step": 296130 }, { "epoch": 1.1271819309851328, "grad_norm": 0.2082507163286209, "learning_rate": 0.00011194931602517006, "loss": 1.9913, "step": 296140 }, { "epoch": 1.1272199934532554, "grad_norm": 0.24489381909370422, "learning_rate": 0.00011189563259349939, "loss": 1.9738, "step": 296150 }, { "epoch": 1.127258055921378, "grad_norm": 0.1563321352005005, "learning_rate": 0.00011184195658640967, "loss": 1.985, "step": 296160 }, { "epoch": 1.127296118389501, "grad_norm": 0.17740443348884583, "learning_rate": 0.0001117882880008212, "loss": 1.9926, "step": 296170 }, { "epoch": 1.1273341808576236, "grad_norm": 0.19535517692565918, "learning_rate": 0.00011173462683365648, "loss": 1.976, "step": 296180 }, { "epoch": 1.1273722433257463, "grad_norm": 0.21839255094528198, "learning_rate": 0.00011168097308184, "loss": 1.9817, "step": 296190 }, { "epoch": 1.127410305793869, "grad_norm": 0.17370064556598663, "learning_rate": 0.00011162732674229854, "loss": 1.9826, "step": 296200 }, { "epoch": 1.1274483682619916, "grad_norm": 0.20356355607509613, "learning_rate": 0.00011157368781196092, "loss": 1.9824, "step": 296210 }, { "epoch": 1.1274864307301142, "grad_norm": 0.19404418766498566, "learning_rate": 0.00011152005628775808, "loss": 1.9751, "step": 296220 }, { "epoch": 1.1275244931982369, "grad_norm": 0.18887370824813843, "learning_rate": 0.00011146643216662305, "loss": 1.9801, "step": 296230 }, { "epoch": 1.1275625556663595, "grad_norm": 0.1544542908668518, "learning_rate": 0.00011141281544549104, "loss": 1.9832, "step": 296240 }, { "epoch": 1.1276006181344824, "grad_norm": 0.16381599009037018, "learning_rate": 0.0001113592061212994, "loss": 1.9786, "step": 296250 }, { "epoch": 1.127638680602605, "grad_norm": 0.15992294251918793, "learning_rate": 0.00011130560419098745, "loss": 1.9862, "step": 296260 }, { "epoch": 1.1276767430707277, "grad_norm": 0.15825308859348297, "learning_rate": 0.00011125200965149674, "loss": 1.9713, "step": 296270 }, { "epoch": 1.1277148055388504, "grad_norm": 0.26244789361953735, "learning_rate": 0.00011119842249977086, "loss": 1.9804, "step": 296280 }, { "epoch": 1.127752868006973, "grad_norm": 0.18329375982284546, "learning_rate": 0.00011114484273275565, "loss": 1.9765, "step": 296290 }, { "epoch": 1.1277909304750957, "grad_norm": 0.17616413533687592, "learning_rate": 0.00011109127034739886, "loss": 1.9673, "step": 296300 }, { "epoch": 1.1278289929432184, "grad_norm": 0.173064187169075, "learning_rate": 0.00011103770534065044, "loss": 1.9677, "step": 296310 }, { "epoch": 1.127867055411341, "grad_norm": 0.15833042562007904, "learning_rate": 0.00011098414770946247, "loss": 2.0004, "step": 296320 }, { "epoch": 1.1279051178794637, "grad_norm": 0.17509348690509796, "learning_rate": 0.00011093059745078898, "loss": 1.9947, "step": 296330 }, { "epoch": 1.1279431803475863, "grad_norm": 0.165169358253479, "learning_rate": 0.0001108770545615863, "loss": 1.987, "step": 296340 }, { "epoch": 1.1279812428157092, "grad_norm": 0.15263719856739044, "learning_rate": 0.00011082351903881278, "loss": 1.9635, "step": 296350 }, { "epoch": 1.1280193052838319, "grad_norm": 0.15287859737873077, "learning_rate": 0.00011076999087942874, "loss": 1.9762, "step": 296360 }, { "epoch": 1.1280573677519545, "grad_norm": 0.2017001211643219, "learning_rate": 0.0001107164700803967, "loss": 1.9723, "step": 296370 }, { "epoch": 1.1280954302200772, "grad_norm": 0.20477700233459473, "learning_rate": 0.00011066295663868136, "loss": 1.9905, "step": 296380 }, { "epoch": 1.1281334926881998, "grad_norm": 0.14756295084953308, "learning_rate": 0.00011060945055124927, "loss": 1.9988, "step": 296390 }, { "epoch": 1.1281715551563225, "grad_norm": 0.15950356423854828, "learning_rate": 0.00011055595181506923, "loss": 1.9633, "step": 296400 }, { "epoch": 1.1282096176244452, "grad_norm": 0.18172836303710938, "learning_rate": 0.00011050246042711215, "loss": 1.9882, "step": 296410 }, { "epoch": 1.128247680092568, "grad_norm": 0.21200765669345856, "learning_rate": 0.00011044897638435092, "loss": 1.9799, "step": 296420 }, { "epoch": 1.1282857425606907, "grad_norm": 0.23758922517299652, "learning_rate": 0.0001103954996837605, "loss": 1.9862, "step": 296430 }, { "epoch": 1.1283238050288134, "grad_norm": 0.19562426209449768, "learning_rate": 0.00011034203032231804, "loss": 1.9813, "step": 296440 }, { "epoch": 1.128361867496936, "grad_norm": 0.14884260296821594, "learning_rate": 0.00011028856829700262, "loss": 1.9836, "step": 296450 }, { "epoch": 1.1283999299650587, "grad_norm": 0.1560460478067398, "learning_rate": 0.00011023511360479548, "loss": 1.9928, "step": 296460 }, { "epoch": 1.1284379924331813, "grad_norm": 0.21366089582443237, "learning_rate": 0.00011018166624268, "loss": 2.0003, "step": 296470 }, { "epoch": 1.128476054901304, "grad_norm": 0.16234548389911652, "learning_rate": 0.00011012822620764146, "loss": 1.9894, "step": 296480 }, { "epoch": 1.1285141173694266, "grad_norm": 0.16439791023731232, "learning_rate": 0.00011007479349666728, "loss": 1.9939, "step": 296490 }, { "epoch": 1.1285521798375493, "grad_norm": 0.19343294203281403, "learning_rate": 0.00011002136810674701, "loss": 1.9721, "step": 296500 }, { "epoch": 1.128590242305672, "grad_norm": 0.19053199887275696, "learning_rate": 0.00010996795003487214, "loss": 1.9727, "step": 296510 }, { "epoch": 1.1286283047737948, "grad_norm": 0.17076334357261658, "learning_rate": 0.0001099145392780363, "loss": 1.9911, "step": 296520 }, { "epoch": 1.1286663672419175, "grad_norm": 0.1734541654586792, "learning_rate": 0.0001098611358332352, "loss": 1.9801, "step": 296530 }, { "epoch": 1.1287044297100401, "grad_norm": 0.15060538053512573, "learning_rate": 0.0001098077396974666, "loss": 1.9822, "step": 296540 }, { "epoch": 1.1287424921781628, "grad_norm": 0.1633031666278839, "learning_rate": 0.00010975435086773017, "loss": 1.9791, "step": 296550 }, { "epoch": 1.1287805546462855, "grad_norm": 0.1746307760477066, "learning_rate": 0.0001097009693410278, "loss": 1.9897, "step": 296560 }, { "epoch": 1.1288186171144081, "grad_norm": 0.22154556214809418, "learning_rate": 0.00010964759511436334, "loss": 1.9744, "step": 296570 }, { "epoch": 1.1288566795825308, "grad_norm": 0.18822148442268372, "learning_rate": 0.0001095942281847428, "loss": 1.9763, "step": 296580 }, { "epoch": 1.1288947420506537, "grad_norm": 0.19326618313789368, "learning_rate": 0.00010954086854917411, "loss": 1.982, "step": 296590 }, { "epoch": 1.1289328045187763, "grad_norm": 0.17983761429786682, "learning_rate": 0.00010948751620466729, "loss": 1.9656, "step": 296600 }, { "epoch": 1.128970866986899, "grad_norm": 0.15488919615745544, "learning_rate": 0.0001094341711482344, "loss": 1.9861, "step": 296610 }, { "epoch": 1.1290089294550216, "grad_norm": 0.15306726098060608, "learning_rate": 0.00010938083337688954, "loss": 1.9758, "step": 296620 }, { "epoch": 1.1290469919231443, "grad_norm": 0.19177936017513275, "learning_rate": 0.00010932750288764886, "loss": 1.9768, "step": 296630 }, { "epoch": 1.129085054391267, "grad_norm": 0.1583833545446396, "learning_rate": 0.00010927417967753056, "loss": 1.9795, "step": 296640 }, { "epoch": 1.1291231168593896, "grad_norm": 0.2055852711200714, "learning_rate": 0.00010922086374355483, "loss": 1.9832, "step": 296650 }, { "epoch": 1.1291611793275123, "grad_norm": 0.17747201025485992, "learning_rate": 0.00010916755508274395, "loss": 1.9769, "step": 296660 }, { "epoch": 1.129199241795635, "grad_norm": 0.17510271072387695, "learning_rate": 0.00010911425369212213, "loss": 1.9808, "step": 296670 }, { "epoch": 1.1292373042637576, "grad_norm": 0.18866494297981262, "learning_rate": 0.0001090609595687157, "loss": 1.9882, "step": 296680 }, { "epoch": 1.1292753667318804, "grad_norm": 0.16015563905239105, "learning_rate": 0.00010900767270955303, "loss": 2.0012, "step": 296690 }, { "epoch": 1.129313429200003, "grad_norm": 0.18745210766792297, "learning_rate": 0.00010895439311166444, "loss": 1.976, "step": 296700 }, { "epoch": 1.1293514916681258, "grad_norm": 0.16483469307422638, "learning_rate": 0.00010890112077208231, "loss": 1.9783, "step": 296710 }, { "epoch": 1.1293895541362484, "grad_norm": 0.19420553743839264, "learning_rate": 0.00010884785568784106, "loss": 1.9935, "step": 296720 }, { "epoch": 1.129427616604371, "grad_norm": 0.22112570703029633, "learning_rate": 0.00010879459785597712, "loss": 1.9688, "step": 296730 }, { "epoch": 1.1294656790724937, "grad_norm": 0.167385995388031, "learning_rate": 0.00010874134727352891, "loss": 1.9629, "step": 296740 }, { "epoch": 1.1295037415406164, "grad_norm": 0.15777790546417236, "learning_rate": 0.00010868810393753681, "loss": 1.9813, "step": 296750 }, { "epoch": 1.129541804008739, "grad_norm": 0.15105342864990234, "learning_rate": 0.00010863486784504334, "loss": 1.9845, "step": 296760 }, { "epoch": 1.129579866476862, "grad_norm": 0.18358340859413147, "learning_rate": 0.00010858163899309304, "loss": 1.9745, "step": 296770 }, { "epoch": 1.1296179289449846, "grad_norm": 0.1916862279176712, "learning_rate": 0.00010852841737873231, "loss": 1.9855, "step": 296780 }, { "epoch": 1.1296559914131072, "grad_norm": 0.20552465319633484, "learning_rate": 0.0001084752029990097, "loss": 1.9693, "step": 296790 }, { "epoch": 1.12969405388123, "grad_norm": 0.20443643629550934, "learning_rate": 0.00010842199585097567, "loss": 1.9762, "step": 296800 }, { "epoch": 1.1297321163493526, "grad_norm": 0.17776182293891907, "learning_rate": 0.0001083687959316827, "loss": 1.9901, "step": 296810 }, { "epoch": 1.1297701788174752, "grad_norm": 0.19605980813503265, "learning_rate": 0.00010831560323818528, "loss": 1.997, "step": 296820 }, { "epoch": 1.1298082412855979, "grad_norm": 0.18584735691547394, "learning_rate": 0.00010826241776754003, "loss": 2.0007, "step": 296830 }, { "epoch": 1.1298463037537205, "grad_norm": 0.1926882266998291, "learning_rate": 0.00010820923951680534, "loss": 1.978, "step": 296840 }, { "epoch": 1.1298843662218432, "grad_norm": 0.162827268242836, "learning_rate": 0.00010815606848304177, "loss": 1.9698, "step": 296850 }, { "epoch": 1.129922428689966, "grad_norm": 0.16366046667099, "learning_rate": 0.00010810290466331174, "loss": 1.9902, "step": 296860 }, { "epoch": 1.1299604911580887, "grad_norm": 0.17877492308616638, "learning_rate": 0.00010804974805467976, "loss": 1.9791, "step": 296870 }, { "epoch": 1.1299985536262114, "grad_norm": 0.2411981225013733, "learning_rate": 0.00010799659865421236, "loss": 1.9728, "step": 296880 }, { "epoch": 1.130036616094334, "grad_norm": 0.16815371811389923, "learning_rate": 0.00010794345645897791, "loss": 1.9928, "step": 296890 }, { "epoch": 1.1300746785624567, "grad_norm": 0.1610078513622284, "learning_rate": 0.00010789032146604694, "loss": 1.9701, "step": 296900 }, { "epoch": 1.1301127410305793, "grad_norm": 0.16686779260635376, "learning_rate": 0.00010783719367249184, "loss": 1.9837, "step": 296910 }, { "epoch": 1.130150803498702, "grad_norm": 0.20752695202827454, "learning_rate": 0.00010778407307538707, "loss": 1.9828, "step": 296920 }, { "epoch": 1.1301888659668247, "grad_norm": 0.17727501690387726, "learning_rate": 0.0001077309596718089, "loss": 1.9746, "step": 296930 }, { "epoch": 1.1302269284349475, "grad_norm": 0.22601988911628723, "learning_rate": 0.00010767785345883585, "loss": 1.9872, "step": 296940 }, { "epoch": 1.1302649909030702, "grad_norm": 0.19532141089439392, "learning_rate": 0.00010762475443354824, "loss": 1.975, "step": 296950 }, { "epoch": 1.1303030533711929, "grad_norm": 0.2647620439529419, "learning_rate": 0.00010757166259302831, "loss": 1.9796, "step": 296960 }, { "epoch": 1.1303411158393155, "grad_norm": 0.16196277737617493, "learning_rate": 0.00010751857793436048, "loss": 1.9828, "step": 296970 }, { "epoch": 1.1303791783074382, "grad_norm": 0.17007310688495636, "learning_rate": 0.000107465500454631, "loss": 1.9866, "step": 296980 }, { "epoch": 1.1304172407755608, "grad_norm": 0.18671287596225739, "learning_rate": 0.00010741243015092795, "loss": 1.9739, "step": 296990 }, { "epoch": 1.1304553032436835, "grad_norm": 0.15183429419994354, "learning_rate": 0.00010735936702034177, "loss": 1.9822, "step": 297000 }, { "epoch": 1.1304933657118061, "grad_norm": 0.1812126636505127, "learning_rate": 0.00010730631105996452, "loss": 1.9597, "step": 297010 }, { "epoch": 1.1305314281799288, "grad_norm": 0.1727277636528015, "learning_rate": 0.00010725326226689036, "loss": 1.9757, "step": 297020 }, { "epoch": 1.1305694906480517, "grad_norm": 0.26448723673820496, "learning_rate": 0.00010720022063821538, "loss": 1.9662, "step": 297030 }, { "epoch": 1.1306075531161743, "grad_norm": 0.17803862690925598, "learning_rate": 0.00010714718617103769, "loss": 1.9927, "step": 297040 }, { "epoch": 1.130645615584297, "grad_norm": 0.19999749958515167, "learning_rate": 0.00010709415886245721, "loss": 1.9591, "step": 297050 }, { "epoch": 1.1306836780524196, "grad_norm": 0.15710560977458954, "learning_rate": 0.00010704113870957605, "loss": 1.9859, "step": 297060 }, { "epoch": 1.1307217405205423, "grad_norm": 0.1491328626871109, "learning_rate": 0.00010698812570949801, "loss": 1.9753, "step": 297070 }, { "epoch": 1.130759802988665, "grad_norm": 0.15009577572345734, "learning_rate": 0.00010693511985932908, "loss": 1.986, "step": 297080 }, { "epoch": 1.1307978654567876, "grad_norm": 0.25345826148986816, "learning_rate": 0.00010688212115617708, "loss": 1.9726, "step": 297090 }, { "epoch": 1.1308359279249103, "grad_norm": 0.1647961139678955, "learning_rate": 0.00010682912959715174, "loss": 1.9772, "step": 297100 }, { "epoch": 1.1308739903930332, "grad_norm": 0.20859834551811218, "learning_rate": 0.0001067761451793648, "loss": 1.9753, "step": 297110 }, { "epoch": 1.1309120528611558, "grad_norm": 0.19893114268779755, "learning_rate": 0.00010672316789992998, "loss": 1.9791, "step": 297120 }, { "epoch": 1.1309501153292785, "grad_norm": 0.15905693173408508, "learning_rate": 0.00010667019775596287, "loss": 1.9719, "step": 297130 }, { "epoch": 1.1309881777974011, "grad_norm": 0.17043165862560272, "learning_rate": 0.00010661723474458113, "loss": 1.9805, "step": 297140 }, { "epoch": 1.1310262402655238, "grad_norm": 0.1681310534477234, "learning_rate": 0.00010656427886290415, "loss": 1.9964, "step": 297150 }, { "epoch": 1.1310643027336464, "grad_norm": 0.16626735031604767, "learning_rate": 0.00010651133010805341, "loss": 1.9854, "step": 297160 }, { "epoch": 1.131102365201769, "grad_norm": 0.15672798454761505, "learning_rate": 0.00010645838847715228, "loss": 1.9995, "step": 297170 }, { "epoch": 1.1311404276698918, "grad_norm": 0.16240811347961426, "learning_rate": 0.00010640545396732609, "loss": 1.9765, "step": 297180 }, { "epoch": 1.1311784901380144, "grad_norm": 0.1623651087284088, "learning_rate": 0.0001063525265757021, "loss": 1.984, "step": 297190 }, { "epoch": 1.131216552606137, "grad_norm": 0.17290998995304108, "learning_rate": 0.00010629960629940943, "loss": 1.9954, "step": 297200 }, { "epoch": 1.13125461507426, "grad_norm": 0.1701308637857437, "learning_rate": 0.00010624669313557927, "loss": 1.9883, "step": 297210 }, { "epoch": 1.1312926775423826, "grad_norm": 0.15361827611923218, "learning_rate": 0.00010619378708134464, "loss": 1.9643, "step": 297220 }, { "epoch": 1.1313307400105053, "grad_norm": 0.22033323347568512, "learning_rate": 0.00010614088813384043, "loss": 1.985, "step": 297230 }, { "epoch": 1.131368802478628, "grad_norm": 0.20703737437725067, "learning_rate": 0.00010608799629020355, "loss": 1.9874, "step": 297240 }, { "epoch": 1.1314068649467506, "grad_norm": 0.224261075258255, "learning_rate": 0.00010603511154757289, "loss": 1.9673, "step": 297250 }, { "epoch": 1.1314449274148732, "grad_norm": 0.1555107980966568, "learning_rate": 0.00010598223390308909, "loss": 1.9845, "step": 297260 }, { "epoch": 1.131482989882996, "grad_norm": 0.16893711686134338, "learning_rate": 0.0001059293633538948, "loss": 1.9947, "step": 297270 }, { "epoch": 1.1315210523511188, "grad_norm": 0.15874548256397247, "learning_rate": 0.0001058764998971346, "loss": 1.9789, "step": 297280 }, { "epoch": 1.1315591148192414, "grad_norm": 0.157329261302948, "learning_rate": 0.000105823643529955, "loss": 1.9629, "step": 297290 }, { "epoch": 1.131597177287364, "grad_norm": 0.19571171700954437, "learning_rate": 0.00010577079424950431, "loss": 1.9684, "step": 297300 }, { "epoch": 1.1316352397554867, "grad_norm": 0.22006621956825256, "learning_rate": 0.00010571795205293289, "loss": 1.9703, "step": 297310 }, { "epoch": 1.1316733022236094, "grad_norm": 0.16492506861686707, "learning_rate": 0.00010566511693739294, "loss": 1.9637, "step": 297320 }, { "epoch": 1.131711364691732, "grad_norm": 0.14963102340698242, "learning_rate": 0.00010561228890003854, "loss": 1.981, "step": 297330 }, { "epoch": 1.1317494271598547, "grad_norm": 0.1697867512702942, "learning_rate": 0.0001055594679380258, "loss": 1.9744, "step": 297340 }, { "epoch": 1.1317874896279774, "grad_norm": 0.2040063887834549, "learning_rate": 0.00010550665404851251, "loss": 1.9844, "step": 297350 }, { "epoch": 1.1318255520961, "grad_norm": 0.1494297832250595, "learning_rate": 0.00010545384722865859, "loss": 1.9817, "step": 297360 }, { "epoch": 1.1318636145642227, "grad_norm": 0.18378987908363342, "learning_rate": 0.00010540104747562578, "loss": 1.9842, "step": 297370 }, { "epoch": 1.1319016770323456, "grad_norm": 0.14976900815963745, "learning_rate": 0.00010534825478657767, "loss": 1.9872, "step": 297380 }, { "epoch": 1.1319397395004682, "grad_norm": 0.1700267791748047, "learning_rate": 0.00010529546915867983, "loss": 1.9756, "step": 297390 }, { "epoch": 1.1319778019685909, "grad_norm": 0.163448765873909, "learning_rate": 0.00010524269058909964, "loss": 1.9755, "step": 297400 }, { "epoch": 1.1320158644367135, "grad_norm": 0.1536484956741333, "learning_rate": 0.00010518991907500635, "loss": 1.9725, "step": 297410 }, { "epoch": 1.1320539269048362, "grad_norm": 0.18388310074806213, "learning_rate": 0.00010513715461357132, "loss": 1.987, "step": 297420 }, { "epoch": 1.1320919893729589, "grad_norm": 0.25140684843063354, "learning_rate": 0.00010508439720196755, "loss": 1.9676, "step": 297430 }, { "epoch": 1.1321300518410815, "grad_norm": 0.17318034172058105, "learning_rate": 0.00010503164683737005, "loss": 1.9887, "step": 297440 }, { "epoch": 1.1321681143092044, "grad_norm": 0.22555945813655853, "learning_rate": 0.0001049789035169556, "loss": 1.9752, "step": 297450 }, { "epoch": 1.132206176777327, "grad_norm": 0.1887245774269104, "learning_rate": 0.00010492616723790311, "loss": 1.9603, "step": 297460 }, { "epoch": 1.1322442392454497, "grad_norm": 0.17808274924755096, "learning_rate": 0.00010487343799739302, "loss": 1.982, "step": 297470 }, { "epoch": 1.1322823017135724, "grad_norm": 0.1736079901456833, "learning_rate": 0.00010482071579260805, "loss": 1.9716, "step": 297480 }, { "epoch": 1.132320364181695, "grad_norm": 0.1533351093530655, "learning_rate": 0.00010476800062073244, "loss": 1.9908, "step": 297490 }, { "epoch": 1.1323584266498177, "grad_norm": 0.1535065919160843, "learning_rate": 0.00010471529247895256, "loss": 1.967, "step": 297500 }, { "epoch": 1.1323964891179403, "grad_norm": 0.1636301577091217, "learning_rate": 0.00010466259136445654, "loss": 1.9878, "step": 297510 }, { "epoch": 1.132434551586063, "grad_norm": 0.15821027755737305, "learning_rate": 0.00010460989727443437, "loss": 1.9884, "step": 297520 }, { "epoch": 1.1324726140541856, "grad_norm": 0.14496588706970215, "learning_rate": 0.000104557210206078, "loss": 1.9745, "step": 297530 }, { "epoch": 1.1325106765223083, "grad_norm": 0.22977522015571594, "learning_rate": 0.00010450453015658107, "loss": 1.9829, "step": 297540 }, { "epoch": 1.1325487389904312, "grad_norm": 0.21885627508163452, "learning_rate": 0.00010445185712313937, "loss": 1.9794, "step": 297550 }, { "epoch": 1.1325868014585538, "grad_norm": 0.1769750565290451, "learning_rate": 0.00010439919110295037, "loss": 1.9772, "step": 297560 }, { "epoch": 1.1326248639266765, "grad_norm": 0.16426600515842438, "learning_rate": 0.00010434653209321331, "loss": 1.9725, "step": 297570 }, { "epoch": 1.1326629263947992, "grad_norm": 0.26224613189697266, "learning_rate": 0.00010429388009112961, "loss": 1.9706, "step": 297580 }, { "epoch": 1.1327009888629218, "grad_norm": 0.1549231857061386, "learning_rate": 0.00010424123509390221, "loss": 1.9857, "step": 297590 }, { "epoch": 1.1327390513310445, "grad_norm": 0.1712195724248886, "learning_rate": 0.00010418859709873607, "loss": 1.9808, "step": 297600 }, { "epoch": 1.1327771137991671, "grad_norm": 0.16183438897132874, "learning_rate": 0.0001041359661028381, "loss": 1.9599, "step": 297610 }, { "epoch": 1.1328151762672898, "grad_norm": 0.17860190570354462, "learning_rate": 0.00010408334210341691, "loss": 1.9769, "step": 297620 }, { "epoch": 1.1328532387354127, "grad_norm": 0.17467913031578064, "learning_rate": 0.00010403072509768302, "loss": 1.9814, "step": 297630 }, { "epoch": 1.1328913012035353, "grad_norm": 0.1577252298593521, "learning_rate": 0.00010397811508284882, "loss": 1.979, "step": 297640 }, { "epoch": 1.132929363671658, "grad_norm": 0.15403807163238525, "learning_rate": 0.00010392551205612854, "loss": 1.9672, "step": 297650 }, { "epoch": 1.1329674261397806, "grad_norm": 0.16157092154026031, "learning_rate": 0.0001038729160147382, "loss": 1.9873, "step": 297660 }, { "epoch": 1.1330054886079033, "grad_norm": 0.1798163652420044, "learning_rate": 0.00010382032695589577, "loss": 1.9807, "step": 297670 }, { "epoch": 1.133043551076026, "grad_norm": 0.1702636331319809, "learning_rate": 0.000103767744876821, "loss": 1.9789, "step": 297680 }, { "epoch": 1.1330816135441486, "grad_norm": 0.1928313672542572, "learning_rate": 0.00010371516977473555, "loss": 1.9817, "step": 297690 }, { "epoch": 1.1331196760122713, "grad_norm": 0.22674444317817688, "learning_rate": 0.00010366260164686286, "loss": 1.9741, "step": 297700 }, { "epoch": 1.133157738480394, "grad_norm": 0.19050756096839905, "learning_rate": 0.00010361004049042815, "loss": 1.975, "step": 297710 }, { "epoch": 1.1331958009485168, "grad_norm": 0.19661086797714233, "learning_rate": 0.00010355748630265865, "loss": 1.96, "step": 297720 }, { "epoch": 1.1332338634166395, "grad_norm": 0.16321800649166107, "learning_rate": 0.00010350493908078334, "loss": 1.9791, "step": 297730 }, { "epoch": 1.133271925884762, "grad_norm": 0.18268156051635742, "learning_rate": 0.00010345239882203295, "loss": 1.9836, "step": 297740 }, { "epoch": 1.1333099883528848, "grad_norm": 0.2021750658750534, "learning_rate": 0.00010339986552364023, "loss": 1.9676, "step": 297750 }, { "epoch": 1.1333480508210074, "grad_norm": 0.18211646378040314, "learning_rate": 0.00010334733918283956, "loss": 1.9793, "step": 297760 }, { "epoch": 1.13338611328913, "grad_norm": 0.19580800831317902, "learning_rate": 0.00010329481979686728, "loss": 1.9851, "step": 297770 }, { "epoch": 1.1334241757572527, "grad_norm": 0.23755834996700287, "learning_rate": 0.00010324230736296152, "loss": 1.9817, "step": 297780 }, { "epoch": 1.1334622382253754, "grad_norm": 0.2371157705783844, "learning_rate": 0.00010318980187836235, "loss": 1.9978, "step": 297790 }, { "epoch": 1.1335003006934983, "grad_norm": 0.2062951624393463, "learning_rate": 0.0001031373033403114, "loss": 1.9864, "step": 297800 }, { "epoch": 1.133538363161621, "grad_norm": 0.2074883133172989, "learning_rate": 0.00010308481174605239, "loss": 1.9832, "step": 297810 }, { "epoch": 1.1335764256297436, "grad_norm": 0.21004609763622284, "learning_rate": 0.00010303232709283072, "loss": 1.9749, "step": 297820 }, { "epoch": 1.1336144880978662, "grad_norm": 0.1587824821472168, "learning_rate": 0.00010297984937789368, "loss": 1.9848, "step": 297830 }, { "epoch": 1.133652550565989, "grad_norm": 0.1737077683210373, "learning_rate": 0.00010292737859849027, "loss": 1.967, "step": 297840 }, { "epoch": 1.1336906130341116, "grad_norm": 0.1679849773645401, "learning_rate": 0.00010287491475187149, "loss": 1.9777, "step": 297850 }, { "epoch": 1.1337286755022342, "grad_norm": 0.16807515919208527, "learning_rate": 0.00010282245783528998, "loss": 1.9746, "step": 297860 }, { "epoch": 1.1337667379703569, "grad_norm": 0.20174390077590942, "learning_rate": 0.00010277000784600032, "loss": 1.9811, "step": 297870 }, { "epoch": 1.1338048004384795, "grad_norm": 0.17630982398986816, "learning_rate": 0.00010271756478125876, "loss": 1.9681, "step": 297880 }, { "epoch": 1.1338428629066024, "grad_norm": 0.1595836579799652, "learning_rate": 0.00010266512863832355, "loss": 1.978, "step": 297890 }, { "epoch": 1.133880925374725, "grad_norm": 0.17745345830917358, "learning_rate": 0.0001026126994144545, "loss": 1.973, "step": 297900 }, { "epoch": 1.1339189878428477, "grad_norm": 0.17257341742515564, "learning_rate": 0.00010256027710691357, "loss": 1.9759, "step": 297910 }, { "epoch": 1.1339570503109704, "grad_norm": 0.17954018712043762, "learning_rate": 0.00010250786171296417, "loss": 1.9809, "step": 297920 }, { "epoch": 1.133995112779093, "grad_norm": 0.28424471616744995, "learning_rate": 0.00010245545322987176, "loss": 1.9967, "step": 297930 }, { "epoch": 1.1340331752472157, "grad_norm": 0.17595075070858002, "learning_rate": 0.00010240305165490349, "loss": 1.9744, "step": 297940 }, { "epoch": 1.1340712377153384, "grad_norm": 0.23913931846618652, "learning_rate": 0.00010235065698532836, "loss": 1.9924, "step": 297950 }, { "epoch": 1.134109300183461, "grad_norm": 0.16063128411769867, "learning_rate": 0.00010229826921841707, "loss": 1.9784, "step": 297960 }, { "epoch": 1.134147362651584, "grad_norm": 0.15882925689220428, "learning_rate": 0.00010224588835144227, "loss": 1.9737, "step": 297970 }, { "epoch": 1.1341854251197065, "grad_norm": 0.1765865981578827, "learning_rate": 0.00010219351438167829, "loss": 1.9787, "step": 297980 }, { "epoch": 1.1342234875878292, "grad_norm": 0.17888866364955902, "learning_rate": 0.00010214114730640129, "loss": 1.9502, "step": 297990 }, { "epoch": 1.1342615500559519, "grad_norm": 0.1510838121175766, "learning_rate": 0.00010208878712288928, "loss": 1.9698, "step": 298000 }, { "epoch": 1.1342996125240745, "grad_norm": 0.1642741858959198, "learning_rate": 0.00010203643382842193, "loss": 1.976, "step": 298010 }, { "epoch": 1.1343376749921972, "grad_norm": 0.1682412475347519, "learning_rate": 0.00010198408742028082, "loss": 1.9792, "step": 298020 }, { "epoch": 1.1343757374603198, "grad_norm": 0.2024160772562027, "learning_rate": 0.00010193174789574922, "loss": 1.9628, "step": 298030 }, { "epoch": 1.1344137999284425, "grad_norm": 0.15808521211147308, "learning_rate": 0.00010187941525211236, "loss": 1.9679, "step": 298040 }, { "epoch": 1.1344518623965651, "grad_norm": 0.1619788408279419, "learning_rate": 0.00010182708948665698, "loss": 1.9777, "step": 298050 }, { "epoch": 1.1344899248646878, "grad_norm": 0.16730374097824097, "learning_rate": 0.00010177477059667185, "loss": 1.9801, "step": 298060 }, { "epoch": 1.1345279873328107, "grad_norm": 0.16122709214687347, "learning_rate": 0.00010172245857944739, "loss": 1.951, "step": 298070 }, { "epoch": 1.1345660498009333, "grad_norm": 0.21013543009757996, "learning_rate": 0.00010167015343227582, "loss": 1.9829, "step": 298080 }, { "epoch": 1.134604112269056, "grad_norm": 0.20861048996448517, "learning_rate": 0.0001016178551524512, "loss": 1.9743, "step": 298090 }, { "epoch": 1.1346421747371787, "grad_norm": 0.19113865494728088, "learning_rate": 0.00010156556373726933, "loss": 1.9696, "step": 298100 }, { "epoch": 1.1346802372053013, "grad_norm": 0.15994352102279663, "learning_rate": 0.00010151327918402769, "loss": 1.9772, "step": 298110 }, { "epoch": 1.134718299673424, "grad_norm": 0.17390236258506775, "learning_rate": 0.00010146100149002568, "loss": 1.9778, "step": 298120 }, { "epoch": 1.1347563621415466, "grad_norm": 0.16655398905277252, "learning_rate": 0.00010140873065256435, "loss": 1.9879, "step": 298130 }, { "epoch": 1.1347944246096695, "grad_norm": 0.21683935821056366, "learning_rate": 0.0001013564666689466, "loss": 1.9837, "step": 298140 }, { "epoch": 1.1348324870777922, "grad_norm": 0.2061341404914856, "learning_rate": 0.00010130420953647712, "loss": 1.9745, "step": 298150 }, { "epoch": 1.1348705495459148, "grad_norm": 0.16226203739643097, "learning_rate": 0.00010125195925246228, "loss": 1.9701, "step": 298160 }, { "epoch": 1.1349086120140375, "grad_norm": 0.17854997515678406, "learning_rate": 0.00010119971581421028, "loss": 1.9734, "step": 298170 }, { "epoch": 1.1349466744821601, "grad_norm": 0.15226025879383087, "learning_rate": 0.000101147479219031, "loss": 1.9774, "step": 298180 }, { "epoch": 1.1349847369502828, "grad_norm": 0.16963133215904236, "learning_rate": 0.00010109524946423615, "loss": 1.9851, "step": 298190 }, { "epoch": 1.1350227994184054, "grad_norm": 0.1541719138622284, "learning_rate": 0.0001010430265471392, "loss": 1.9658, "step": 298200 }, { "epoch": 1.135060861886528, "grad_norm": 0.14477787911891937, "learning_rate": 0.00010099081046505543, "loss": 1.9773, "step": 298210 }, { "epoch": 1.1350989243546508, "grad_norm": 0.17722904682159424, "learning_rate": 0.00010093860121530168, "loss": 1.9694, "step": 298220 }, { "epoch": 1.1351369868227734, "grad_norm": 0.21424292027950287, "learning_rate": 0.00010088639879519684, "loss": 1.9837, "step": 298230 }, { "epoch": 1.1351750492908963, "grad_norm": 0.1900191605091095, "learning_rate": 0.00010083420320206127, "loss": 1.9801, "step": 298240 }, { "epoch": 1.135213111759019, "grad_norm": 0.21574735641479492, "learning_rate": 0.00010078201443321722, "loss": 1.9715, "step": 298250 }, { "epoch": 1.1352511742271416, "grad_norm": 0.21656334400177002, "learning_rate": 0.00010072983248598866, "loss": 1.9911, "step": 298260 }, { "epoch": 1.1352892366952643, "grad_norm": 0.2067733258008957, "learning_rate": 0.0001006776573577014, "loss": 1.9777, "step": 298270 }, { "epoch": 1.135327299163387, "grad_norm": 0.166462704539299, "learning_rate": 0.00010062548904568286, "loss": 1.9704, "step": 298280 }, { "epoch": 1.1353653616315096, "grad_norm": 0.19629982113838196, "learning_rate": 0.00010057332754726222, "loss": 1.9824, "step": 298290 }, { "epoch": 1.1354034240996322, "grad_norm": 0.18000733852386475, "learning_rate": 0.00010052117285977053, "loss": 1.9789, "step": 298300 }, { "epoch": 1.1354414865677551, "grad_norm": 0.1905238926410675, "learning_rate": 0.00010046902498054051, "loss": 1.9669, "step": 298310 }, { "epoch": 1.1354795490358778, "grad_norm": 0.16512887179851532, "learning_rate": 0.00010041688390690646, "loss": 1.994, "step": 298320 }, { "epoch": 1.1355176115040004, "grad_norm": 0.21684348583221436, "learning_rate": 0.00010036474963620473, "loss": 1.9725, "step": 298330 }, { "epoch": 1.135555673972123, "grad_norm": 0.16749443113803864, "learning_rate": 0.00010031262216577314, "loss": 1.9656, "step": 298340 }, { "epoch": 1.1355937364402457, "grad_norm": 0.20533698797225952, "learning_rate": 0.00010026050149295146, "loss": 1.9785, "step": 298350 }, { "epoch": 1.1356317989083684, "grad_norm": 0.20634326338768005, "learning_rate": 0.000100208387615081, "loss": 1.9695, "step": 298360 }, { "epoch": 1.135669861376491, "grad_norm": 0.19714361429214478, "learning_rate": 0.00010015628052950487, "loss": 1.9733, "step": 298370 }, { "epoch": 1.1357079238446137, "grad_norm": 0.1932590901851654, "learning_rate": 0.00010010418023356798, "loss": 1.9822, "step": 298380 }, { "epoch": 1.1357459863127364, "grad_norm": 0.1969641149044037, "learning_rate": 0.0001000520867246169, "loss": 1.9844, "step": 298390 }, { "epoch": 1.135784048780859, "grad_norm": 0.2220790535211563, "learning_rate": 9.999999999999998e-05, "loss": 1.9783, "step": 298400 }, { "epoch": 1.135822111248982, "grad_norm": 0.18969888985157013, "learning_rate": 9.994792005706726e-05, "loss": 1.97, "step": 298410 }, { "epoch": 1.1358601737171046, "grad_norm": 0.1967613846063614, "learning_rate": 9.989584689317044e-05, "loss": 1.9727, "step": 298420 }, { "epoch": 1.1358982361852272, "grad_norm": 0.1870899349451065, "learning_rate": 9.9843780505663e-05, "loss": 1.9715, "step": 298430 }, { "epoch": 1.1359362986533499, "grad_norm": 0.15287436544895172, "learning_rate": 9.979172089190025e-05, "loss": 1.9639, "step": 298440 }, { "epoch": 1.1359743611214725, "grad_norm": 0.15644054114818573, "learning_rate": 9.973966804923906e-05, "loss": 1.9825, "step": 298450 }, { "epoch": 1.1360124235895952, "grad_norm": 0.15644334256649017, "learning_rate": 9.96876219750381e-05, "loss": 1.967, "step": 298460 }, { "epoch": 1.1360504860577179, "grad_norm": 0.16445119678974152, "learning_rate": 9.963558266665774e-05, "loss": 1.9825, "step": 298470 }, { "epoch": 1.1360885485258405, "grad_norm": 0.301796019077301, "learning_rate": 9.958355012146004e-05, "loss": 1.9884, "step": 298480 }, { "epoch": 1.1361266109939634, "grad_norm": 0.1797219067811966, "learning_rate": 9.953152433680879e-05, "loss": 1.9829, "step": 298490 }, { "epoch": 1.136164673462086, "grad_norm": 0.20839370787143707, "learning_rate": 9.947950531006944e-05, "loss": 1.978, "step": 298500 }, { "epoch": 1.1362027359302087, "grad_norm": 0.19797562062740326, "learning_rate": 9.942749303860938e-05, "loss": 1.9762, "step": 298510 }, { "epoch": 1.1362407983983314, "grad_norm": 0.19425931572914124, "learning_rate": 9.937548751979742e-05, "loss": 1.9837, "step": 298520 }, { "epoch": 1.136278860866454, "grad_norm": 0.17285150289535522, "learning_rate": 9.932348875100417e-05, "loss": 1.9877, "step": 298530 }, { "epoch": 1.1363169233345767, "grad_norm": 0.29804208874702454, "learning_rate": 9.927149672960206e-05, "loss": 1.9629, "step": 298540 }, { "epoch": 1.1363549858026993, "grad_norm": 0.23229555785655975, "learning_rate": 9.921951145296509e-05, "loss": 1.9564, "step": 298550 }, { "epoch": 1.136393048270822, "grad_norm": 0.16856501996517181, "learning_rate": 9.916753291846891e-05, "loss": 1.9887, "step": 298560 }, { "epoch": 1.1364311107389446, "grad_norm": 0.1554311364889145, "learning_rate": 9.911556112349118e-05, "loss": 1.9722, "step": 298570 }, { "epoch": 1.1364691732070675, "grad_norm": 0.18315407633781433, "learning_rate": 9.906359606541088e-05, "loss": 1.9774, "step": 298580 }, { "epoch": 1.1365072356751902, "grad_norm": 0.19487372040748596, "learning_rate": 9.901163774160893e-05, "loss": 1.9875, "step": 298590 }, { "epoch": 1.1365452981433128, "grad_norm": 0.2191956639289856, "learning_rate": 9.895968614946788e-05, "loss": 1.9834, "step": 298600 }, { "epoch": 1.1365833606114355, "grad_norm": 0.21160957217216492, "learning_rate": 9.890774128637187e-05, "loss": 1.9923, "step": 298610 }, { "epoch": 1.1366214230795582, "grad_norm": 0.17996318638324738, "learning_rate": 9.885580314970694e-05, "loss": 1.9884, "step": 298620 }, { "epoch": 1.1366594855476808, "grad_norm": 0.15935932099819183, "learning_rate": 9.880387173686067e-05, "loss": 1.9787, "step": 298630 }, { "epoch": 1.1366975480158035, "grad_norm": 0.16337305307388306, "learning_rate": 9.875194704522239e-05, "loss": 1.9762, "step": 298640 }, { "epoch": 1.1367356104839261, "grad_norm": 0.22064292430877686, "learning_rate": 9.870002907218311e-05, "loss": 1.9696, "step": 298650 }, { "epoch": 1.136773672952049, "grad_norm": 0.16844485700130463, "learning_rate": 9.864811781513556e-05, "loss": 1.9818, "step": 298660 }, { "epoch": 1.1368117354201717, "grad_norm": 0.1495898962020874, "learning_rate": 9.8596213271474e-05, "loss": 1.9782, "step": 298670 }, { "epoch": 1.1368497978882943, "grad_norm": 0.17440442740917206, "learning_rate": 9.854431543859454e-05, "loss": 1.9685, "step": 298680 }, { "epoch": 1.136887860356417, "grad_norm": 0.18147921562194824, "learning_rate": 9.849242431389499e-05, "loss": 1.9949, "step": 298690 }, { "epoch": 1.1369259228245396, "grad_norm": 0.17838215827941895, "learning_rate": 9.844053989477475e-05, "loss": 1.9868, "step": 298700 }, { "epoch": 1.1369639852926623, "grad_norm": 0.20777563750743866, "learning_rate": 9.838866217863485e-05, "loss": 1.9595, "step": 298710 }, { "epoch": 1.137002047760785, "grad_norm": 0.1972523182630539, "learning_rate": 9.833679116287819e-05, "loss": 1.9932, "step": 298720 }, { "epoch": 1.1370401102289076, "grad_norm": 0.23394937813282013, "learning_rate": 9.828492684490914e-05, "loss": 1.9728, "step": 298730 }, { "epoch": 1.1370781726970303, "grad_norm": 0.2772226631641388, "learning_rate": 9.823306922213388e-05, "loss": 1.9758, "step": 298740 }, { "epoch": 1.1371162351651531, "grad_norm": 0.17437441647052765, "learning_rate": 9.818121829196019e-05, "loss": 1.9603, "step": 298750 }, { "epoch": 1.1371542976332758, "grad_norm": 0.17861786484718323, "learning_rate": 9.812937405179761e-05, "loss": 1.9753, "step": 298760 }, { "epoch": 1.1371923601013985, "grad_norm": 0.15384817123413086, "learning_rate": 9.807753649905726e-05, "loss": 1.9929, "step": 298770 }, { "epoch": 1.1372304225695211, "grad_norm": 0.25518345832824707, "learning_rate": 9.802570563115192e-05, "loss": 1.9663, "step": 298780 }, { "epoch": 1.1372684850376438, "grad_norm": 0.1736324280500412, "learning_rate": 9.797388144549613e-05, "loss": 1.9664, "step": 298790 }, { "epoch": 1.1373065475057664, "grad_norm": 0.1534629762172699, "learning_rate": 9.792206393950609e-05, "loss": 1.963, "step": 298800 }, { "epoch": 1.137344609973889, "grad_norm": 0.22825098037719727, "learning_rate": 9.787025311059944e-05, "loss": 1.9605, "step": 298810 }, { "epoch": 1.1373826724420117, "grad_norm": 0.26282426714897156, "learning_rate": 9.78184489561959e-05, "loss": 1.9622, "step": 298820 }, { "epoch": 1.1374207349101346, "grad_norm": 0.16258125007152557, "learning_rate": 9.77666514737165e-05, "loss": 1.9895, "step": 298830 }, { "epoch": 1.1374587973782573, "grad_norm": 0.17438267171382904, "learning_rate": 9.771486066058405e-05, "loss": 1.9749, "step": 298840 }, { "epoch": 1.13749685984638, "grad_norm": 0.16162815690040588, "learning_rate": 9.766307651422295e-05, "loss": 1.9826, "step": 298850 }, { "epoch": 1.1375349223145026, "grad_norm": 0.1707250326871872, "learning_rate": 9.761129903205951e-05, "loss": 1.9758, "step": 298860 }, { "epoch": 1.1375729847826253, "grad_norm": 0.15322420001029968, "learning_rate": 9.755952821152125e-05, "loss": 1.9797, "step": 298870 }, { "epoch": 1.137611047250748, "grad_norm": 0.24775369465351105, "learning_rate": 9.750776405003786e-05, "loss": 1.9842, "step": 298880 }, { "epoch": 1.1376491097188706, "grad_norm": 0.22171650826931, "learning_rate": 9.745600654504028e-05, "loss": 1.9615, "step": 298890 }, { "epoch": 1.1376871721869932, "grad_norm": 0.19848200678825378, "learning_rate": 9.740425569396128e-05, "loss": 1.9709, "step": 298900 }, { "epoch": 1.1377252346551159, "grad_norm": 0.18477272987365723, "learning_rate": 9.735251149423519e-05, "loss": 1.9779, "step": 298910 }, { "epoch": 1.1377632971232385, "grad_norm": 0.1732361912727356, "learning_rate": 9.730077394329817e-05, "loss": 1.9699, "step": 298920 }, { "epoch": 1.1378013595913614, "grad_norm": 0.22373192012310028, "learning_rate": 9.724904303858773e-05, "loss": 1.9723, "step": 298930 }, { "epoch": 1.137839422059484, "grad_norm": 0.25084006786346436, "learning_rate": 9.71973187775434e-05, "loss": 1.9754, "step": 298940 }, { "epoch": 1.1378774845276067, "grad_norm": 0.17961499094963074, "learning_rate": 9.714560115760601e-05, "loss": 1.9705, "step": 298950 }, { "epoch": 1.1379155469957294, "grad_norm": 0.21094465255737305, "learning_rate": 9.709389017621817e-05, "loss": 1.9725, "step": 298960 }, { "epoch": 1.137953609463852, "grad_norm": 0.16603122651576996, "learning_rate": 9.704218583082425e-05, "loss": 1.9843, "step": 298970 }, { "epoch": 1.1379916719319747, "grad_norm": 0.20088063180446625, "learning_rate": 9.699048811887001e-05, "loss": 1.9762, "step": 298980 }, { "epoch": 1.1380297344000974, "grad_norm": 0.23191094398498535, "learning_rate": 9.693879703780301e-05, "loss": 1.9712, "step": 298990 }, { "epoch": 1.1380677968682202, "grad_norm": 0.16466926038265228, "learning_rate": 9.688711258507249e-05, "loss": 1.9798, "step": 299000 }, { "epoch": 1.138105859336343, "grad_norm": 0.2375544011592865, "learning_rate": 9.683543475812923e-05, "loss": 1.9771, "step": 299010 }, { "epoch": 1.1381439218044656, "grad_norm": 0.2090522199869156, "learning_rate": 9.678376355442564e-05, "loss": 1.975, "step": 299020 }, { "epoch": 1.1381819842725882, "grad_norm": 0.21409104764461517, "learning_rate": 9.673209897141577e-05, "loss": 1.9739, "step": 299030 }, { "epoch": 1.1382200467407109, "grad_norm": 0.28223034739494324, "learning_rate": 9.668044100655537e-05, "loss": 1.972, "step": 299040 }, { "epoch": 1.1382581092088335, "grad_norm": 0.23368722200393677, "learning_rate": 9.66287896573017e-05, "loss": 1.9783, "step": 299050 }, { "epoch": 1.1382961716769562, "grad_norm": 0.159254789352417, "learning_rate": 9.65771449211138e-05, "loss": 1.9837, "step": 299060 }, { "epoch": 1.1383342341450788, "grad_norm": 0.19009754061698914, "learning_rate": 9.652550679545224e-05, "loss": 1.9702, "step": 299070 }, { "epoch": 1.1383722966132015, "grad_norm": 0.19186371564865112, "learning_rate": 9.647387527777918e-05, "loss": 1.9598, "step": 299080 }, { "epoch": 1.1384103590813242, "grad_norm": 0.19739702343940735, "learning_rate": 9.642225036555846e-05, "loss": 1.9891, "step": 299090 }, { "epoch": 1.138448421549447, "grad_norm": 0.16868695616722107, "learning_rate": 9.637063205625552e-05, "loss": 1.9734, "step": 299100 }, { "epoch": 1.1384864840175697, "grad_norm": 0.19652950763702393, "learning_rate": 9.631902034733747e-05, "loss": 1.9728, "step": 299110 }, { "epoch": 1.1385245464856923, "grad_norm": 0.20640340447425842, "learning_rate": 9.626741523627302e-05, "loss": 1.9793, "step": 299120 }, { "epoch": 1.138562608953815, "grad_norm": 0.1784249097108841, "learning_rate": 9.621581672053242e-05, "loss": 1.9887, "step": 299130 }, { "epoch": 1.1386006714219377, "grad_norm": 0.20432287454605103, "learning_rate": 9.616422479758763e-05, "loss": 1.9682, "step": 299140 }, { "epoch": 1.1386387338900603, "grad_norm": 0.17736558616161346, "learning_rate": 9.611263946491222e-05, "loss": 1.9784, "step": 299150 }, { "epoch": 1.138676796358183, "grad_norm": 0.16323047876358032, "learning_rate": 9.606106071998128e-05, "loss": 1.9779, "step": 299160 }, { "epoch": 1.1387148588263059, "grad_norm": 0.1606374979019165, "learning_rate": 9.600948856027158e-05, "loss": 1.9792, "step": 299170 }, { "epoch": 1.1387529212944285, "grad_norm": 0.16580626368522644, "learning_rate": 9.595792298326156e-05, "loss": 1.9712, "step": 299180 }, { "epoch": 1.1387909837625512, "grad_norm": 0.1735002100467682, "learning_rate": 9.590636398643115e-05, "loss": 1.9833, "step": 299190 }, { "epoch": 1.1388290462306738, "grad_norm": 0.1848945915699005, "learning_rate": 9.585481156726194e-05, "loss": 1.9876, "step": 299200 }, { "epoch": 1.1388671086987965, "grad_norm": 0.1890447437763214, "learning_rate": 9.580326572323721e-05, "loss": 1.9624, "step": 299210 }, { "epoch": 1.1389051711669191, "grad_norm": 0.1751815676689148, "learning_rate": 9.575172645184166e-05, "loss": 1.9613, "step": 299220 }, { "epoch": 1.1389432336350418, "grad_norm": 0.1621846705675125, "learning_rate": 9.570019375056172e-05, "loss": 1.9691, "step": 299230 }, { "epoch": 1.1389812961031645, "grad_norm": 0.19736886024475098, "learning_rate": 9.56486676168854e-05, "loss": 1.9573, "step": 299240 }, { "epoch": 1.139019358571287, "grad_norm": 0.17240135371685028, "learning_rate": 9.559714804830239e-05, "loss": 1.9839, "step": 299250 }, { "epoch": 1.1390574210394098, "grad_norm": 0.16781572997570038, "learning_rate": 9.554563504230378e-05, "loss": 1.987, "step": 299260 }, { "epoch": 1.1390954835075326, "grad_norm": 0.1575121283531189, "learning_rate": 9.549412859638246e-05, "loss": 1.9769, "step": 299270 }, { "epoch": 1.1391335459756553, "grad_norm": 0.16098329424858093, "learning_rate": 9.544262870803272e-05, "loss": 1.9785, "step": 299280 }, { "epoch": 1.139171608443778, "grad_norm": 0.14912086725234985, "learning_rate": 9.539113537475064e-05, "loss": 1.9649, "step": 299290 }, { "epoch": 1.1392096709119006, "grad_norm": 0.24539200961589813, "learning_rate": 9.533964859403388e-05, "loss": 1.9819, "step": 299300 }, { "epoch": 1.1392477333800233, "grad_norm": 0.15809039771556854, "learning_rate": 9.528816836338145e-05, "loss": 1.9839, "step": 299310 }, { "epoch": 1.139285795848146, "grad_norm": 0.201808899641037, "learning_rate": 9.523669468029428e-05, "loss": 1.973, "step": 299320 }, { "epoch": 1.1393238583162686, "grad_norm": 0.1430976837873459, "learning_rate": 9.51852275422746e-05, "loss": 1.9703, "step": 299330 }, { "epoch": 1.1393619207843912, "grad_norm": 0.20181354880332947, "learning_rate": 9.51337669468264e-05, "loss": 1.9741, "step": 299340 }, { "epoch": 1.1393999832525141, "grad_norm": 0.20010481774806976, "learning_rate": 9.508231289145525e-05, "loss": 1.965, "step": 299350 }, { "epoch": 1.1394380457206368, "grad_norm": 0.27025026082992554, "learning_rate": 9.503086537366824e-05, "loss": 1.973, "step": 299360 }, { "epoch": 1.1394761081887594, "grad_norm": 0.24568350613117218, "learning_rate": 9.497942439097412e-05, "loss": 1.9673, "step": 299370 }, { "epoch": 1.139514170656882, "grad_norm": 0.18625633418560028, "learning_rate": 9.492798994088309e-05, "loss": 1.9752, "step": 299380 }, { "epoch": 1.1395522331250048, "grad_norm": 0.21505199372768402, "learning_rate": 9.487656202090706e-05, "loss": 1.9768, "step": 299390 }, { "epoch": 1.1395902955931274, "grad_norm": 0.17063318192958832, "learning_rate": 9.48251406285594e-05, "loss": 1.9715, "step": 299400 }, { "epoch": 1.13962835806125, "grad_norm": 0.15697281062602997, "learning_rate": 9.477372576135524e-05, "loss": 1.9826, "step": 299410 }, { "epoch": 1.1396664205293727, "grad_norm": 0.1719808429479599, "learning_rate": 9.472231741681109e-05, "loss": 1.9865, "step": 299420 }, { "epoch": 1.1397044829974954, "grad_norm": 0.15887048840522766, "learning_rate": 9.467091559244523e-05, "loss": 1.9684, "step": 299430 }, { "epoch": 1.1397425454656183, "grad_norm": 0.1654588282108307, "learning_rate": 9.461952028577731e-05, "loss": 1.9778, "step": 299440 }, { "epoch": 1.139780607933741, "grad_norm": 0.19598272442817688, "learning_rate": 9.456813149432863e-05, "loss": 1.9752, "step": 299450 }, { "epoch": 1.1398186704018636, "grad_norm": 0.17805804312229156, "learning_rate": 9.451674921562215e-05, "loss": 1.9555, "step": 299460 }, { "epoch": 1.1398567328699862, "grad_norm": 0.1505025327205658, "learning_rate": 9.446537344718226e-05, "loss": 1.964, "step": 299470 }, { "epoch": 1.139894795338109, "grad_norm": 0.18607187271118164, "learning_rate": 9.441400418653506e-05, "loss": 1.9841, "step": 299480 }, { "epoch": 1.1399328578062315, "grad_norm": 0.15925636887550354, "learning_rate": 9.436264143120809e-05, "loss": 1.9809, "step": 299490 }, { "epoch": 1.1399709202743542, "grad_norm": 0.1464940458536148, "learning_rate": 9.43112851787305e-05, "loss": 1.9839, "step": 299500 }, { "epoch": 1.1400089827424769, "grad_norm": 0.16832898557186127, "learning_rate": 9.425993542663302e-05, "loss": 1.984, "step": 299510 }, { "epoch": 1.1400470452105997, "grad_norm": 0.16101309657096863, "learning_rate": 9.420859217244798e-05, "loss": 1.9805, "step": 299520 }, { "epoch": 1.1400851076787224, "grad_norm": 0.21225060522556305, "learning_rate": 9.415725541370912e-05, "loss": 1.9737, "step": 299530 }, { "epoch": 1.140123170146845, "grad_norm": 0.19584670662879944, "learning_rate": 9.410592514795191e-05, "loss": 1.9933, "step": 299540 }, { "epoch": 1.1401612326149677, "grad_norm": 0.1607595980167389, "learning_rate": 9.405460137271332e-05, "loss": 1.9671, "step": 299550 }, { "epoch": 1.1401992950830904, "grad_norm": 0.17394018173217773, "learning_rate": 9.400328408553189e-05, "loss": 1.9726, "step": 299560 }, { "epoch": 1.140237357551213, "grad_norm": 0.2028769999742508, "learning_rate": 9.395197328394761e-05, "loss": 1.9808, "step": 299570 }, { "epoch": 1.1402754200193357, "grad_norm": 0.14995448291301727, "learning_rate": 9.390066896550214e-05, "loss": 1.9827, "step": 299580 }, { "epoch": 1.1403134824874583, "grad_norm": 0.14870086312294006, "learning_rate": 9.384937112773866e-05, "loss": 1.9908, "step": 299590 }, { "epoch": 1.140351544955581, "grad_norm": 0.20218494534492493, "learning_rate": 9.379807976820198e-05, "loss": 1.9749, "step": 299600 }, { "epoch": 1.1403896074237039, "grad_norm": 0.18345682322978973, "learning_rate": 9.374679488443827e-05, "loss": 1.988, "step": 299610 }, { "epoch": 1.1404276698918265, "grad_norm": 0.181381493806839, "learning_rate": 9.369551647399544e-05, "loss": 1.9728, "step": 299620 }, { "epoch": 1.1404657323599492, "grad_norm": 0.22997988760471344, "learning_rate": 9.364424453442277e-05, "loss": 1.9923, "step": 299630 }, { "epoch": 1.1405037948280718, "grad_norm": 0.2116875797510147, "learning_rate": 9.35929790632713e-05, "loss": 1.9713, "step": 299640 }, { "epoch": 1.1405418572961945, "grad_norm": 0.15564244985580444, "learning_rate": 9.354172005809336e-05, "loss": 1.9671, "step": 299650 }, { "epoch": 1.1405799197643172, "grad_norm": 0.17033228278160095, "learning_rate": 9.349046751644308e-05, "loss": 1.9729, "step": 299660 }, { "epoch": 1.1406179822324398, "grad_norm": 0.15838542580604553, "learning_rate": 9.343922143587602e-05, "loss": 1.9854, "step": 299670 }, { "epoch": 1.1406560447005625, "grad_norm": 0.16979332268238068, "learning_rate": 9.33879818139492e-05, "loss": 1.9885, "step": 299680 }, { "epoch": 1.1406941071686854, "grad_norm": 1.0191371440887451, "learning_rate": 9.333674864822123e-05, "loss": 1.9587, "step": 299690 }, { "epoch": 1.140732169636808, "grad_norm": 0.15956035256385803, "learning_rate": 9.32855219362524e-05, "loss": 1.9788, "step": 299700 }, { "epoch": 1.1407702321049307, "grad_norm": 0.20539459586143494, "learning_rate": 9.323430167560427e-05, "loss": 1.969, "step": 299710 }, { "epoch": 1.1408082945730533, "grad_norm": 0.1533263623714447, "learning_rate": 9.318308786384016e-05, "loss": 1.9509, "step": 299720 }, { "epoch": 1.140846357041176, "grad_norm": 0.1617736518383026, "learning_rate": 9.313188049852484e-05, "loss": 1.9832, "step": 299730 }, { "epoch": 1.1408844195092986, "grad_norm": 0.18019530177116394, "learning_rate": 9.308067957722466e-05, "loss": 1.9786, "step": 299740 }, { "epoch": 1.1409224819774213, "grad_norm": 0.19964605569839478, "learning_rate": 9.302948509750736e-05, "loss": 1.9749, "step": 299750 }, { "epoch": 1.140960544445544, "grad_norm": 0.1651037633419037, "learning_rate": 9.297829705694239e-05, "loss": 1.9657, "step": 299760 }, { "epoch": 1.1409986069136666, "grad_norm": 0.16229411959648132, "learning_rate": 9.29271154531005e-05, "loss": 1.9559, "step": 299770 }, { "epoch": 1.1410366693817893, "grad_norm": 0.17111137509346008, "learning_rate": 9.287594028355434e-05, "loss": 1.9762, "step": 299780 }, { "epoch": 1.1410747318499121, "grad_norm": 0.15575209259986877, "learning_rate": 9.28247715458777e-05, "loss": 1.9798, "step": 299790 }, { "epoch": 1.1411127943180348, "grad_norm": 0.15766337513923645, "learning_rate": 9.27736092376461e-05, "loss": 1.9719, "step": 299800 }, { "epoch": 1.1411508567861575, "grad_norm": 0.19725461304187775, "learning_rate": 9.272245335643659e-05, "loss": 1.981, "step": 299810 }, { "epoch": 1.1411889192542801, "grad_norm": 0.17753826081752777, "learning_rate": 9.267130389982752e-05, "loss": 1.9767, "step": 299820 }, { "epoch": 1.1412269817224028, "grad_norm": 0.234220951795578, "learning_rate": 9.262016086539904e-05, "loss": 1.9754, "step": 299830 }, { "epoch": 1.1412650441905254, "grad_norm": 0.15564116835594177, "learning_rate": 9.256902425073272e-05, "loss": 1.9668, "step": 299840 }, { "epoch": 1.141303106658648, "grad_norm": 0.1745157688856125, "learning_rate": 9.251789405341165e-05, "loss": 1.9849, "step": 299850 }, { "epoch": 1.141341169126771, "grad_norm": 0.237146258354187, "learning_rate": 9.24667702710203e-05, "loss": 1.9658, "step": 299860 }, { "epoch": 1.1413792315948936, "grad_norm": 0.18115739524364471, "learning_rate": 9.24156529011449e-05, "loss": 1.9765, "step": 299870 }, { "epoch": 1.1414172940630163, "grad_norm": 0.18322153389453888, "learning_rate": 9.2364541941373e-05, "loss": 1.9627, "step": 299880 }, { "epoch": 1.141455356531139, "grad_norm": 0.19589002430438995, "learning_rate": 9.231343738929371e-05, "loss": 1.9795, "step": 299890 }, { "epoch": 1.1414934189992616, "grad_norm": 0.16380958259105682, "learning_rate": 9.226233924249772e-05, "loss": 1.9838, "step": 299900 }, { "epoch": 1.1415314814673843, "grad_norm": 0.15757855772972107, "learning_rate": 9.221124749857718e-05, "loss": 1.9773, "step": 299910 }, { "epoch": 1.141569543935507, "grad_norm": 0.16874462366104126, "learning_rate": 9.216016215512574e-05, "loss": 1.9866, "step": 299920 }, { "epoch": 1.1416076064036296, "grad_norm": 0.166023388504982, "learning_rate": 9.210908320973855e-05, "loss": 1.9787, "step": 299930 }, { "epoch": 1.1416456688717522, "grad_norm": 0.16237130761146545, "learning_rate": 9.20580106600123e-05, "loss": 1.9737, "step": 299940 }, { "epoch": 1.1416837313398749, "grad_norm": 0.19373784959316254, "learning_rate": 9.200694450354508e-05, "loss": 1.9597, "step": 299950 }, { "epoch": 1.1417217938079978, "grad_norm": 0.19997325539588928, "learning_rate": 9.19558847379367e-05, "loss": 1.9802, "step": 299960 }, { "epoch": 1.1417598562761204, "grad_norm": 0.24005170166492462, "learning_rate": 9.190483136078826e-05, "loss": 1.9716, "step": 299970 }, { "epoch": 1.141797918744243, "grad_norm": 0.17374595999717712, "learning_rate": 9.185378436970249e-05, "loss": 1.9686, "step": 299980 }, { "epoch": 1.1418359812123657, "grad_norm": 0.1472892165184021, "learning_rate": 9.180274376228348e-05, "loss": 1.9745, "step": 299990 }, { "epoch": 1.1418740436804884, "grad_norm": 0.16212306916713715, "learning_rate": 9.175170953613698e-05, "loss": 1.9749, "step": 300000 }, { "epoch": 1.141912106148611, "grad_norm": 0.18655334413051605, "learning_rate": 9.17006816888702e-05, "loss": 1.9681, "step": 300010 }, { "epoch": 1.1419501686167337, "grad_norm": 0.14725945889949799, "learning_rate": 9.164966021809163e-05, "loss": 1.9648, "step": 300020 }, { "epoch": 1.1419882310848566, "grad_norm": 0.16695605218410492, "learning_rate": 9.159864512141164e-05, "loss": 1.9784, "step": 300030 }, { "epoch": 1.1420262935529792, "grad_norm": 0.26002660393714905, "learning_rate": 9.154763639644175e-05, "loss": 1.9867, "step": 300040 }, { "epoch": 1.142064356021102, "grad_norm": 0.20152299106121063, "learning_rate": 9.149663404079523e-05, "loss": 1.9748, "step": 300050 }, { "epoch": 1.1421024184892246, "grad_norm": 0.17907015979290009, "learning_rate": 9.144563805208655e-05, "loss": 1.9745, "step": 300060 }, { "epoch": 1.1421404809573472, "grad_norm": 0.18931683897972107, "learning_rate": 9.1394648427932e-05, "loss": 1.9837, "step": 300070 }, { "epoch": 1.1421785434254699, "grad_norm": 0.26568910479545593, "learning_rate": 9.1343665165949e-05, "loss": 1.9771, "step": 300080 }, { "epoch": 1.1422166058935925, "grad_norm": 0.2429303526878357, "learning_rate": 9.129268826375681e-05, "loss": 1.9723, "step": 300090 }, { "epoch": 1.1422546683617152, "grad_norm": 0.16020800173282623, "learning_rate": 9.124171771897594e-05, "loss": 1.9659, "step": 300100 }, { "epoch": 1.1422927308298378, "grad_norm": 0.2040097862482071, "learning_rate": 9.119075352922852e-05, "loss": 1.9734, "step": 300110 }, { "epoch": 1.1423307932979605, "grad_norm": 0.23395033180713654, "learning_rate": 9.113979569213799e-05, "loss": 1.9724, "step": 300120 }, { "epoch": 1.1423688557660834, "grad_norm": 0.18835385143756866, "learning_rate": 9.108884420532948e-05, "loss": 1.9819, "step": 300130 }, { "epoch": 1.142406918234206, "grad_norm": 0.1681976020336151, "learning_rate": 9.10378990664294e-05, "loss": 1.9761, "step": 300140 }, { "epoch": 1.1424449807023287, "grad_norm": 0.17211827635765076, "learning_rate": 9.09869602730658e-05, "loss": 1.9749, "step": 300150 }, { "epoch": 1.1424830431704514, "grad_norm": 0.20787794888019562, "learning_rate": 9.093602782286809e-05, "loss": 1.9556, "step": 300160 }, { "epoch": 1.142521105638574, "grad_norm": 0.18131273984909058, "learning_rate": 9.088510171346731e-05, "loss": 1.9549, "step": 300170 }, { "epoch": 1.1425591681066967, "grad_norm": 0.20370520651340485, "learning_rate": 9.083418194249577e-05, "loss": 1.9759, "step": 300180 }, { "epoch": 1.1425972305748193, "grad_norm": 0.23574639856815338, "learning_rate": 9.07832685075874e-05, "loss": 1.9816, "step": 300190 }, { "epoch": 1.142635293042942, "grad_norm": 0.2160944789648056, "learning_rate": 9.073236140637748e-05, "loss": 1.9807, "step": 300200 }, { "epoch": 1.1426733555110649, "grad_norm": 0.18580316007137299, "learning_rate": 9.068146063650295e-05, "loss": 1.9964, "step": 300210 }, { "epoch": 1.1427114179791875, "grad_norm": 0.17532704770565033, "learning_rate": 9.063056619560206e-05, "loss": 1.9674, "step": 300220 }, { "epoch": 1.1427494804473102, "grad_norm": 0.16181550920009613, "learning_rate": 9.057967808131457e-05, "loss": 1.9736, "step": 300230 }, { "epoch": 1.1427875429154328, "grad_norm": 0.2239847034215927, "learning_rate": 9.052879629128174e-05, "loss": 1.9651, "step": 300240 }, { "epoch": 1.1428256053835555, "grad_norm": 0.15953215956687927, "learning_rate": 9.04779208231462e-05, "loss": 1.9786, "step": 300250 }, { "epoch": 1.1428636678516781, "grad_norm": 0.1677812933921814, "learning_rate": 9.042705167455212e-05, "loss": 1.9803, "step": 300260 }, { "epoch": 1.1429017303198008, "grad_norm": 0.16961197555065155, "learning_rate": 9.037618884314519e-05, "loss": 1.963, "step": 300270 }, { "epoch": 1.1429397927879235, "grad_norm": 0.18011970818042755, "learning_rate": 9.032533232657242e-05, "loss": 1.9596, "step": 300280 }, { "epoch": 1.1429778552560461, "grad_norm": 0.17349569499492645, "learning_rate": 9.027448212248241e-05, "loss": 1.9693, "step": 300290 }, { "epoch": 1.143015917724169, "grad_norm": 0.1612197458744049, "learning_rate": 9.022363822852514e-05, "loss": 1.981, "step": 300300 }, { "epoch": 1.1430539801922917, "grad_norm": 0.19596810638904572, "learning_rate": 9.017280064235206e-05, "loss": 1.9687, "step": 300310 }, { "epoch": 1.1430920426604143, "grad_norm": 0.22040875256061554, "learning_rate": 9.012196936161604e-05, "loss": 1.9715, "step": 300320 }, { "epoch": 1.143130105128537, "grad_norm": 0.16568516194820404, "learning_rate": 9.007114438397157e-05, "loss": 1.9823, "step": 300330 }, { "epoch": 1.1431681675966596, "grad_norm": 0.24007253348827362, "learning_rate": 9.002032570707441e-05, "loss": 1.9561, "step": 300340 }, { "epoch": 1.1432062300647823, "grad_norm": 0.15698036551475525, "learning_rate": 8.996951332858189e-05, "loss": 1.9642, "step": 300350 }, { "epoch": 1.143244292532905, "grad_norm": 0.2927793264389038, "learning_rate": 8.991870724615264e-05, "loss": 1.9689, "step": 300360 }, { "epoch": 1.1432823550010276, "grad_norm": 0.2039732038974762, "learning_rate": 8.986790745744694e-05, "loss": 1.9858, "step": 300370 }, { "epoch": 1.1433204174691505, "grad_norm": 0.18189412355422974, "learning_rate": 8.981711396012631e-05, "loss": 1.9597, "step": 300380 }, { "epoch": 1.1433584799372731, "grad_norm": 0.16770325601100922, "learning_rate": 8.976632675185392e-05, "loss": 1.963, "step": 300390 }, { "epoch": 1.1433965424053958, "grad_norm": 0.18383848667144775, "learning_rate": 8.971554583029428e-05, "loss": 1.9853, "step": 300400 }, { "epoch": 1.1434346048735184, "grad_norm": 0.16434144973754883, "learning_rate": 8.96647711931134e-05, "loss": 1.9733, "step": 300410 }, { "epoch": 1.143472667341641, "grad_norm": 0.159880593419075, "learning_rate": 8.961400283797855e-05, "loss": 1.9717, "step": 300420 }, { "epoch": 1.1435107298097638, "grad_norm": 0.21327455341815948, "learning_rate": 8.956324076255873e-05, "loss": 1.9518, "step": 300430 }, { "epoch": 1.1435487922778864, "grad_norm": 0.20327025651931763, "learning_rate": 8.951248496452414e-05, "loss": 1.9664, "step": 300440 }, { "epoch": 1.143586854746009, "grad_norm": 0.1809774935245514, "learning_rate": 8.946173544154663e-05, "loss": 1.97, "step": 300450 }, { "epoch": 1.1436249172141317, "grad_norm": 0.2367480844259262, "learning_rate": 8.941099219129923e-05, "loss": 1.9739, "step": 300460 }, { "epoch": 1.1436629796822546, "grad_norm": 0.2103966772556305, "learning_rate": 8.936025521145669e-05, "loss": 1.9825, "step": 300470 }, { "epoch": 1.1437010421503773, "grad_norm": 0.2010219842195511, "learning_rate": 8.930952449969499e-05, "loss": 1.9819, "step": 300480 }, { "epoch": 1.1437391046185, "grad_norm": 0.17966939508914948, "learning_rate": 8.925880005369158e-05, "loss": 1.9826, "step": 300490 }, { "epoch": 1.1437771670866226, "grad_norm": 0.19157104194164276, "learning_rate": 8.920808187112539e-05, "loss": 1.9739, "step": 300500 }, { "epoch": 1.1438152295547452, "grad_norm": 0.16343864798545837, "learning_rate": 8.915736994967688e-05, "loss": 1.957, "step": 300510 }, { "epoch": 1.143853292022868, "grad_norm": 0.23392702639102936, "learning_rate": 8.910666428702774e-05, "loss": 1.9744, "step": 300520 }, { "epoch": 1.1438913544909906, "grad_norm": 0.19292297959327698, "learning_rate": 8.905596488086121e-05, "loss": 1.9719, "step": 300530 }, { "epoch": 1.1439294169591132, "grad_norm": 0.1789822280406952, "learning_rate": 8.900527172886186e-05, "loss": 1.9835, "step": 300540 }, { "epoch": 1.143967479427236, "grad_norm": 0.18455876410007477, "learning_rate": 8.895458482871588e-05, "loss": 1.9684, "step": 300550 }, { "epoch": 1.1440055418953587, "grad_norm": 0.19163396954536438, "learning_rate": 8.890390417811068e-05, "loss": 1.9713, "step": 300560 }, { "epoch": 1.1440436043634814, "grad_norm": 0.16672499477863312, "learning_rate": 8.88532297747352e-05, "loss": 1.9732, "step": 300570 }, { "epoch": 1.144081666831604, "grad_norm": 0.16392666101455688, "learning_rate": 8.880256161627987e-05, "loss": 1.9744, "step": 300580 }, { "epoch": 1.1441197292997267, "grad_norm": 0.16160540282726288, "learning_rate": 8.87518997004363e-05, "loss": 1.9866, "step": 300590 }, { "epoch": 1.1441577917678494, "grad_norm": 0.1681511402130127, "learning_rate": 8.870124402489783e-05, "loss": 1.9804, "step": 300600 }, { "epoch": 1.144195854235972, "grad_norm": 0.16743887960910797, "learning_rate": 8.865059458735896e-05, "loss": 1.961, "step": 300610 }, { "epoch": 1.1442339167040947, "grad_norm": 0.1727055162191391, "learning_rate": 8.859995138551575e-05, "loss": 1.9758, "step": 300620 }, { "epoch": 1.1442719791722173, "grad_norm": 0.24352505803108215, "learning_rate": 8.85493144170657e-05, "loss": 1.974, "step": 300630 }, { "epoch": 1.14431004164034, "grad_norm": 0.3063167929649353, "learning_rate": 8.849868367970759e-05, "loss": 1.9736, "step": 300640 }, { "epoch": 1.1443481041084629, "grad_norm": 0.2421969473361969, "learning_rate": 8.844805917114179e-05, "loss": 1.9631, "step": 300650 }, { "epoch": 1.1443861665765855, "grad_norm": 0.2752586603164673, "learning_rate": 8.839744088906998e-05, "loss": 1.9775, "step": 300660 }, { "epoch": 1.1444242290447082, "grad_norm": 0.27569398283958435, "learning_rate": 8.834682883119516e-05, "loss": 1.9517, "step": 300670 }, { "epoch": 1.1444622915128309, "grad_norm": 0.20763832330703735, "learning_rate": 8.829622299522195e-05, "loss": 1.9607, "step": 300680 }, { "epoch": 1.1445003539809535, "grad_norm": 0.19430187344551086, "learning_rate": 8.824562337885627e-05, "loss": 1.9661, "step": 300690 }, { "epoch": 1.1445384164490762, "grad_norm": 0.1806371957063675, "learning_rate": 8.819502997980544e-05, "loss": 1.966, "step": 300700 }, { "epoch": 1.1445764789171988, "grad_norm": 0.17690540850162506, "learning_rate": 8.814444279577821e-05, "loss": 1.9626, "step": 300710 }, { "epoch": 1.1446145413853217, "grad_norm": 0.16142280399799347, "learning_rate": 8.809386182448475e-05, "loss": 1.9715, "step": 300720 }, { "epoch": 1.1446526038534444, "grad_norm": 0.2089589387178421, "learning_rate": 8.804328706363663e-05, "loss": 1.9686, "step": 300730 }, { "epoch": 1.144690666321567, "grad_norm": 0.19803060591220856, "learning_rate": 8.79927185109467e-05, "loss": 1.9662, "step": 300740 }, { "epoch": 1.1447287287896897, "grad_norm": 0.288856565952301, "learning_rate": 8.794215616412949e-05, "loss": 1.9652, "step": 300750 }, { "epoch": 1.1447667912578123, "grad_norm": 0.26154083013534546, "learning_rate": 8.789160002090063e-05, "loss": 1.9576, "step": 300760 }, { "epoch": 1.144804853725935, "grad_norm": 0.17871473729610443, "learning_rate": 8.784105007897741e-05, "loss": 1.9585, "step": 300770 }, { "epoch": 1.1448429161940576, "grad_norm": 0.2214268147945404, "learning_rate": 8.779050633607838e-05, "loss": 1.9551, "step": 300780 }, { "epoch": 1.1448809786621803, "grad_norm": 0.2183777391910553, "learning_rate": 8.773996878992341e-05, "loss": 1.9834, "step": 300790 }, { "epoch": 1.144919041130303, "grad_norm": 0.17460443079471588, "learning_rate": 8.768943743823393e-05, "loss": 1.9695, "step": 300800 }, { "epoch": 1.1449571035984256, "grad_norm": 0.17166626453399658, "learning_rate": 8.763891227873272e-05, "loss": 1.9736, "step": 300810 }, { "epoch": 1.1449951660665485, "grad_norm": 0.19382481276988983, "learning_rate": 8.758839330914398e-05, "loss": 1.9818, "step": 300820 }, { "epoch": 1.1450332285346712, "grad_norm": 0.2120654433965683, "learning_rate": 8.753788052719314e-05, "loss": 1.9526, "step": 300830 }, { "epoch": 1.1450712910027938, "grad_norm": 0.18074551224708557, "learning_rate": 8.748737393060723e-05, "loss": 1.9668, "step": 300840 }, { "epoch": 1.1451093534709165, "grad_norm": 0.16520459949970245, "learning_rate": 8.743687351711455e-05, "loss": 1.9662, "step": 300850 }, { "epoch": 1.1451474159390391, "grad_norm": 0.172722727060318, "learning_rate": 8.738637928444481e-05, "loss": 1.9783, "step": 300860 }, { "epoch": 1.1451854784071618, "grad_norm": 0.1552434265613556, "learning_rate": 8.73358912303292e-05, "loss": 1.9719, "step": 300870 }, { "epoch": 1.1452235408752844, "grad_norm": 0.21896371245384216, "learning_rate": 8.728540935250018e-05, "loss": 1.9716, "step": 300880 }, { "epoch": 1.1452616033434073, "grad_norm": 0.16902369260787964, "learning_rate": 8.723493364869161e-05, "loss": 1.9721, "step": 300890 }, { "epoch": 1.14529966581153, "grad_norm": 0.16694094240665436, "learning_rate": 8.718446411663882e-05, "loss": 1.9724, "step": 300900 }, { "epoch": 1.1453377282796526, "grad_norm": 0.24911977350711823, "learning_rate": 8.71340007540784e-05, "loss": 1.9756, "step": 300910 }, { "epoch": 1.1453757907477753, "grad_norm": 0.15349158644676208, "learning_rate": 8.708354355874842e-05, "loss": 1.9725, "step": 300920 }, { "epoch": 1.145413853215898, "grad_norm": 0.17509305477142334, "learning_rate": 8.703309252838832e-05, "loss": 1.9719, "step": 300930 }, { "epoch": 1.1454519156840206, "grad_norm": 0.16706439852714539, "learning_rate": 8.698264766073894e-05, "loss": 1.99, "step": 300940 }, { "epoch": 1.1454899781521433, "grad_norm": 0.1890660524368286, "learning_rate": 8.693220895354236e-05, "loss": 1.9681, "step": 300950 }, { "epoch": 1.145528040620266, "grad_norm": 0.1490461230278015, "learning_rate": 8.68817764045422e-05, "loss": 1.9789, "step": 300960 }, { "epoch": 1.1455661030883886, "grad_norm": 0.16561798751354218, "learning_rate": 8.68313500114834e-05, "loss": 1.9659, "step": 300970 }, { "epoch": 1.1456041655565112, "grad_norm": 0.17476516962051392, "learning_rate": 8.678092977211227e-05, "loss": 1.9652, "step": 300980 }, { "epoch": 1.1456422280246341, "grad_norm": 0.27982303500175476, "learning_rate": 8.67305156841765e-05, "loss": 1.978, "step": 300990 }, { "epoch": 1.1456802904927568, "grad_norm": 0.1696423441171646, "learning_rate": 8.668010774542517e-05, "loss": 1.9711, "step": 301000 }, { "epoch": 1.1457183529608794, "grad_norm": 0.17661775648593903, "learning_rate": 8.66297059536087e-05, "loss": 1.9582, "step": 301010 }, { "epoch": 1.145756415429002, "grad_norm": 0.1834118366241455, "learning_rate": 8.657931030647892e-05, "loss": 1.9677, "step": 301020 }, { "epoch": 1.1457944778971247, "grad_norm": 0.2517413794994354, "learning_rate": 8.652892080178893e-05, "loss": 1.9707, "step": 301030 }, { "epoch": 1.1458325403652474, "grad_norm": 0.20651330053806305, "learning_rate": 8.647853743729334e-05, "loss": 1.9786, "step": 301040 }, { "epoch": 1.14587060283337, "grad_norm": 0.18979932367801666, "learning_rate": 8.642816021074806e-05, "loss": 1.9777, "step": 301050 }, { "epoch": 1.145908665301493, "grad_norm": 0.1647159606218338, "learning_rate": 8.637778911991034e-05, "loss": 1.9661, "step": 301060 }, { "epoch": 1.1459467277696156, "grad_norm": 0.16355659067630768, "learning_rate": 8.632742416253892e-05, "loss": 1.978, "step": 301070 }, { "epoch": 1.1459847902377382, "grad_norm": 0.18683548271656036, "learning_rate": 8.627706533639367e-05, "loss": 1.9661, "step": 301080 }, { "epoch": 1.146022852705861, "grad_norm": 0.16159670054912567, "learning_rate": 8.62267126392361e-05, "loss": 1.9747, "step": 301090 }, { "epoch": 1.1460609151739836, "grad_norm": 0.1670175939798355, "learning_rate": 8.617636606882878e-05, "loss": 1.9645, "step": 301100 }, { "epoch": 1.1460989776421062, "grad_norm": 0.2345433384180069, "learning_rate": 8.612602562293592e-05, "loss": 1.9624, "step": 301110 }, { "epoch": 1.1461370401102289, "grad_norm": 0.1708359718322754, "learning_rate": 8.607569129932302e-05, "loss": 1.959, "step": 301120 }, { "epoch": 1.1461751025783515, "grad_norm": 0.17826248705387115, "learning_rate": 8.602536309575682e-05, "loss": 1.9683, "step": 301130 }, { "epoch": 1.1462131650464742, "grad_norm": 0.16386419534683228, "learning_rate": 8.597504101000547e-05, "loss": 1.9544, "step": 301140 }, { "epoch": 1.1462512275145968, "grad_norm": 0.2092752605676651, "learning_rate": 8.592472503983856e-05, "loss": 1.9565, "step": 301150 }, { "epoch": 1.1462892899827197, "grad_norm": 0.18252654373645782, "learning_rate": 8.587441518302686e-05, "loss": 1.9588, "step": 301160 }, { "epoch": 1.1463273524508424, "grad_norm": 0.19344913959503174, "learning_rate": 8.582411143734275e-05, "loss": 1.9642, "step": 301170 }, { "epoch": 1.146365414918965, "grad_norm": 0.14749857783317566, "learning_rate": 8.577381380055976e-05, "loss": 1.9716, "step": 301180 }, { "epoch": 1.1464034773870877, "grad_norm": 0.320700466632843, "learning_rate": 8.572352227045283e-05, "loss": 1.9729, "step": 301190 }, { "epoch": 1.1464415398552104, "grad_norm": 0.20049598813056946, "learning_rate": 8.567323684479821e-05, "loss": 1.9823, "step": 301200 }, { "epoch": 1.146479602323333, "grad_norm": 0.21233174204826355, "learning_rate": 8.56229575213736e-05, "loss": 1.9532, "step": 301210 }, { "epoch": 1.1465176647914557, "grad_norm": 0.1554403305053711, "learning_rate": 8.557268429795795e-05, "loss": 1.9611, "step": 301220 }, { "epoch": 1.1465557272595783, "grad_norm": 0.21042974293231964, "learning_rate": 8.552241717233167e-05, "loss": 1.9917, "step": 301230 }, { "epoch": 1.1465937897277012, "grad_norm": 0.23165065050125122, "learning_rate": 8.547215614227633e-05, "loss": 1.9802, "step": 301240 }, { "epoch": 1.1466318521958239, "grad_norm": 0.22600439190864563, "learning_rate": 8.542190120557503e-05, "loss": 1.9662, "step": 301250 }, { "epoch": 1.1466699146639465, "grad_norm": 0.1871139258146286, "learning_rate": 8.53716523600121e-05, "loss": 1.9679, "step": 301260 }, { "epoch": 1.1467079771320692, "grad_norm": 0.15982182323932648, "learning_rate": 8.532140960337325e-05, "loss": 1.9672, "step": 301270 }, { "epoch": 1.1467460396001918, "grad_norm": 0.17912767827510834, "learning_rate": 8.52711729334456e-05, "loss": 1.9752, "step": 301280 }, { "epoch": 1.1467841020683145, "grad_norm": 0.20834913849830627, "learning_rate": 8.522094234801742e-05, "loss": 1.9641, "step": 301290 }, { "epoch": 1.1468221645364371, "grad_norm": 0.2113134264945984, "learning_rate": 8.517071784487856e-05, "loss": 1.9671, "step": 301300 }, { "epoch": 1.1468602270045598, "grad_norm": 0.1577615588903427, "learning_rate": 8.51204994218201e-05, "loss": 1.9691, "step": 301310 }, { "epoch": 1.1468982894726825, "grad_norm": 0.2631106376647949, "learning_rate": 8.507028707663434e-05, "loss": 1.9665, "step": 301320 }, { "epoch": 1.1469363519408053, "grad_norm": 0.2079096883535385, "learning_rate": 8.502008080711504e-05, "loss": 1.97, "step": 301330 }, { "epoch": 1.146974414408928, "grad_norm": 0.2349461168050766, "learning_rate": 8.496988061105737e-05, "loss": 1.9595, "step": 301340 }, { "epoch": 1.1470124768770507, "grad_norm": 0.2771170139312744, "learning_rate": 8.491968648625759e-05, "loss": 1.9716, "step": 301350 }, { "epoch": 1.1470505393451733, "grad_norm": 0.1749347448348999, "learning_rate": 8.486949843051361e-05, "loss": 1.9601, "step": 301360 }, { "epoch": 1.147088601813296, "grad_norm": 0.17974738776683807, "learning_rate": 8.481931644162444e-05, "loss": 1.972, "step": 301370 }, { "epoch": 1.1471266642814186, "grad_norm": 0.1842113435268402, "learning_rate": 8.476914051739044e-05, "loss": 1.9564, "step": 301380 }, { "epoch": 1.1471647267495413, "grad_norm": 0.1851554960012436, "learning_rate": 8.471897065561335e-05, "loss": 1.9538, "step": 301390 }, { "epoch": 1.147202789217664, "grad_norm": 0.1589783877134323, "learning_rate": 8.466880685409628e-05, "loss": 1.9642, "step": 301400 }, { "epoch": 1.1472408516857868, "grad_norm": 0.1574392318725586, "learning_rate": 8.461864911064354e-05, "loss": 1.9566, "step": 301410 }, { "epoch": 1.1472789141539095, "grad_norm": 0.19300295412540436, "learning_rate": 8.456849742306095e-05, "loss": 1.9905, "step": 301420 }, { "epoch": 1.1473169766220321, "grad_norm": 0.19914722442626953, "learning_rate": 8.451835178915546e-05, "loss": 1.9676, "step": 301430 }, { "epoch": 1.1473550390901548, "grad_norm": 0.17109766602516174, "learning_rate": 8.446821220673545e-05, "loss": 1.9665, "step": 301440 }, { "epoch": 1.1473931015582775, "grad_norm": 0.17407475411891937, "learning_rate": 8.441807867361057e-05, "loss": 1.9642, "step": 301450 }, { "epoch": 1.1474311640264, "grad_norm": 0.1888018250465393, "learning_rate": 8.436795118759188e-05, "loss": 1.9629, "step": 301460 }, { "epoch": 1.1474692264945228, "grad_norm": 0.16494682431221008, "learning_rate": 8.431782974649166e-05, "loss": 1.9727, "step": 301470 }, { "epoch": 1.1475072889626454, "grad_norm": 0.1666668802499771, "learning_rate": 8.426771434812363e-05, "loss": 1.9768, "step": 301480 }, { "epoch": 1.147545351430768, "grad_norm": 0.1791200041770935, "learning_rate": 8.421760499030267e-05, "loss": 1.9882, "step": 301490 }, { "epoch": 1.1475834138988907, "grad_norm": 0.16579587757587433, "learning_rate": 8.416750167084503e-05, "loss": 1.9702, "step": 301500 }, { "epoch": 1.1476214763670136, "grad_norm": 0.16474118828773499, "learning_rate": 8.411740438756837e-05, "loss": 1.9715, "step": 301510 }, { "epoch": 1.1476595388351363, "grad_norm": 0.2248903065919876, "learning_rate": 8.406731313829158e-05, "loss": 1.9559, "step": 301520 }, { "epoch": 1.147697601303259, "grad_norm": 0.15927183628082275, "learning_rate": 8.401722792083483e-05, "loss": 1.975, "step": 301530 }, { "epoch": 1.1477356637713816, "grad_norm": 0.14855605363845825, "learning_rate": 8.396714873301969e-05, "loss": 1.9761, "step": 301540 }, { "epoch": 1.1477737262395042, "grad_norm": 0.1880766898393631, "learning_rate": 8.391707557266903e-05, "loss": 1.9772, "step": 301550 }, { "epoch": 1.147811788707627, "grad_norm": 0.17366820573806763, "learning_rate": 8.386700843760698e-05, "loss": 1.9774, "step": 301560 }, { "epoch": 1.1478498511757496, "grad_norm": 0.1661127507686615, "learning_rate": 8.381694732565898e-05, "loss": 1.9658, "step": 301570 }, { "epoch": 1.1478879136438724, "grad_norm": 0.18777918815612793, "learning_rate": 8.376689223465179e-05, "loss": 1.9882, "step": 301580 }, { "epoch": 1.147925976111995, "grad_norm": 0.2077363282442093, "learning_rate": 8.371684316241346e-05, "loss": 1.968, "step": 301590 }, { "epoch": 1.1479640385801178, "grad_norm": 0.23291166126728058, "learning_rate": 8.366680010677346e-05, "loss": 1.9749, "step": 301600 }, { "epoch": 1.1480021010482404, "grad_norm": 0.19629411399364471, "learning_rate": 8.361676306556238e-05, "loss": 1.9864, "step": 301610 }, { "epoch": 1.148040163516363, "grad_norm": 0.18168382346630096, "learning_rate": 8.356673203661231e-05, "loss": 1.9607, "step": 301620 }, { "epoch": 1.1480782259844857, "grad_norm": 0.18806378543376923, "learning_rate": 8.351670701775644e-05, "loss": 1.974, "step": 301630 }, { "epoch": 1.1481162884526084, "grad_norm": 0.157528817653656, "learning_rate": 8.346668800682944e-05, "loss": 1.961, "step": 301640 }, { "epoch": 1.148154350920731, "grad_norm": 0.17656798660755157, "learning_rate": 8.341667500166706e-05, "loss": 1.952, "step": 301650 }, { "epoch": 1.1481924133888537, "grad_norm": 0.27359622716903687, "learning_rate": 8.336666800010667e-05, "loss": 1.9646, "step": 301660 }, { "epoch": 1.1482304758569764, "grad_norm": 0.17596830427646637, "learning_rate": 8.33166669999867e-05, "loss": 1.9712, "step": 301670 }, { "epoch": 1.1482685383250992, "grad_norm": 0.16874903440475464, "learning_rate": 8.326667199914684e-05, "loss": 1.972, "step": 301680 }, { "epoch": 1.1483066007932219, "grad_norm": 0.20028790831565857, "learning_rate": 8.321668299542828e-05, "loss": 1.9815, "step": 301690 }, { "epoch": 1.1483446632613445, "grad_norm": 0.15614601969718933, "learning_rate": 8.316669998667337e-05, "loss": 1.9696, "step": 301700 }, { "epoch": 1.1483827257294672, "grad_norm": 0.2561275064945221, "learning_rate": 8.31167229707257e-05, "loss": 1.9636, "step": 301710 }, { "epoch": 1.1484207881975899, "grad_norm": 0.1641322672367096, "learning_rate": 8.306675194543029e-05, "loss": 1.9527, "step": 301720 }, { "epoch": 1.1484588506657125, "grad_norm": 0.177987203001976, "learning_rate": 8.301678690863346e-05, "loss": 1.9626, "step": 301730 }, { "epoch": 1.1484969131338352, "grad_norm": 0.18081584572792053, "learning_rate": 8.296682785818266e-05, "loss": 1.9683, "step": 301740 }, { "epoch": 1.148534975601958, "grad_norm": 0.16380837559700012, "learning_rate": 8.291687479192673e-05, "loss": 1.9854, "step": 301750 }, { "epoch": 1.1485730380700807, "grad_norm": 0.1782199740409851, "learning_rate": 8.286692770771581e-05, "loss": 1.9782, "step": 301760 }, { "epoch": 1.1486111005382034, "grad_norm": 0.19196809828281403, "learning_rate": 8.281698660340126e-05, "loss": 1.978, "step": 301770 }, { "epoch": 1.148649163006326, "grad_norm": 0.28066837787628174, "learning_rate": 8.276705147683583e-05, "loss": 1.9707, "step": 301780 }, { "epoch": 1.1486872254744487, "grad_norm": 0.22474101185798645, "learning_rate": 8.271712232587353e-05, "loss": 1.968, "step": 301790 }, { "epoch": 1.1487252879425713, "grad_norm": 0.18222326040267944, "learning_rate": 8.266719914836951e-05, "loss": 1.9798, "step": 301800 }, { "epoch": 1.148763350410694, "grad_norm": 0.173833966255188, "learning_rate": 8.261728194218038e-05, "loss": 1.9731, "step": 301810 }, { "epoch": 1.1488014128788167, "grad_norm": 0.16559606790542603, "learning_rate": 8.256737070516396e-05, "loss": 1.9481, "step": 301820 }, { "epoch": 1.1488394753469393, "grad_norm": 0.15625733137130737, "learning_rate": 8.251746543517935e-05, "loss": 1.9775, "step": 301830 }, { "epoch": 1.148877537815062, "grad_norm": 0.1697421818971634, "learning_rate": 8.246756613008694e-05, "loss": 1.9711, "step": 301840 }, { "epoch": 1.1489156002831848, "grad_norm": 0.24292589724063873, "learning_rate": 8.241767278774837e-05, "loss": 1.9604, "step": 301850 }, { "epoch": 1.1489536627513075, "grad_norm": 0.1861419975757599, "learning_rate": 8.236778540602657e-05, "loss": 1.9623, "step": 301860 }, { "epoch": 1.1489917252194302, "grad_norm": 0.17582234740257263, "learning_rate": 8.231790398278582e-05, "loss": 1.9622, "step": 301870 }, { "epoch": 1.1490297876875528, "grad_norm": 0.16839978098869324, "learning_rate": 8.226802851589154e-05, "loss": 1.9493, "step": 301880 }, { "epoch": 1.1490678501556755, "grad_norm": 0.16905924677848816, "learning_rate": 8.221815900321056e-05, "loss": 1.966, "step": 301890 }, { "epoch": 1.1491059126237981, "grad_norm": 0.17358462512493134, "learning_rate": 8.216829544261084e-05, "loss": 1.9735, "step": 301900 }, { "epoch": 1.1491439750919208, "grad_norm": 0.22775432467460632, "learning_rate": 8.211843783196177e-05, "loss": 1.9678, "step": 301910 }, { "epoch": 1.1491820375600437, "grad_norm": 0.17102882266044617, "learning_rate": 8.206858616913388e-05, "loss": 1.9719, "step": 301920 }, { "epoch": 1.1492201000281663, "grad_norm": 0.17452996969223022, "learning_rate": 8.201874045199909e-05, "loss": 1.9729, "step": 301930 }, { "epoch": 1.149258162496289, "grad_norm": 0.15462011098861694, "learning_rate": 8.196890067843038e-05, "loss": 1.959, "step": 301940 }, { "epoch": 1.1492962249644116, "grad_norm": 0.20610308647155762, "learning_rate": 8.191906684630224e-05, "loss": 1.9652, "step": 301950 }, { "epoch": 1.1493342874325343, "grad_norm": 0.16314344108104706, "learning_rate": 8.186923895349036e-05, "loss": 1.9803, "step": 301960 }, { "epoch": 1.149372349900657, "grad_norm": 0.15605981647968292, "learning_rate": 8.18194169978716e-05, "loss": 1.9577, "step": 301970 }, { "epoch": 1.1494104123687796, "grad_norm": 0.1960136443376541, "learning_rate": 8.176960097732416e-05, "loss": 1.9453, "step": 301980 }, { "epoch": 1.1494484748369023, "grad_norm": 0.16937169432640076, "learning_rate": 8.171979088972747e-05, "loss": 1.9588, "step": 301990 }, { "epoch": 1.149486537305025, "grad_norm": 0.1623869091272354, "learning_rate": 8.166998673296222e-05, "loss": 1.9786, "step": 302000 }, { "epoch": 1.1495245997731476, "grad_norm": 0.1552773416042328, "learning_rate": 8.162018850491043e-05, "loss": 1.9606, "step": 302010 }, { "epoch": 1.1495626622412705, "grad_norm": 0.1646445244550705, "learning_rate": 8.157039620345536e-05, "loss": 1.9741, "step": 302020 }, { "epoch": 1.1496007247093931, "grad_norm": 0.18836228549480438, "learning_rate": 8.15206098264814e-05, "loss": 1.9618, "step": 302030 }, { "epoch": 1.1496387871775158, "grad_norm": 0.1809222400188446, "learning_rate": 8.147082937187439e-05, "loss": 1.9648, "step": 302040 }, { "epoch": 1.1496768496456384, "grad_norm": 0.16798196732997894, "learning_rate": 8.142105483752133e-05, "loss": 1.9748, "step": 302050 }, { "epoch": 1.149714912113761, "grad_norm": 0.17388495802879333, "learning_rate": 8.137128622131046e-05, "loss": 1.9723, "step": 302060 }, { "epoch": 1.1497529745818837, "grad_norm": 0.15599116683006287, "learning_rate": 8.132152352113121e-05, "loss": 1.963, "step": 302070 }, { "epoch": 1.1497910370500064, "grad_norm": 0.1911918967962265, "learning_rate": 8.127176673487451e-05, "loss": 1.9852, "step": 302080 }, { "epoch": 1.149829099518129, "grad_norm": 0.16465184092521667, "learning_rate": 8.122201586043231e-05, "loss": 1.9665, "step": 302090 }, { "epoch": 1.149867161986252, "grad_norm": 0.1780809909105301, "learning_rate": 8.11722708956979e-05, "loss": 1.9684, "step": 302100 }, { "epoch": 1.1499052244543746, "grad_norm": 0.20676323771476746, "learning_rate": 8.112253183856577e-05, "loss": 1.9681, "step": 302110 }, { "epoch": 1.1499432869224973, "grad_norm": 0.19733954966068268, "learning_rate": 8.107279868693179e-05, "loss": 1.968, "step": 302120 }, { "epoch": 1.14998134939062, "grad_norm": 0.17312826216220856, "learning_rate": 8.102307143869287e-05, "loss": 1.9565, "step": 302130 }, { "epoch": 1.1500194118587426, "grad_norm": 0.2496890276670456, "learning_rate": 8.097335009174734e-05, "loss": 1.9626, "step": 302140 }, { "epoch": 1.1500574743268652, "grad_norm": 0.18430748581886292, "learning_rate": 8.092363464399477e-05, "loss": 1.9772, "step": 302150 }, { "epoch": 1.1500955367949879, "grad_norm": 0.16753272712230682, "learning_rate": 8.087392509333585e-05, "loss": 1.9665, "step": 302160 }, { "epoch": 1.1501335992631105, "grad_norm": 0.18290819227695465, "learning_rate": 8.08242214376726e-05, "loss": 1.9639, "step": 302170 }, { "epoch": 1.1501716617312332, "grad_norm": 0.24969084560871124, "learning_rate": 8.077452367490834e-05, "loss": 1.9856, "step": 302180 }, { "epoch": 1.150209724199356, "grad_norm": 0.16676537692546844, "learning_rate": 8.072483180294748e-05, "loss": 1.955, "step": 302190 }, { "epoch": 1.1502477866674787, "grad_norm": 0.19444943964481354, "learning_rate": 8.067514581969587e-05, "loss": 1.9693, "step": 302200 }, { "epoch": 1.1502858491356014, "grad_norm": 0.1640620082616806, "learning_rate": 8.06254657230604e-05, "loss": 1.9651, "step": 302210 }, { "epoch": 1.150323911603724, "grad_norm": 0.21142394840717316, "learning_rate": 8.057579151094929e-05, "loss": 1.9714, "step": 302220 }, { "epoch": 1.1503619740718467, "grad_norm": 0.2291944921016693, "learning_rate": 8.052612318127206e-05, "loss": 1.966, "step": 302230 }, { "epoch": 1.1504000365399694, "grad_norm": 0.16705965995788574, "learning_rate": 8.047646073193937e-05, "loss": 1.9574, "step": 302240 }, { "epoch": 1.150438099008092, "grad_norm": 0.19383689761161804, "learning_rate": 8.042680416086318e-05, "loss": 1.9416, "step": 302250 }, { "epoch": 1.1504761614762147, "grad_norm": 0.18747715651988983, "learning_rate": 8.037715346595658e-05, "loss": 1.9612, "step": 302260 }, { "epoch": 1.1505142239443376, "grad_norm": 0.15447022020816803, "learning_rate": 8.03275086451341e-05, "loss": 1.9681, "step": 302270 }, { "epoch": 1.1505522864124602, "grad_norm": 0.17916887998580933, "learning_rate": 8.02778696963113e-05, "loss": 1.973, "step": 302280 }, { "epoch": 1.1505903488805829, "grad_norm": 0.19271798431873322, "learning_rate": 8.022823661740497e-05, "loss": 1.9645, "step": 302290 }, { "epoch": 1.1506284113487055, "grad_norm": 0.4011487066745758, "learning_rate": 8.017860940633337e-05, "loss": 1.9622, "step": 302300 }, { "epoch": 1.1506664738168282, "grad_norm": 0.20441265404224396, "learning_rate": 8.012898806101571e-05, "loss": 1.9643, "step": 302310 }, { "epoch": 1.1507045362849508, "grad_norm": 0.20115308463573456, "learning_rate": 8.007937257937264e-05, "loss": 1.9782, "step": 302320 }, { "epoch": 1.1507425987530735, "grad_norm": 0.1862449198961258, "learning_rate": 8.00297629593259e-05, "loss": 1.977, "step": 302330 }, { "epoch": 1.1507806612211962, "grad_norm": 0.19222941994667053, "learning_rate": 7.998015919879848e-05, "loss": 1.9711, "step": 302340 }, { "epoch": 1.1508187236893188, "grad_norm": 0.19031883776187897, "learning_rate": 7.993056129571469e-05, "loss": 1.9633, "step": 302350 }, { "epoch": 1.1508567861574415, "grad_norm": 0.1577989161014557, "learning_rate": 7.988096924799992e-05, "loss": 1.9626, "step": 302360 }, { "epoch": 1.1508948486255643, "grad_norm": 0.1963740736246109, "learning_rate": 7.98313830535809e-05, "loss": 1.9553, "step": 302370 }, { "epoch": 1.150932911093687, "grad_norm": 0.19436369836330414, "learning_rate": 7.978180271038555e-05, "loss": 1.9551, "step": 302380 }, { "epoch": 1.1509709735618097, "grad_norm": 0.16641823947429657, "learning_rate": 7.973222821634301e-05, "loss": 1.9818, "step": 302390 }, { "epoch": 1.1510090360299323, "grad_norm": 0.15929053723812103, "learning_rate": 7.968265956938365e-05, "loss": 1.9607, "step": 302400 }, { "epoch": 1.151047098498055, "grad_norm": 0.18515542149543762, "learning_rate": 7.963309676743897e-05, "loss": 1.9773, "step": 302410 }, { "epoch": 1.1510851609661776, "grad_norm": 0.1987186074256897, "learning_rate": 7.958353980844186e-05, "loss": 1.962, "step": 302420 }, { "epoch": 1.1511232234343003, "grad_norm": 0.16314023733139038, "learning_rate": 7.953398869032624e-05, "loss": 1.9514, "step": 302430 }, { "epoch": 1.1511612859024232, "grad_norm": 0.23564612865447998, "learning_rate": 7.948444341102751e-05, "loss": 1.9583, "step": 302440 }, { "epoch": 1.1511993483705458, "grad_norm": 0.1620829701423645, "learning_rate": 7.943490396848196e-05, "loss": 1.9682, "step": 302450 }, { "epoch": 1.1512374108386685, "grad_norm": 0.16987600922584534, "learning_rate": 7.938537036062731e-05, "loss": 1.9683, "step": 302460 }, { "epoch": 1.1512754733067911, "grad_norm": 0.1522493213415146, "learning_rate": 7.933584258540244e-05, "loss": 1.9538, "step": 302470 }, { "epoch": 1.1513135357749138, "grad_norm": 0.2353200763463974, "learning_rate": 7.928632064074742e-05, "loss": 1.9714, "step": 302480 }, { "epoch": 1.1513515982430365, "grad_norm": 0.16485443711280823, "learning_rate": 7.923680452460358e-05, "loss": 1.9737, "step": 302490 }, { "epoch": 1.1513896607111591, "grad_norm": 0.16668550670146942, "learning_rate": 7.918729423491338e-05, "loss": 1.9749, "step": 302500 }, { "epoch": 1.1514277231792818, "grad_norm": 0.20765995979309082, "learning_rate": 7.913778976962066e-05, "loss": 1.9772, "step": 302510 }, { "epoch": 1.1514657856474044, "grad_norm": 0.20101527869701385, "learning_rate": 7.908829112667027e-05, "loss": 1.9733, "step": 302520 }, { "epoch": 1.151503848115527, "grad_norm": 0.2320103794336319, "learning_rate": 7.90387983040084e-05, "loss": 1.9716, "step": 302530 }, { "epoch": 1.15154191058365, "grad_norm": 0.17331230640411377, "learning_rate": 7.898931129958232e-05, "loss": 1.9588, "step": 302540 }, { "epoch": 1.1515799730517726, "grad_norm": 0.16984319686889648, "learning_rate": 7.893983011134065e-05, "loss": 1.9793, "step": 302550 }, { "epoch": 1.1516180355198953, "grad_norm": 0.23322752118110657, "learning_rate": 7.889035473723316e-05, "loss": 1.9725, "step": 302560 }, { "epoch": 1.151656097988018, "grad_norm": 0.16987261176109314, "learning_rate": 7.884088517521077e-05, "loss": 1.9758, "step": 302570 }, { "epoch": 1.1516941604561406, "grad_norm": 0.17440181970596313, "learning_rate": 7.87914214232257e-05, "loss": 1.9754, "step": 302580 }, { "epoch": 1.1517322229242632, "grad_norm": 0.175144761800766, "learning_rate": 7.874196347923124e-05, "loss": 1.9655, "step": 302590 }, { "epoch": 1.151770285392386, "grad_norm": 0.15603378415107727, "learning_rate": 7.869251134118211e-05, "loss": 1.9788, "step": 302600 }, { "epoch": 1.1518083478605088, "grad_norm": 0.16844221949577332, "learning_rate": 7.864306500703388e-05, "loss": 1.9644, "step": 302610 }, { "epoch": 1.1518464103286314, "grad_norm": 0.21338878571987152, "learning_rate": 7.859362447474367e-05, "loss": 1.9785, "step": 302620 }, { "epoch": 1.151884472796754, "grad_norm": 0.2597297728061676, "learning_rate": 7.854418974226968e-05, "loss": 1.9643, "step": 302630 }, { "epoch": 1.1519225352648768, "grad_norm": 0.1646358072757721, "learning_rate": 7.849476080757112e-05, "loss": 1.9587, "step": 302640 }, { "epoch": 1.1519605977329994, "grad_norm": 0.17903150618076324, "learning_rate": 7.844533766860873e-05, "loss": 1.9807, "step": 302650 }, { "epoch": 1.151998660201122, "grad_norm": 0.20694409310817719, "learning_rate": 7.839592032334414e-05, "loss": 1.9693, "step": 302660 }, { "epoch": 1.1520367226692447, "grad_norm": 0.16047678887844086, "learning_rate": 7.834650876974037e-05, "loss": 1.9586, "step": 302670 }, { "epoch": 1.1520747851373674, "grad_norm": 0.16605503857135773, "learning_rate": 7.829710300576149e-05, "loss": 1.9728, "step": 302680 }, { "epoch": 1.15211284760549, "grad_norm": 0.20652402937412262, "learning_rate": 7.824770302937296e-05, "loss": 1.9672, "step": 302690 }, { "epoch": 1.1521509100736127, "grad_norm": 0.27568838000297546, "learning_rate": 7.819830883854123e-05, "loss": 1.9635, "step": 302700 }, { "epoch": 1.1521889725417356, "grad_norm": 0.21026568114757538, "learning_rate": 7.814892043123401e-05, "loss": 1.9642, "step": 302710 }, { "epoch": 1.1522270350098582, "grad_norm": 0.1902541071176529, "learning_rate": 7.809953780542029e-05, "loss": 1.9524, "step": 302720 }, { "epoch": 1.152265097477981, "grad_norm": 0.2355373501777649, "learning_rate": 7.805016095907009e-05, "loss": 1.9549, "step": 302730 }, { "epoch": 1.1523031599461036, "grad_norm": 0.2074352353811264, "learning_rate": 7.800078989015475e-05, "loss": 1.965, "step": 302740 }, { "epoch": 1.1523412224142262, "grad_norm": 0.1499231606721878, "learning_rate": 7.795142459664667e-05, "loss": 1.9625, "step": 302750 }, { "epoch": 1.1523792848823489, "grad_norm": 0.17020770907402039, "learning_rate": 7.790206507651964e-05, "loss": 1.9787, "step": 302760 }, { "epoch": 1.1524173473504715, "grad_norm": 0.2170293778181076, "learning_rate": 7.78527113277484e-05, "loss": 1.9478, "step": 302770 }, { "epoch": 1.1524554098185944, "grad_norm": 0.15761138498783112, "learning_rate": 7.780336334830901e-05, "loss": 1.9772, "step": 302780 }, { "epoch": 1.152493472286717, "grad_norm": 0.15958812832832336, "learning_rate": 7.775402113617869e-05, "loss": 1.9716, "step": 302790 }, { "epoch": 1.1525315347548397, "grad_norm": 0.16891001164913177, "learning_rate": 7.770468468933572e-05, "loss": 1.9664, "step": 302800 }, { "epoch": 1.1525695972229624, "grad_norm": 0.20234356820583344, "learning_rate": 7.76553540057599e-05, "loss": 1.97, "step": 302810 }, { "epoch": 1.152607659691085, "grad_norm": 0.19146844744682312, "learning_rate": 7.760602908343178e-05, "loss": 1.9494, "step": 302820 }, { "epoch": 1.1526457221592077, "grad_norm": 0.15346533060073853, "learning_rate": 7.755670992033342e-05, "loss": 1.9651, "step": 302830 }, { "epoch": 1.1526837846273303, "grad_norm": 0.1874997764825821, "learning_rate": 7.750739651444782e-05, "loss": 1.9729, "step": 302840 }, { "epoch": 1.152721847095453, "grad_norm": 0.1829904466867447, "learning_rate": 7.745808886375938e-05, "loss": 1.9689, "step": 302850 }, { "epoch": 1.1527599095635757, "grad_norm": 0.18967437744140625, "learning_rate": 7.740878696625348e-05, "loss": 1.9536, "step": 302860 }, { "epoch": 1.1527979720316983, "grad_norm": 0.2254030406475067, "learning_rate": 7.735949081991678e-05, "loss": 1.9757, "step": 302870 }, { "epoch": 1.1528360344998212, "grad_norm": 0.18517284095287323, "learning_rate": 7.731020042273713e-05, "loss": 1.9689, "step": 302880 }, { "epoch": 1.1528740969679439, "grad_norm": 0.16112570464611053, "learning_rate": 7.726091577270344e-05, "loss": 1.9783, "step": 302890 }, { "epoch": 1.1529121594360665, "grad_norm": 0.18462646007537842, "learning_rate": 7.721163686780592e-05, "loss": 1.9542, "step": 302900 }, { "epoch": 1.1529502219041892, "grad_norm": 0.16934382915496826, "learning_rate": 7.716236370603591e-05, "loss": 1.9459, "step": 302910 }, { "epoch": 1.1529882843723118, "grad_norm": 0.25239670276641846, "learning_rate": 7.711309628538582e-05, "loss": 1.9621, "step": 302920 }, { "epoch": 1.1530263468404345, "grad_norm": 0.24700672924518585, "learning_rate": 7.706383460384947e-05, "loss": 1.975, "step": 302930 }, { "epoch": 1.1530644093085571, "grad_norm": 0.19068533182144165, "learning_rate": 7.701457865942157e-05, "loss": 1.9715, "step": 302940 }, { "epoch": 1.1531024717766798, "grad_norm": 0.1721537709236145, "learning_rate": 7.696532845009818e-05, "loss": 1.9569, "step": 302950 }, { "epoch": 1.1531405342448027, "grad_norm": 0.1698552519083023, "learning_rate": 7.691608397387639e-05, "loss": 1.9763, "step": 302960 }, { "epoch": 1.1531785967129253, "grad_norm": 0.22980883717536926, "learning_rate": 7.686684522875464e-05, "loss": 1.9647, "step": 302970 }, { "epoch": 1.153216659181048, "grad_norm": 0.17749278247356415, "learning_rate": 7.681761221273237e-05, "loss": 1.9646, "step": 302980 }, { "epoch": 1.1532547216491706, "grad_norm": 0.1919885128736496, "learning_rate": 7.676838492381028e-05, "loss": 1.9735, "step": 302990 }, { "epoch": 1.1532927841172933, "grad_norm": 0.17048048973083496, "learning_rate": 7.671916335999019e-05, "loss": 1.9717, "step": 303000 }, { "epoch": 1.153330846585416, "grad_norm": 0.17630760371685028, "learning_rate": 7.666994751927508e-05, "loss": 1.9608, "step": 303010 }, { "epoch": 1.1533689090535386, "grad_norm": 0.2684946358203888, "learning_rate": 7.662073739966907e-05, "loss": 1.9576, "step": 303020 }, { "epoch": 1.1534069715216613, "grad_norm": 0.2561207115650177, "learning_rate": 7.657153299917746e-05, "loss": 1.9509, "step": 303030 }, { "epoch": 1.153445033989784, "grad_norm": 0.16998520493507385, "learning_rate": 7.652233431580674e-05, "loss": 1.9554, "step": 303040 }, { "epoch": 1.1534830964579068, "grad_norm": 0.16677066683769226, "learning_rate": 7.647314134756462e-05, "loss": 1.9666, "step": 303050 }, { "epoch": 1.1535211589260295, "grad_norm": 0.19740892946720123, "learning_rate": 7.642395409245972e-05, "loss": 1.9617, "step": 303060 }, { "epoch": 1.1535592213941521, "grad_norm": 0.16956956684589386, "learning_rate": 7.637477254850211e-05, "loss": 1.9576, "step": 303070 }, { "epoch": 1.1535972838622748, "grad_norm": 0.18850646913051605, "learning_rate": 7.632559671370282e-05, "loss": 1.9697, "step": 303080 }, { "epoch": 1.1536353463303974, "grad_norm": 0.23317211866378784, "learning_rate": 7.627642658607408e-05, "loss": 1.9564, "step": 303090 }, { "epoch": 1.15367340879852, "grad_norm": 0.20958870649337769, "learning_rate": 7.622726216362935e-05, "loss": 1.9667, "step": 303100 }, { "epoch": 1.1537114712666428, "grad_norm": 0.17249689996242523, "learning_rate": 7.617810344438313e-05, "loss": 1.9644, "step": 303110 }, { "epoch": 1.1537495337347654, "grad_norm": 0.17346225678920746, "learning_rate": 7.612895042635115e-05, "loss": 1.9599, "step": 303120 }, { "epoch": 1.1537875962028883, "grad_norm": 0.20161131024360657, "learning_rate": 7.60798031075503e-05, "loss": 1.9571, "step": 303130 }, { "epoch": 1.153825658671011, "grad_norm": 0.21738913655281067, "learning_rate": 7.603066148599852e-05, "loss": 1.9649, "step": 303140 }, { "epoch": 1.1538637211391336, "grad_norm": 0.18385431170463562, "learning_rate": 7.598152555971499e-05, "loss": 1.9635, "step": 303150 }, { "epoch": 1.1539017836072563, "grad_norm": 0.16730673611164093, "learning_rate": 7.593239532671992e-05, "loss": 1.9565, "step": 303160 }, { "epoch": 1.153939846075379, "grad_norm": 0.2135828733444214, "learning_rate": 7.588327078503493e-05, "loss": 1.9588, "step": 303170 }, { "epoch": 1.1539779085435016, "grad_norm": 0.16073031723499298, "learning_rate": 7.58341519326825e-05, "loss": 1.9711, "step": 303180 }, { "epoch": 1.1540159710116242, "grad_norm": 0.19444327056407928, "learning_rate": 7.578503876768639e-05, "loss": 1.9652, "step": 303190 }, { "epoch": 1.1540540334797469, "grad_norm": 0.23536929488182068, "learning_rate": 7.573593128807149e-05, "loss": 1.9553, "step": 303200 }, { "epoch": 1.1540920959478695, "grad_norm": 0.18366196751594543, "learning_rate": 7.568682949186378e-05, "loss": 1.9519, "step": 303210 }, { "epoch": 1.1541301584159922, "grad_norm": 0.15574891865253448, "learning_rate": 7.563773337709045e-05, "loss": 1.966, "step": 303220 }, { "epoch": 1.154168220884115, "grad_norm": 0.15715356171131134, "learning_rate": 7.558864294177986e-05, "loss": 1.962, "step": 303230 }, { "epoch": 1.1542062833522377, "grad_norm": 0.180779829621315, "learning_rate": 7.553955818396141e-05, "loss": 1.9712, "step": 303240 }, { "epoch": 1.1542443458203604, "grad_norm": 0.21225744485855103, "learning_rate": 7.549047910166567e-05, "loss": 1.9687, "step": 303250 }, { "epoch": 1.154282408288483, "grad_norm": 0.19442957639694214, "learning_rate": 7.54414056929244e-05, "loss": 1.9653, "step": 303260 }, { "epoch": 1.1543204707566057, "grad_norm": 0.2081403285264969, "learning_rate": 7.539233795577044e-05, "loss": 1.9541, "step": 303270 }, { "epoch": 1.1543585332247284, "grad_norm": 0.20303623378276825, "learning_rate": 7.534327588823775e-05, "loss": 1.9758, "step": 303280 }, { "epoch": 1.154396595692851, "grad_norm": 0.18361127376556396, "learning_rate": 7.529421948836157e-05, "loss": 1.9541, "step": 303290 }, { "epoch": 1.154434658160974, "grad_norm": 0.16972817480564117, "learning_rate": 7.524516875417809e-05, "loss": 1.9627, "step": 303300 }, { "epoch": 1.1544727206290966, "grad_norm": 0.17593394219875336, "learning_rate": 7.51961236837247e-05, "loss": 1.9843, "step": 303310 }, { "epoch": 1.1545107830972192, "grad_norm": 0.16857187449932098, "learning_rate": 7.514708427503997e-05, "loss": 1.9515, "step": 303320 }, { "epoch": 1.1545488455653419, "grad_norm": 0.1726546436548233, "learning_rate": 7.509805052616358e-05, "loss": 1.9772, "step": 303330 }, { "epoch": 1.1545869080334645, "grad_norm": 0.17818816006183624, "learning_rate": 7.504902243513623e-05, "loss": 1.9621, "step": 303340 }, { "epoch": 1.1546249705015872, "grad_norm": 0.15913088619709015, "learning_rate": 7.500000000000001e-05, "loss": 1.9798, "step": 303350 }, { "epoch": 1.1546630329697098, "grad_norm": 0.17664912343025208, "learning_rate": 7.495098321879785e-05, "loss": 1.9676, "step": 303360 }, { "epoch": 1.1547010954378325, "grad_norm": 0.16069193184375763, "learning_rate": 7.4901972089574e-05, "loss": 1.9605, "step": 303370 }, { "epoch": 1.1547391579059552, "grad_norm": 0.17986124753952026, "learning_rate": 7.485296661037371e-05, "loss": 1.9573, "step": 303380 }, { "epoch": 1.1547772203740778, "grad_norm": 0.19635865092277527, "learning_rate": 7.48039667792435e-05, "loss": 1.9727, "step": 303390 }, { "epoch": 1.1548152828422007, "grad_norm": 0.22032029926776886, "learning_rate": 7.475497259423086e-05, "loss": 1.954, "step": 303400 }, { "epoch": 1.1548533453103234, "grad_norm": 0.22101953625679016, "learning_rate": 7.470598405338452e-05, "loss": 1.9699, "step": 303410 }, { "epoch": 1.154891407778446, "grad_norm": 0.21217355132102966, "learning_rate": 7.465700115475433e-05, "loss": 1.9597, "step": 303420 }, { "epoch": 1.1549294702465687, "grad_norm": 0.24058283865451813, "learning_rate": 7.460802389639115e-05, "loss": 1.9757, "step": 303430 }, { "epoch": 1.1549675327146913, "grad_norm": 0.1930130273103714, "learning_rate": 7.455905227634707e-05, "loss": 1.9633, "step": 303440 }, { "epoch": 1.155005595182814, "grad_norm": 0.24008707702159882, "learning_rate": 7.451008629267525e-05, "loss": 1.9592, "step": 303450 }, { "epoch": 1.1550436576509366, "grad_norm": 0.18197044730186462, "learning_rate": 7.446112594343002e-05, "loss": 1.9622, "step": 303460 }, { "epoch": 1.1550817201190595, "grad_norm": 0.1568765491247177, "learning_rate": 7.441217122666678e-05, "loss": 1.9822, "step": 303470 }, { "epoch": 1.1551197825871822, "grad_norm": 0.19632264971733093, "learning_rate": 7.436322214044206e-05, "loss": 1.9763, "step": 303480 }, { "epoch": 1.1551578450553048, "grad_norm": 0.17583273351192474, "learning_rate": 7.431427868281354e-05, "loss": 1.9582, "step": 303490 }, { "epoch": 1.1551959075234275, "grad_norm": 0.24570678174495697, "learning_rate": 7.426534085184e-05, "loss": 1.9578, "step": 303500 }, { "epoch": 1.1552339699915501, "grad_norm": 0.17563122510910034, "learning_rate": 7.421640864558126e-05, "loss": 1.96, "step": 303510 }, { "epoch": 1.1552720324596728, "grad_norm": 0.1920919120311737, "learning_rate": 7.416748206209834e-05, "loss": 1.98, "step": 303520 }, { "epoch": 1.1553100949277955, "grad_norm": 0.22609946131706238, "learning_rate": 7.411856109945342e-05, "loss": 1.983, "step": 303530 }, { "epoch": 1.1553481573959181, "grad_norm": 0.24520838260650635, "learning_rate": 7.406964575570962e-05, "loss": 1.9539, "step": 303540 }, { "epoch": 1.1553862198640408, "grad_norm": 0.18873053789138794, "learning_rate": 7.402073602893139e-05, "loss": 1.9559, "step": 303550 }, { "epoch": 1.1554242823321634, "grad_norm": 0.16989383101463318, "learning_rate": 7.397183191718415e-05, "loss": 1.9777, "step": 303560 }, { "epoch": 1.1554623448002863, "grad_norm": 0.16686667501926422, "learning_rate": 7.39229334185344e-05, "loss": 1.9763, "step": 303570 }, { "epoch": 1.155500407268409, "grad_norm": 0.19937288761138916, "learning_rate": 7.387404053104985e-05, "loss": 1.9618, "step": 303580 }, { "epoch": 1.1555384697365316, "grad_norm": 0.29525628685951233, "learning_rate": 7.382515325279932e-05, "loss": 1.9504, "step": 303590 }, { "epoch": 1.1555765322046543, "grad_norm": 0.19494149088859558, "learning_rate": 7.377627158185262e-05, "loss": 1.9646, "step": 303600 }, { "epoch": 1.155614594672777, "grad_norm": 0.17477074265480042, "learning_rate": 7.372739551628077e-05, "loss": 1.9841, "step": 303610 }, { "epoch": 1.1556526571408996, "grad_norm": 0.18820302188396454, "learning_rate": 7.36785250541559e-05, "loss": 1.9751, "step": 303620 }, { "epoch": 1.1556907196090223, "grad_norm": 0.17995865643024445, "learning_rate": 7.362966019355116e-05, "loss": 1.9578, "step": 303630 }, { "epoch": 1.1557287820771451, "grad_norm": 0.20242054760456085, "learning_rate": 7.358080093254088e-05, "loss": 1.9558, "step": 303640 }, { "epoch": 1.1557668445452678, "grad_norm": 0.16632670164108276, "learning_rate": 7.353194726920048e-05, "loss": 1.9689, "step": 303650 }, { "epoch": 1.1558049070133904, "grad_norm": 0.19060280919075012, "learning_rate": 7.348309920160646e-05, "loss": 1.98, "step": 303660 }, { "epoch": 1.155842969481513, "grad_norm": 0.19867919385433197, "learning_rate": 7.343425672783649e-05, "loss": 1.9562, "step": 303670 }, { "epoch": 1.1558810319496358, "grad_norm": 0.16830924153327942, "learning_rate": 7.338541984596919e-05, "loss": 1.9738, "step": 303680 }, { "epoch": 1.1559190944177584, "grad_norm": 0.22567281126976013, "learning_rate": 7.333658855408443e-05, "loss": 1.954, "step": 303690 }, { "epoch": 1.155957156885881, "grad_norm": 0.1821957528591156, "learning_rate": 7.328776285026307e-05, "loss": 1.955, "step": 303700 }, { "epoch": 1.1559952193540037, "grad_norm": 0.18949563801288605, "learning_rate": 7.323894273258719e-05, "loss": 1.9618, "step": 303710 }, { "epoch": 1.1560332818221264, "grad_norm": 0.19841155409812927, "learning_rate": 7.319012819913984e-05, "loss": 1.9636, "step": 303720 }, { "epoch": 1.156071344290249, "grad_norm": 0.2149336189031601, "learning_rate": 7.314131924800532e-05, "loss": 1.9625, "step": 303730 }, { "epoch": 1.156109406758372, "grad_norm": 0.21468611061573029, "learning_rate": 7.309251587726878e-05, "loss": 1.9596, "step": 303740 }, { "epoch": 1.1561474692264946, "grad_norm": 0.18424129486083984, "learning_rate": 7.304371808501675e-05, "loss": 1.9603, "step": 303750 }, { "epoch": 1.1561855316946172, "grad_norm": 0.22661066055297852, "learning_rate": 7.299492586933664e-05, "loss": 1.9705, "step": 303760 }, { "epoch": 1.15622359416274, "grad_norm": 0.1968468725681305, "learning_rate": 7.2946139228317e-05, "loss": 1.9627, "step": 303770 }, { "epoch": 1.1562616566308626, "grad_norm": 0.21646060049533844, "learning_rate": 7.289735816004762e-05, "loss": 1.9668, "step": 303780 }, { "epoch": 1.1562997190989852, "grad_norm": 0.19419118762016296, "learning_rate": 7.284858266261913e-05, "loss": 1.9674, "step": 303790 }, { "epoch": 1.1563377815671079, "grad_norm": 0.1530521959066391, "learning_rate": 7.279981273412346e-05, "loss": 1.9683, "step": 303800 }, { "epoch": 1.1563758440352305, "grad_norm": 0.16724202036857605, "learning_rate": 7.275104837265351e-05, "loss": 1.9522, "step": 303810 }, { "epoch": 1.1564139065033534, "grad_norm": 0.19780854880809784, "learning_rate": 7.270228957630338e-05, "loss": 1.9632, "step": 303820 }, { "epoch": 1.156451968971476, "grad_norm": 0.20006397366523743, "learning_rate": 7.265353634316801e-05, "loss": 1.9701, "step": 303830 }, { "epoch": 1.1564900314395987, "grad_norm": 0.1747836023569107, "learning_rate": 7.260478867134385e-05, "loss": 1.9603, "step": 303840 }, { "epoch": 1.1565280939077214, "grad_norm": 0.16377757489681244, "learning_rate": 7.2556046558928e-05, "loss": 1.9596, "step": 303850 }, { "epoch": 1.156566156375844, "grad_norm": 0.16101588308811188, "learning_rate": 7.250731000401889e-05, "loss": 1.9651, "step": 303860 }, { "epoch": 1.1566042188439667, "grad_norm": 0.1737445890903473, "learning_rate": 7.245857900471603e-05, "loss": 1.9536, "step": 303870 }, { "epoch": 1.1566422813120893, "grad_norm": 0.1697576940059662, "learning_rate": 7.240985355911989e-05, "loss": 1.9601, "step": 303880 }, { "epoch": 1.156680343780212, "grad_norm": 0.20912794768810272, "learning_rate": 7.236113366533204e-05, "loss": 1.957, "step": 303890 }, { "epoch": 1.1567184062483347, "grad_norm": 0.2573632299900055, "learning_rate": 7.231241932145532e-05, "loss": 1.9675, "step": 303900 }, { "epoch": 1.1567564687164575, "grad_norm": 0.17370592057704926, "learning_rate": 7.226371052559344e-05, "loss": 1.9684, "step": 303910 }, { "epoch": 1.1567945311845802, "grad_norm": 0.16651690006256104, "learning_rate": 7.221500727585123e-05, "loss": 1.9663, "step": 303920 }, { "epoch": 1.1568325936527029, "grad_norm": 0.19553054869174957, "learning_rate": 7.216630957033471e-05, "loss": 1.9673, "step": 303930 }, { "epoch": 1.1568706561208255, "grad_norm": 0.22437965869903564, "learning_rate": 7.211761740715089e-05, "loss": 1.97, "step": 303940 }, { "epoch": 1.1569087185889482, "grad_norm": 0.17220978438854218, "learning_rate": 7.206893078440773e-05, "loss": 1.9674, "step": 303950 }, { "epoch": 1.1569467810570708, "grad_norm": 0.17623186111450195, "learning_rate": 7.202024970021458e-05, "loss": 1.9689, "step": 303960 }, { "epoch": 1.1569848435251935, "grad_norm": 0.17047753930091858, "learning_rate": 7.197157415268164e-05, "loss": 1.9631, "step": 303970 }, { "epoch": 1.1570229059933161, "grad_norm": 0.16086052358150482, "learning_rate": 7.192290413992014e-05, "loss": 1.9626, "step": 303980 }, { "epoch": 1.157060968461439, "grad_norm": 0.1761477291584015, "learning_rate": 7.187423966004258e-05, "loss": 1.9639, "step": 303990 }, { "epoch": 1.1570990309295617, "grad_norm": 0.2322581708431244, "learning_rate": 7.182558071116241e-05, "loss": 1.9552, "step": 304000 }, { "epoch": 1.1571370933976843, "grad_norm": 0.20044724643230438, "learning_rate": 7.177692729139407e-05, "loss": 1.9746, "step": 304010 }, { "epoch": 1.157175155865807, "grad_norm": 0.19975224137306213, "learning_rate": 7.172827939885328e-05, "loss": 1.9616, "step": 304020 }, { "epoch": 1.1572132183339296, "grad_norm": 0.19980034232139587, "learning_rate": 7.16796370316567e-05, "loss": 1.9614, "step": 304030 }, { "epoch": 1.1572512808020523, "grad_norm": 0.22720147669315338, "learning_rate": 7.163100018792213e-05, "loss": 1.9604, "step": 304040 }, { "epoch": 1.157289343270175, "grad_norm": 0.19307057559490204, "learning_rate": 7.158236886576825e-05, "loss": 1.9668, "step": 304050 }, { "epoch": 1.1573274057382976, "grad_norm": 0.20916832983493805, "learning_rate": 7.153374306331506e-05, "loss": 1.963, "step": 304060 }, { "epoch": 1.1573654682064203, "grad_norm": 0.23292502760887146, "learning_rate": 7.14851227786834e-05, "loss": 1.9586, "step": 304070 }, { "epoch": 1.157403530674543, "grad_norm": 0.19604690372943878, "learning_rate": 7.143650800999546e-05, "loss": 1.9666, "step": 304080 }, { "epoch": 1.1574415931426658, "grad_norm": 0.24700568616390228, "learning_rate": 7.138789875537421e-05, "loss": 1.9708, "step": 304090 }, { "epoch": 1.1574796556107885, "grad_norm": 0.18617315590381622, "learning_rate": 7.133929501294385e-05, "loss": 1.9697, "step": 304100 }, { "epoch": 1.1575177180789111, "grad_norm": 0.1749623566865921, "learning_rate": 7.12906967808295e-05, "loss": 1.9645, "step": 304110 }, { "epoch": 1.1575557805470338, "grad_norm": 0.2002592533826828, "learning_rate": 7.124210405715754e-05, "loss": 1.9637, "step": 304120 }, { "epoch": 1.1575938430151564, "grad_norm": 0.22793354094028473, "learning_rate": 7.11935168400552e-05, "loss": 1.9692, "step": 304130 }, { "epoch": 1.157631905483279, "grad_norm": 0.24129308760166168, "learning_rate": 7.114493512765102e-05, "loss": 1.9641, "step": 304140 }, { "epoch": 1.1576699679514018, "grad_norm": 0.2590607702732086, "learning_rate": 7.109635891807431e-05, "loss": 1.9537, "step": 304150 }, { "epoch": 1.1577080304195246, "grad_norm": 0.18408513069152832, "learning_rate": 7.104778820945567e-05, "loss": 1.9552, "step": 304160 }, { "epoch": 1.1577460928876473, "grad_norm": 0.1907476782798767, "learning_rate": 7.099922299992667e-05, "loss": 1.9644, "step": 304170 }, { "epoch": 1.15778415535577, "grad_norm": 0.1599113494157791, "learning_rate": 7.09506632876199e-05, "loss": 1.9782, "step": 304180 }, { "epoch": 1.1578222178238926, "grad_norm": 0.16144965589046478, "learning_rate": 7.090210907066902e-05, "loss": 1.9544, "step": 304190 }, { "epoch": 1.1578602802920153, "grad_norm": 0.27204036712646484, "learning_rate": 7.085356034720891e-05, "loss": 1.963, "step": 304200 }, { "epoch": 1.157898342760138, "grad_norm": 0.23324240744113922, "learning_rate": 7.080501711537524e-05, "loss": 1.9775, "step": 304210 }, { "epoch": 1.1579364052282606, "grad_norm": 0.18385274708271027, "learning_rate": 7.075647937330492e-05, "loss": 1.9564, "step": 304220 }, { "epoch": 1.1579744676963832, "grad_norm": 0.19012723863124847, "learning_rate": 7.070794711913586e-05, "loss": 1.9635, "step": 304230 }, { "epoch": 1.158012530164506, "grad_norm": 0.16869176924228668, "learning_rate": 7.065942035100697e-05, "loss": 1.9797, "step": 304240 }, { "epoch": 1.1580505926326286, "grad_norm": 0.16135039925575256, "learning_rate": 7.061089906705831e-05, "loss": 1.9657, "step": 304250 }, { "epoch": 1.1580886551007514, "grad_norm": 0.21593551337718964, "learning_rate": 7.056238326543091e-05, "loss": 1.9672, "step": 304260 }, { "epoch": 1.158126717568874, "grad_norm": 0.17168347537517548, "learning_rate": 7.051387294426686e-05, "loss": 1.9796, "step": 304270 }, { "epoch": 1.1581647800369967, "grad_norm": 0.16949261724948883, "learning_rate": 7.046536810170939e-05, "loss": 1.9613, "step": 304280 }, { "epoch": 1.1582028425051194, "grad_norm": 0.17988714575767517, "learning_rate": 7.04168687359027e-05, "loss": 1.9675, "step": 304290 }, { "epoch": 1.158240904973242, "grad_norm": 0.17011313140392303, "learning_rate": 7.036837484499197e-05, "loss": 1.9513, "step": 304300 }, { "epoch": 1.1582789674413647, "grad_norm": 0.15754835307598114, "learning_rate": 7.031988642712356e-05, "loss": 1.9462, "step": 304310 }, { "epoch": 1.1583170299094874, "grad_norm": 0.17138944566249847, "learning_rate": 7.027140348044481e-05, "loss": 1.9552, "step": 304320 }, { "epoch": 1.1583550923776103, "grad_norm": 0.17376810312271118, "learning_rate": 7.022292600310409e-05, "loss": 1.9686, "step": 304330 }, { "epoch": 1.158393154845733, "grad_norm": 0.17283578217029572, "learning_rate": 7.01744539932509e-05, "loss": 1.9541, "step": 304340 }, { "epoch": 1.1584312173138556, "grad_norm": 0.17546117305755615, "learning_rate": 7.012598744903565e-05, "loss": 1.9617, "step": 304350 }, { "epoch": 1.1584692797819782, "grad_norm": 0.21400034427642822, "learning_rate": 7.007752636860987e-05, "loss": 1.9661, "step": 304360 }, { "epoch": 1.1585073422501009, "grad_norm": 0.1719067394733429, "learning_rate": 7.002907075012615e-05, "loss": 1.9594, "step": 304370 }, { "epoch": 1.1585454047182235, "grad_norm": 0.17050892114639282, "learning_rate": 6.998062059173816e-05, "loss": 1.958, "step": 304380 }, { "epoch": 1.1585834671863462, "grad_norm": 0.16611924767494202, "learning_rate": 6.993217589160039e-05, "loss": 1.965, "step": 304390 }, { "epoch": 1.1586215296544689, "grad_norm": 0.1858852505683899, "learning_rate": 6.988373664786868e-05, "loss": 1.9479, "step": 304400 }, { "epoch": 1.1586595921225915, "grad_norm": 0.22573794424533844, "learning_rate": 6.983530285869965e-05, "loss": 1.9637, "step": 304410 }, { "epoch": 1.1586976545907142, "grad_norm": 0.23732437193393707, "learning_rate": 6.978687452225108e-05, "loss": 1.9365, "step": 304420 }, { "epoch": 1.158735717058837, "grad_norm": 0.18224066495895386, "learning_rate": 6.97384516366818e-05, "loss": 1.9747, "step": 304430 }, { "epoch": 1.1587737795269597, "grad_norm": 0.17750975489616394, "learning_rate": 6.969003420015163e-05, "loss": 1.9698, "step": 304440 }, { "epoch": 1.1588118419950824, "grad_norm": 0.1634315848350525, "learning_rate": 6.964162221082143e-05, "loss": 1.9638, "step": 304450 }, { "epoch": 1.158849904463205, "grad_norm": 0.19310379028320312, "learning_rate": 6.959321566685312e-05, "loss": 1.9784, "step": 304460 }, { "epoch": 1.1588879669313277, "grad_norm": 0.1664789468050003, "learning_rate": 6.954481456640965e-05, "loss": 1.9676, "step": 304470 }, { "epoch": 1.1589260293994503, "grad_norm": 0.16946376860141754, "learning_rate": 6.949641890765496e-05, "loss": 1.9643, "step": 304480 }, { "epoch": 1.158964091867573, "grad_norm": 0.16697698831558228, "learning_rate": 6.9448028688754e-05, "loss": 1.975, "step": 304490 }, { "epoch": 1.1590021543356959, "grad_norm": 0.15951833128929138, "learning_rate": 6.93996439078729e-05, "loss": 1.9594, "step": 304500 }, { "epoch": 1.1590402168038185, "grad_norm": 0.15703727304935455, "learning_rate": 6.935126456317864e-05, "loss": 1.9526, "step": 304510 }, { "epoch": 1.1590782792719412, "grad_norm": 0.20362144708633423, "learning_rate": 6.93028906528394e-05, "loss": 1.9527, "step": 304520 }, { "epoch": 1.1591163417400638, "grad_norm": 0.1691470742225647, "learning_rate": 6.925452217502426e-05, "loss": 1.9588, "step": 304530 }, { "epoch": 1.1591544042081865, "grad_norm": 0.2185783088207245, "learning_rate": 6.920615912790335e-05, "loss": 1.9514, "step": 304540 }, { "epoch": 1.1591924666763092, "grad_norm": 0.2571600377559662, "learning_rate": 6.915780150964784e-05, "loss": 1.9577, "step": 304550 }, { "epoch": 1.1592305291444318, "grad_norm": 0.22867870330810547, "learning_rate": 6.910944931842993e-05, "loss": 1.9614, "step": 304560 }, { "epoch": 1.1592685916125545, "grad_norm": 0.18404366075992584, "learning_rate": 6.906110255242293e-05, "loss": 1.97, "step": 304570 }, { "epoch": 1.1593066540806771, "grad_norm": 0.1827774941921234, "learning_rate": 6.901276120980104e-05, "loss": 1.978, "step": 304580 }, { "epoch": 1.1593447165487998, "grad_norm": 0.16489237546920776, "learning_rate": 6.896442528873947e-05, "loss": 1.9562, "step": 304590 }, { "epoch": 1.1593827790169227, "grad_norm": 0.17081385850906372, "learning_rate": 6.891609478741461e-05, "loss": 1.962, "step": 304600 }, { "epoch": 1.1594208414850453, "grad_norm": 0.17646686732769012, "learning_rate": 6.886776970400366e-05, "loss": 1.9628, "step": 304610 }, { "epoch": 1.159458903953168, "grad_norm": 0.16338463127613068, "learning_rate": 6.88194500366851e-05, "loss": 1.9712, "step": 304620 }, { "epoch": 1.1594969664212906, "grad_norm": 0.16763192415237427, "learning_rate": 6.87711357836383e-05, "loss": 1.9664, "step": 304630 }, { "epoch": 1.1595350288894133, "grad_norm": 0.229514941573143, "learning_rate": 6.87228269430435e-05, "loss": 1.9557, "step": 304640 }, { "epoch": 1.159573091357536, "grad_norm": 0.17145362496376038, "learning_rate": 6.867452351308224e-05, "loss": 1.9482, "step": 304650 }, { "epoch": 1.1596111538256586, "grad_norm": 0.17883513867855072, "learning_rate": 6.862622549193687e-05, "loss": 1.9748, "step": 304660 }, { "epoch": 1.1596492162937813, "grad_norm": 0.21892984211444855, "learning_rate": 6.857793287779074e-05, "loss": 1.9617, "step": 304670 }, { "epoch": 1.1596872787619041, "grad_norm": 0.2010929137468338, "learning_rate": 6.852964566882852e-05, "loss": 1.957, "step": 304680 }, { "epoch": 1.1597253412300268, "grad_norm": 0.24334891140460968, "learning_rate": 6.84813638632355e-05, "loss": 1.9722, "step": 304690 }, { "epoch": 1.1597634036981495, "grad_norm": 0.199227973818779, "learning_rate": 6.843308745919829e-05, "loss": 1.9589, "step": 304700 }, { "epoch": 1.159801466166272, "grad_norm": 0.18418475985527039, "learning_rate": 6.83848164549043e-05, "loss": 1.9581, "step": 304710 }, { "epoch": 1.1598395286343948, "grad_norm": 0.17104840278625488, "learning_rate": 6.833655084854206e-05, "loss": 1.9706, "step": 304720 }, { "epoch": 1.1598775911025174, "grad_norm": 0.18676508963108063, "learning_rate": 6.828829063830106e-05, "loss": 1.9681, "step": 304730 }, { "epoch": 1.15991565357064, "grad_norm": 0.1584375649690628, "learning_rate": 6.824003582237192e-05, "loss": 1.9613, "step": 304740 }, { "epoch": 1.1599537160387627, "grad_norm": 0.17978356778621674, "learning_rate": 6.819178639894619e-05, "loss": 1.9759, "step": 304750 }, { "epoch": 1.1599917785068854, "grad_norm": 0.3203710913658142, "learning_rate": 6.814354236621634e-05, "loss": 1.9691, "step": 304760 }, { "epoch": 1.1600298409750083, "grad_norm": 0.18568219244480133, "learning_rate": 6.809530372237599e-05, "loss": 1.979, "step": 304770 }, { "epoch": 1.160067903443131, "grad_norm": 0.16148421168327332, "learning_rate": 6.80470704656197e-05, "loss": 1.9631, "step": 304780 }, { "epoch": 1.1601059659112536, "grad_norm": 0.16459301114082336, "learning_rate": 6.799884259414301e-05, "loss": 1.9657, "step": 304790 }, { "epoch": 1.1601440283793762, "grad_norm": 0.1900845319032669, "learning_rate": 6.795062010614268e-05, "loss": 1.9622, "step": 304800 }, { "epoch": 1.160182090847499, "grad_norm": 0.17193518579006195, "learning_rate": 6.79024029998161e-05, "loss": 1.966, "step": 304810 }, { "epoch": 1.1602201533156216, "grad_norm": 0.15967318415641785, "learning_rate": 6.785419127336201e-05, "loss": 1.9692, "step": 304820 }, { "epoch": 1.1602582157837442, "grad_norm": 0.18435196578502655, "learning_rate": 6.780598492497997e-05, "loss": 1.9687, "step": 304830 }, { "epoch": 1.1602962782518669, "grad_norm": 0.18528448045253754, "learning_rate": 6.775778395287058e-05, "loss": 1.9525, "step": 304840 }, { "epoch": 1.1603343407199898, "grad_norm": 0.17487603425979614, "learning_rate": 6.770958835523545e-05, "loss": 1.9672, "step": 304850 }, { "epoch": 1.1603724031881124, "grad_norm": 0.19005072116851807, "learning_rate": 6.766139813027721e-05, "loss": 1.9568, "step": 304860 }, { "epoch": 1.160410465656235, "grad_norm": 0.18123486638069153, "learning_rate": 6.761321327619951e-05, "loss": 1.952, "step": 304870 }, { "epoch": 1.1604485281243577, "grad_norm": 0.17274688184261322, "learning_rate": 6.756503379120693e-05, "loss": 1.9593, "step": 304880 }, { "epoch": 1.1604865905924804, "grad_norm": 0.21813954412937164, "learning_rate": 6.751685967350512e-05, "loss": 1.9536, "step": 304890 }, { "epoch": 1.160524653060603, "grad_norm": 0.1760721057653427, "learning_rate": 6.746869092130071e-05, "loss": 1.9694, "step": 304900 }, { "epoch": 1.1605627155287257, "grad_norm": 0.1831454485654831, "learning_rate": 6.74205275328012e-05, "loss": 1.9586, "step": 304910 }, { "epoch": 1.1606007779968484, "grad_norm": 0.16150572896003723, "learning_rate": 6.737236950621533e-05, "loss": 1.9704, "step": 304920 }, { "epoch": 1.160638840464971, "grad_norm": 0.19636644423007965, "learning_rate": 6.73242168397527e-05, "loss": 1.9555, "step": 304930 }, { "epoch": 1.160676902933094, "grad_norm": 0.19180063903331757, "learning_rate": 6.72760695316239e-05, "loss": 1.9542, "step": 304940 }, { "epoch": 1.1607149654012165, "grad_norm": 0.16178670525550842, "learning_rate": 6.722792758004054e-05, "loss": 1.9506, "step": 304950 }, { "epoch": 1.1607530278693392, "grad_norm": 0.2005394548177719, "learning_rate": 6.717979098321514e-05, "loss": 1.9667, "step": 304960 }, { "epoch": 1.1607910903374619, "grad_norm": 0.18609443306922913, "learning_rate": 6.713165973936136e-05, "loss": 1.9624, "step": 304970 }, { "epoch": 1.1608291528055845, "grad_norm": 0.16814211010932922, "learning_rate": 6.708353384669386e-05, "loss": 1.9541, "step": 304980 }, { "epoch": 1.1608672152737072, "grad_norm": 0.20419470965862274, "learning_rate": 6.70354133034281e-05, "loss": 1.971, "step": 304990 }, { "epoch": 1.1609052777418298, "grad_norm": 0.17679406702518463, "learning_rate": 6.69872981077807e-05, "loss": 1.9572, "step": 305000 }, { "epoch": 1.1609433402099525, "grad_norm": 0.16615521907806396, "learning_rate": 6.693918825796918e-05, "loss": 1.9707, "step": 305010 }, { "epoch": 1.1609814026780754, "grad_norm": 0.15864615142345428, "learning_rate": 6.689108375221215e-05, "loss": 1.9472, "step": 305020 }, { "epoch": 1.161019465146198, "grad_norm": 0.17157498002052307, "learning_rate": 6.684298458872912e-05, "loss": 1.9614, "step": 305030 }, { "epoch": 1.1610575276143207, "grad_norm": 0.20575599372386932, "learning_rate": 6.679489076574052e-05, "loss": 1.9593, "step": 305040 }, { "epoch": 1.1610955900824433, "grad_norm": 0.17096468806266785, "learning_rate": 6.674680228146807e-05, "loss": 1.9675, "step": 305050 }, { "epoch": 1.161133652550566, "grad_norm": 0.24533316493034363, "learning_rate": 6.669871913413411e-05, "loss": 1.9692, "step": 305060 }, { "epoch": 1.1611717150186887, "grad_norm": 0.17510323226451874, "learning_rate": 6.665064132196219e-05, "loss": 1.9595, "step": 305070 }, { "epoch": 1.1612097774868113, "grad_norm": 0.19663465023040771, "learning_rate": 6.660256884317678e-05, "loss": 1.9644, "step": 305080 }, { "epoch": 1.161247839954934, "grad_norm": 0.191786527633667, "learning_rate": 6.65545016960033e-05, "loss": 1.9567, "step": 305090 }, { "epoch": 1.1612859024230566, "grad_norm": 0.21058157086372375, "learning_rate": 6.650643987866823e-05, "loss": 1.9684, "step": 305100 }, { "epoch": 1.1613239648911793, "grad_norm": 0.26317745447158813, "learning_rate": 6.645838338939902e-05, "loss": 1.9668, "step": 305110 }, { "epoch": 1.1613620273593022, "grad_norm": 0.1573423594236374, "learning_rate": 6.641033222642401e-05, "loss": 1.9637, "step": 305120 }, { "epoch": 1.1614000898274248, "grad_norm": 0.1872624158859253, "learning_rate": 6.636228638797265e-05, "loss": 1.968, "step": 305130 }, { "epoch": 1.1614381522955475, "grad_norm": 0.17351549863815308, "learning_rate": 6.631424587227525e-05, "loss": 1.9619, "step": 305140 }, { "epoch": 1.1614762147636701, "grad_norm": 0.25747624039649963, "learning_rate": 6.626621067756322e-05, "loss": 1.9572, "step": 305150 }, { "epoch": 1.1615142772317928, "grad_norm": 0.1605093777179718, "learning_rate": 6.621818080206881e-05, "loss": 1.9513, "step": 305160 }, { "epoch": 1.1615523396999154, "grad_norm": 0.27054768800735474, "learning_rate": 6.617015624402539e-05, "loss": 1.9632, "step": 305170 }, { "epoch": 1.161590402168038, "grad_norm": 0.31650516390800476, "learning_rate": 6.612213700166726e-05, "loss": 1.9643, "step": 305180 }, { "epoch": 1.161628464636161, "grad_norm": 0.2560306191444397, "learning_rate": 6.607412307322963e-05, "loss": 1.9683, "step": 305190 }, { "epoch": 1.1616665271042836, "grad_norm": 0.17595048248767853, "learning_rate": 6.602611445694878e-05, "loss": 1.9727, "step": 305200 }, { "epoch": 1.1617045895724063, "grad_norm": 0.1622474044561386, "learning_rate": 6.597811115106189e-05, "loss": 1.9543, "step": 305210 }, { "epoch": 1.161742652040529, "grad_norm": 0.15321014821529388, "learning_rate": 6.593011315380715e-05, "loss": 1.9747, "step": 305220 }, { "epoch": 1.1617807145086516, "grad_norm": 0.19443893432617188, "learning_rate": 6.588212046342373e-05, "loss": 1.9778, "step": 305230 }, { "epoch": 1.1618187769767743, "grad_norm": 0.19640414416790009, "learning_rate": 6.583413307815178e-05, "loss": 1.9603, "step": 305240 }, { "epoch": 1.161856839444897, "grad_norm": 0.2108943611383438, "learning_rate": 6.578615099623241e-05, "loss": 1.9596, "step": 305250 }, { "epoch": 1.1618949019130196, "grad_norm": 0.19390849769115448, "learning_rate": 6.573817421590772e-05, "loss": 1.9599, "step": 305260 }, { "epoch": 1.1619329643811422, "grad_norm": 0.19029496610164642, "learning_rate": 6.56902027354207e-05, "loss": 1.9529, "step": 305270 }, { "epoch": 1.161971026849265, "grad_norm": 0.19222138822078705, "learning_rate": 6.564223655301533e-05, "loss": 1.9422, "step": 305280 }, { "epoch": 1.1620090893173878, "grad_norm": 0.19947002828121185, "learning_rate": 6.559427566693677e-05, "loss": 1.9631, "step": 305290 }, { "epoch": 1.1620471517855104, "grad_norm": 0.1722879707813263, "learning_rate": 6.554632007543083e-05, "loss": 1.9616, "step": 305300 }, { "epoch": 1.162085214253633, "grad_norm": 0.23039458692073822, "learning_rate": 6.54983697767445e-05, "loss": 1.9565, "step": 305310 }, { "epoch": 1.1621232767217557, "grad_norm": 0.22281059622764587, "learning_rate": 6.545042476912567e-05, "loss": 1.9498, "step": 305320 }, { "epoch": 1.1621613391898784, "grad_norm": 0.19715505838394165, "learning_rate": 6.540248505082314e-05, "loss": 1.9547, "step": 305330 }, { "epoch": 1.162199401658001, "grad_norm": 0.17719301581382751, "learning_rate": 6.535455062008682e-05, "loss": 1.9519, "step": 305340 }, { "epoch": 1.1622374641261237, "grad_norm": 0.17887887358665466, "learning_rate": 6.530662147516747e-05, "loss": 1.9764, "step": 305350 }, { "epoch": 1.1622755265942466, "grad_norm": 0.20813018083572388, "learning_rate": 6.525869761431685e-05, "loss": 1.9619, "step": 305360 }, { "epoch": 1.1623135890623693, "grad_norm": 0.18251609802246094, "learning_rate": 6.521077903578764e-05, "loss": 1.9576, "step": 305370 }, { "epoch": 1.162351651530492, "grad_norm": 0.1724395602941513, "learning_rate": 6.516286573783359e-05, "loss": 1.9643, "step": 305380 }, { "epoch": 1.1623897139986146, "grad_norm": 0.19655396044254303, "learning_rate": 6.511495771870928e-05, "loss": 1.975, "step": 305390 }, { "epoch": 1.1624277764667372, "grad_norm": 0.19053930044174194, "learning_rate": 6.506705497667037e-05, "loss": 1.955, "step": 305400 }, { "epoch": 1.1624658389348599, "grad_norm": 0.17787426710128784, "learning_rate": 6.501915750997339e-05, "loss": 1.9564, "step": 305410 }, { "epoch": 1.1625039014029825, "grad_norm": 0.1610625982284546, "learning_rate": 6.497126531687586e-05, "loss": 1.9546, "step": 305420 }, { "epoch": 1.1625419638711052, "grad_norm": 0.16635026037693024, "learning_rate": 6.492337839563633e-05, "loss": 1.9731, "step": 305430 }, { "epoch": 1.1625800263392279, "grad_norm": 0.2111584097146988, "learning_rate": 6.487549674451415e-05, "loss": 1.9607, "step": 305440 }, { "epoch": 1.1626180888073505, "grad_norm": 0.24752797186374664, "learning_rate": 6.482762036176976e-05, "loss": 1.9543, "step": 305450 }, { "epoch": 1.1626561512754734, "grad_norm": 0.1773507297039032, "learning_rate": 6.477974924566449e-05, "loss": 1.9613, "step": 305460 }, { "epoch": 1.162694213743596, "grad_norm": 0.1826154589653015, "learning_rate": 6.473188339446073e-05, "loss": 1.9643, "step": 305470 }, { "epoch": 1.1627322762117187, "grad_norm": 0.17847725749015808, "learning_rate": 6.468402280642166e-05, "loss": 1.9631, "step": 305480 }, { "epoch": 1.1627703386798414, "grad_norm": 0.19692540168762207, "learning_rate": 6.46361674798116e-05, "loss": 1.9562, "step": 305490 }, { "epoch": 1.162808401147964, "grad_norm": 0.22006727755069733, "learning_rate": 6.458831741289561e-05, "loss": 1.9556, "step": 305500 }, { "epoch": 1.1628464636160867, "grad_norm": 0.22079311311244965, "learning_rate": 6.454047260393991e-05, "loss": 1.9466, "step": 305510 }, { "epoch": 1.1628845260842093, "grad_norm": 0.17099638283252716, "learning_rate": 6.449263305121155e-05, "loss": 1.968, "step": 305520 }, { "epoch": 1.162922588552332, "grad_norm": 0.19776278734207153, "learning_rate": 6.444479875297859e-05, "loss": 1.9651, "step": 305530 }, { "epoch": 1.1629606510204549, "grad_norm": 0.16865932941436768, "learning_rate": 6.439696970750997e-05, "loss": 1.9672, "step": 305540 }, { "epoch": 1.1629987134885775, "grad_norm": 0.21851569414138794, "learning_rate": 6.434914591307561e-05, "loss": 1.9699, "step": 305550 }, { "epoch": 1.1630367759567002, "grad_norm": 0.1963483840227127, "learning_rate": 6.430132736794653e-05, "loss": 1.9727, "step": 305560 }, { "epoch": 1.1630748384248228, "grad_norm": 0.18260200321674347, "learning_rate": 6.425351407039438e-05, "loss": 1.9569, "step": 305570 }, { "epoch": 1.1631129008929455, "grad_norm": 0.22926168143749237, "learning_rate": 6.420570601869202e-05, "loss": 1.9607, "step": 305580 }, { "epoch": 1.1631509633610682, "grad_norm": 0.16698819398880005, "learning_rate": 6.415790321111326e-05, "loss": 1.9588, "step": 305590 }, { "epoch": 1.1631890258291908, "grad_norm": 0.168158158659935, "learning_rate": 6.411010564593267e-05, "loss": 1.9564, "step": 305600 }, { "epoch": 1.1632270882973135, "grad_norm": 0.15741604566574097, "learning_rate": 6.406231332142586e-05, "loss": 1.9443, "step": 305610 }, { "epoch": 1.1632651507654361, "grad_norm": 0.2450927495956421, "learning_rate": 6.401452623586951e-05, "loss": 1.9674, "step": 305620 }, { "epoch": 1.163303213233559, "grad_norm": 0.16945163905620575, "learning_rate": 6.396674438754102e-05, "loss": 1.9597, "step": 305630 }, { "epoch": 1.1633412757016817, "grad_norm": 0.18803608417510986, "learning_rate": 6.391896777471889e-05, "loss": 1.9671, "step": 305640 }, { "epoch": 1.1633793381698043, "grad_norm": 0.1747310608625412, "learning_rate": 6.387119639568251e-05, "loss": 1.9599, "step": 305650 }, { "epoch": 1.163417400637927, "grad_norm": 0.1990187019109726, "learning_rate": 6.382343024871223e-05, "loss": 1.9542, "step": 305660 }, { "epoch": 1.1634554631060496, "grad_norm": 0.21996289491653442, "learning_rate": 6.377566933208934e-05, "loss": 1.9618, "step": 305670 }, { "epoch": 1.1634935255741723, "grad_norm": 0.19845467805862427, "learning_rate": 6.372791364409603e-05, "loss": 1.9532, "step": 305680 }, { "epoch": 1.163531588042295, "grad_norm": 0.18662182986736298, "learning_rate": 6.368016318301551e-05, "loss": 1.9682, "step": 305690 }, { "epoch": 1.1635696505104176, "grad_norm": 0.17204593122005463, "learning_rate": 6.363241794713175e-05, "loss": 1.9621, "step": 305700 }, { "epoch": 1.1636077129785405, "grad_norm": 0.157108873128891, "learning_rate": 6.358467793473e-05, "loss": 1.95, "step": 305710 }, { "epoch": 1.1636457754466631, "grad_norm": 0.2213294804096222, "learning_rate": 6.353694314409609e-05, "loss": 1.9537, "step": 305720 }, { "epoch": 1.1636838379147858, "grad_norm": 0.19779251515865326, "learning_rate": 6.3489213573517e-05, "loss": 1.9694, "step": 305730 }, { "epoch": 1.1637219003829085, "grad_norm": 0.21293361485004425, "learning_rate": 6.344148922128052e-05, "loss": 1.9813, "step": 305740 }, { "epoch": 1.1637599628510311, "grad_norm": 0.171671062707901, "learning_rate": 6.339377008567548e-05, "loss": 1.9632, "step": 305750 }, { "epoch": 1.1637980253191538, "grad_norm": 0.19532741606235504, "learning_rate": 6.334605616499162e-05, "loss": 1.9601, "step": 305760 }, { "epoch": 1.1638360877872764, "grad_norm": 0.17116063833236694, "learning_rate": 6.329834745751956e-05, "loss": 1.9591, "step": 305770 }, { "epoch": 1.163874150255399, "grad_norm": 0.19638307392597198, "learning_rate": 6.325064396155089e-05, "loss": 1.9599, "step": 305780 }, { "epoch": 1.1639122127235217, "grad_norm": 0.18028929829597473, "learning_rate": 6.320294567537816e-05, "loss": 1.9614, "step": 305790 }, { "epoch": 1.1639502751916446, "grad_norm": 0.1766701340675354, "learning_rate": 6.315525259729477e-05, "loss": 1.9643, "step": 305800 }, { "epoch": 1.1639883376597673, "grad_norm": 0.16383618116378784, "learning_rate": 6.31075647255952e-05, "loss": 1.9609, "step": 305810 }, { "epoch": 1.16402640012789, "grad_norm": 0.2566216289997101, "learning_rate": 6.305988205857465e-05, "loss": 1.9509, "step": 305820 }, { "epoch": 1.1640644625960126, "grad_norm": 0.19184941053390503, "learning_rate": 6.30122045945295e-05, "loss": 1.9533, "step": 305830 }, { "epoch": 1.1641025250641353, "grad_norm": 0.1650419682264328, "learning_rate": 6.296453233175686e-05, "loss": 1.9633, "step": 305840 }, { "epoch": 1.164140587532258, "grad_norm": 0.19763387739658356, "learning_rate": 6.291686526855484e-05, "loss": 1.9599, "step": 305850 }, { "epoch": 1.1641786500003806, "grad_norm": 0.19652384519577026, "learning_rate": 6.286920340322244e-05, "loss": 1.9632, "step": 305860 }, { "epoch": 1.1642167124685032, "grad_norm": 0.180561825633049, "learning_rate": 6.282154673405965e-05, "loss": 1.9648, "step": 305870 }, { "epoch": 1.164254774936626, "grad_norm": 0.18484221398830414, "learning_rate": 6.277389525936733e-05, "loss": 1.9591, "step": 305880 }, { "epoch": 1.1642928374047488, "grad_norm": 0.21535907685756683, "learning_rate": 6.272624897744739e-05, "loss": 1.9443, "step": 305890 }, { "epoch": 1.1643308998728714, "grad_norm": 0.16880010068416595, "learning_rate": 6.267860788660246e-05, "loss": 1.9568, "step": 305900 }, { "epoch": 1.164368962340994, "grad_norm": 0.19901631772518158, "learning_rate": 6.263097198513623e-05, "loss": 1.9607, "step": 305910 }, { "epoch": 1.1644070248091167, "grad_norm": 0.16724003851413727, "learning_rate": 6.258334127135335e-05, "loss": 1.9688, "step": 305920 }, { "epoch": 1.1644450872772394, "grad_norm": 0.16612428426742554, "learning_rate": 6.253571574355927e-05, "loss": 1.9612, "step": 305930 }, { "epoch": 1.164483149745362, "grad_norm": 0.1706397384405136, "learning_rate": 6.248809540006034e-05, "loss": 1.9509, "step": 305940 }, { "epoch": 1.1645212122134847, "grad_norm": 0.1672465056180954, "learning_rate": 6.24404802391641e-05, "loss": 1.9615, "step": 305950 }, { "epoch": 1.1645592746816074, "grad_norm": 0.17334552109241486, "learning_rate": 6.239287025917872e-05, "loss": 1.9665, "step": 305960 }, { "epoch": 1.16459733714973, "grad_norm": 0.16249677538871765, "learning_rate": 6.234526545841335e-05, "loss": 1.9494, "step": 305970 }, { "epoch": 1.164635399617853, "grad_norm": 0.18276648223400116, "learning_rate": 6.22976658351782e-05, "loss": 1.9656, "step": 305980 }, { "epoch": 1.1646734620859756, "grad_norm": 0.2105034440755844, "learning_rate": 6.225007138778427e-05, "loss": 1.9517, "step": 305990 }, { "epoch": 1.1647115245540982, "grad_norm": 0.16858388483524323, "learning_rate": 6.220248211454344e-05, "loss": 1.9651, "step": 306000 }, { "epoch": 1.1647495870222209, "grad_norm": 0.19777953624725342, "learning_rate": 6.215489801376861e-05, "loss": 1.9811, "step": 306010 }, { "epoch": 1.1647876494903435, "grad_norm": 0.17725993692874908, "learning_rate": 6.210731908377365e-05, "loss": 1.9575, "step": 306020 }, { "epoch": 1.1648257119584662, "grad_norm": 0.1922709047794342, "learning_rate": 6.205974532287323e-05, "loss": 1.9556, "step": 306030 }, { "epoch": 1.1648637744265888, "grad_norm": 0.1779535710811615, "learning_rate": 6.201217672938287e-05, "loss": 1.9557, "step": 306040 }, { "epoch": 1.1649018368947117, "grad_norm": 0.18894916772842407, "learning_rate": 6.196461330161924e-05, "loss": 1.9579, "step": 306050 }, { "epoch": 1.1649398993628344, "grad_norm": 0.237869530916214, "learning_rate": 6.191705503789962e-05, "loss": 1.9727, "step": 306060 }, { "epoch": 1.164977961830957, "grad_norm": 0.1724383383989334, "learning_rate": 6.18695019365425e-05, "loss": 1.9686, "step": 306070 }, { "epoch": 1.1650160242990797, "grad_norm": 0.15821924805641174, "learning_rate": 6.18219539958671e-05, "loss": 1.959, "step": 306080 }, { "epoch": 1.1650540867672023, "grad_norm": 0.14829407632350922, "learning_rate": 6.177441121419363e-05, "loss": 1.9554, "step": 306090 }, { "epoch": 1.165092149235325, "grad_norm": 0.19108377397060394, "learning_rate": 6.172687358984314e-05, "loss": 1.9553, "step": 306100 }, { "epoch": 1.1651302117034477, "grad_norm": 0.1724005788564682, "learning_rate": 6.167934112113766e-05, "loss": 1.95, "step": 306110 }, { "epoch": 1.1651682741715703, "grad_norm": 0.22939327359199524, "learning_rate": 6.16318138064001e-05, "loss": 1.9561, "step": 306120 }, { "epoch": 1.165206336639693, "grad_norm": 0.18707819283008575, "learning_rate": 6.158429164395429e-05, "loss": 1.9672, "step": 306130 }, { "epoch": 1.1652443991078156, "grad_norm": 0.17966648936271667, "learning_rate": 6.153677463212493e-05, "loss": 1.9533, "step": 306140 }, { "epoch": 1.1652824615759385, "grad_norm": 0.18556983768939972, "learning_rate": 6.148926276923772e-05, "loss": 1.9532, "step": 306150 }, { "epoch": 1.1653205240440612, "grad_norm": 0.18378737568855286, "learning_rate": 6.144175605361912e-05, "loss": 1.9693, "step": 306160 }, { "epoch": 1.1653585865121838, "grad_norm": 0.18640461564064026, "learning_rate": 6.139425448359665e-05, "loss": 1.9565, "step": 306170 }, { "epoch": 1.1653966489803065, "grad_norm": 0.1846592277288437, "learning_rate": 6.134675805749856e-05, "loss": 1.9603, "step": 306180 }, { "epoch": 1.1654347114484291, "grad_norm": 0.1734076589345932, "learning_rate": 6.129926677365427e-05, "loss": 1.9705, "step": 306190 }, { "epoch": 1.1654727739165518, "grad_norm": 0.19703339040279388, "learning_rate": 6.125178063039388e-05, "loss": 1.9509, "step": 306200 }, { "epoch": 1.1655108363846745, "grad_norm": 0.1685038059949875, "learning_rate": 6.120429962604845e-05, "loss": 1.9646, "step": 306210 }, { "epoch": 1.1655488988527973, "grad_norm": 0.16940629482269287, "learning_rate": 6.115682375894994e-05, "loss": 1.9422, "step": 306220 }, { "epoch": 1.16558696132092, "grad_norm": 0.24949117004871368, "learning_rate": 6.110935302743121e-05, "loss": 1.9393, "step": 306230 }, { "epoch": 1.1656250237890426, "grad_norm": 0.17381447553634644, "learning_rate": 6.106188742982605e-05, "loss": 1.9629, "step": 306240 }, { "epoch": 1.1656630862571653, "grad_norm": 0.18676424026489258, "learning_rate": 6.101442696446918e-05, "loss": 1.9591, "step": 306250 }, { "epoch": 1.165701148725288, "grad_norm": 0.1781821846961975, "learning_rate": 6.0966971629696167e-05, "loss": 1.9586, "step": 306260 }, { "epoch": 1.1657392111934106, "grad_norm": 0.19924336671829224, "learning_rate": 6.091952142384344e-05, "loss": 1.9532, "step": 306270 }, { "epoch": 1.1657772736615333, "grad_norm": 0.213816300034523, "learning_rate": 6.087207634524844e-05, "loss": 1.9674, "step": 306280 }, { "epoch": 1.165815336129656, "grad_norm": 0.19950461387634277, "learning_rate": 6.0824636392249335e-05, "loss": 1.9465, "step": 306290 }, { "epoch": 1.1658533985977786, "grad_norm": 0.1716541051864624, "learning_rate": 6.077720156318539e-05, "loss": 1.9426, "step": 306300 }, { "epoch": 1.1658914610659012, "grad_norm": 0.22820167243480682, "learning_rate": 6.072977185639666e-05, "loss": 1.9545, "step": 306310 }, { "epoch": 1.1659295235340241, "grad_norm": 0.27457278966903687, "learning_rate": 6.068234727022409e-05, "loss": 1.9686, "step": 306320 }, { "epoch": 1.1659675860021468, "grad_norm": 0.18272149562835693, "learning_rate": 6.06349278030095e-05, "loss": 1.9673, "step": 306330 }, { "epoch": 1.1660056484702694, "grad_norm": 0.5270007252693176, "learning_rate": 6.058751345309571e-05, "loss": 1.9778, "step": 306340 }, { "epoch": 1.166043710938392, "grad_norm": 0.17469589412212372, "learning_rate": 6.0540104218826385e-05, "loss": 1.9526, "step": 306350 }, { "epoch": 1.1660817734065148, "grad_norm": 0.21892161667346954, "learning_rate": 6.049270009854596e-05, "loss": 1.9649, "step": 306360 }, { "epoch": 1.1661198358746374, "grad_norm": 0.24794617295265198, "learning_rate": 6.044530109059992e-05, "loss": 1.9562, "step": 306370 }, { "epoch": 1.16615789834276, "grad_norm": 0.17378780245780945, "learning_rate": 6.039790719333465e-05, "loss": 1.9506, "step": 306380 }, { "epoch": 1.1661959608108827, "grad_norm": 0.20277273654937744, "learning_rate": 6.035051840509731e-05, "loss": 1.9633, "step": 306390 }, { "epoch": 1.1662340232790056, "grad_norm": 0.23824036121368408, "learning_rate": 6.030313472423604e-05, "loss": 1.957, "step": 306400 }, { "epoch": 1.1662720857471283, "grad_norm": 0.20040448009967804, "learning_rate": 6.025575614909978e-05, "loss": 1.9453, "step": 306410 }, { "epoch": 1.166310148215251, "grad_norm": 0.16171133518218994, "learning_rate": 6.0208382678038465e-05, "loss": 1.9657, "step": 306420 }, { "epoch": 1.1663482106833736, "grad_norm": 0.2363443672657013, "learning_rate": 6.0161014309402854e-05, "loss": 1.9467, "step": 306430 }, { "epoch": 1.1663862731514962, "grad_norm": 0.22468698024749756, "learning_rate": 6.0113651041544594e-05, "loss": 1.9753, "step": 306440 }, { "epoch": 1.166424335619619, "grad_norm": 0.17263805866241455, "learning_rate": 6.006629287281629e-05, "loss": 1.9626, "step": 306450 }, { "epoch": 1.1664623980877415, "grad_norm": 0.1878524273633957, "learning_rate": 6.00189398015713e-05, "loss": 1.9435, "step": 306460 }, { "epoch": 1.1665004605558642, "grad_norm": 0.24534186720848083, "learning_rate": 5.9971591826164016e-05, "loss": 1.9672, "step": 306470 }, { "epoch": 1.1665385230239869, "grad_norm": 0.16337037086486816, "learning_rate": 5.992424894494963e-05, "loss": 1.9647, "step": 306480 }, { "epoch": 1.1665765854921097, "grad_norm": 0.1678982526063919, "learning_rate": 5.9876911156284185e-05, "loss": 1.9514, "step": 306490 }, { "epoch": 1.1666146479602324, "grad_norm": 0.1978108137845993, "learning_rate": 5.982957845852477e-05, "loss": 1.9553, "step": 306500 }, { "epoch": 1.166652710428355, "grad_norm": 0.2141515016555786, "learning_rate": 5.978225085002914e-05, "loss": 1.9429, "step": 306510 }, { "epoch": 1.1666907728964777, "grad_norm": 0.19807757437229156, "learning_rate": 5.9734928329156115e-05, "loss": 1.9559, "step": 306520 }, { "epoch": 1.1667288353646004, "grad_norm": 0.23704470694065094, "learning_rate": 5.968761089426522e-05, "loss": 1.9582, "step": 306530 }, { "epoch": 1.166766897832723, "grad_norm": 0.2034177929162979, "learning_rate": 5.9640298543717056e-05, "loss": 1.9695, "step": 306540 }, { "epoch": 1.1668049603008457, "grad_norm": 0.24106188118457794, "learning_rate": 5.9592991275872924e-05, "loss": 1.9601, "step": 306550 }, { "epoch": 1.1668430227689683, "grad_norm": 0.8812771439552307, "learning_rate": 5.954568908909519e-05, "loss": 1.9633, "step": 306560 }, { "epoch": 1.1668810852370912, "grad_norm": 0.18400143086910248, "learning_rate": 5.9498391981746946e-05, "loss": 1.9466, "step": 306570 }, { "epoch": 1.1669191477052139, "grad_norm": 0.2550337016582489, "learning_rate": 5.9451099952192225e-05, "loss": 1.9706, "step": 306580 }, { "epoch": 1.1669572101733365, "grad_norm": 0.2880820333957672, "learning_rate": 5.940381299879594e-05, "loss": 1.9525, "step": 306590 }, { "epoch": 1.1669952726414592, "grad_norm": 0.2965538203716278, "learning_rate": 5.935653111992384e-05, "loss": 1.9653, "step": 306600 }, { "epoch": 1.1670333351095818, "grad_norm": 0.19963648915290833, "learning_rate": 5.9309254313942576e-05, "loss": 1.9417, "step": 306610 }, { "epoch": 1.1670713975777045, "grad_norm": 0.17213891446590424, "learning_rate": 5.926198257921972e-05, "loss": 1.9497, "step": 306620 }, { "epoch": 1.1671094600458272, "grad_norm": 0.18678686022758484, "learning_rate": 5.92147159141237e-05, "loss": 1.9553, "step": 306630 }, { "epoch": 1.1671475225139498, "grad_norm": 0.1734098196029663, "learning_rate": 5.916745431702375e-05, "loss": 1.966, "step": 306640 }, { "epoch": 1.1671855849820725, "grad_norm": 1.143925428390503, "learning_rate": 5.9120197786290075e-05, "loss": 1.9547, "step": 306650 }, { "epoch": 1.1672236474501954, "grad_norm": 0.19502100348472595, "learning_rate": 5.9072946320293644e-05, "loss": 1.9582, "step": 306660 }, { "epoch": 1.167261709918318, "grad_norm": 0.2442169040441513, "learning_rate": 5.902569991740636e-05, "loss": 1.956, "step": 306670 }, { "epoch": 1.1672997723864407, "grad_norm": 0.16955025494098663, "learning_rate": 5.8978458576001096e-05, "loss": 1.9544, "step": 306680 }, { "epoch": 1.1673378348545633, "grad_norm": 0.19239042699337006, "learning_rate": 5.8931222294451356e-05, "loss": 1.9573, "step": 306690 }, { "epoch": 1.167375897322686, "grad_norm": 0.17494860291481018, "learning_rate": 5.888399107113179e-05, "loss": 1.9526, "step": 306700 }, { "epoch": 1.1674139597908086, "grad_norm": 0.16352291405200958, "learning_rate": 5.883676490441775e-05, "loss": 1.9569, "step": 306710 }, { "epoch": 1.1674520222589313, "grad_norm": 0.17652276158332825, "learning_rate": 5.878954379268542e-05, "loss": 1.9541, "step": 306720 }, { "epoch": 1.167490084727054, "grad_norm": 0.17422421276569366, "learning_rate": 5.874232773431199e-05, "loss": 1.9595, "step": 306730 }, { "epoch": 1.1675281471951768, "grad_norm": 0.16758720576763153, "learning_rate": 5.869511672767541e-05, "loss": 1.9633, "step": 306740 }, { "epoch": 1.1675662096632995, "grad_norm": 0.2105366438627243, "learning_rate": 5.8647910771154664e-05, "loss": 1.9614, "step": 306750 }, { "epoch": 1.1676042721314221, "grad_norm": 0.1798621416091919, "learning_rate": 5.860070986312932e-05, "loss": 1.9383, "step": 306760 }, { "epoch": 1.1676423345995448, "grad_norm": 0.1724506914615631, "learning_rate": 5.855351400198011e-05, "loss": 1.9671, "step": 306770 }, { "epoch": 1.1676803970676675, "grad_norm": 0.1844310313463211, "learning_rate": 5.85063231860884e-05, "loss": 1.9546, "step": 306780 }, { "epoch": 1.1677184595357901, "grad_norm": 0.22345036268234253, "learning_rate": 5.845913741383646e-05, "loss": 1.9597, "step": 306790 }, { "epoch": 1.1677565220039128, "grad_norm": 0.20319147408008575, "learning_rate": 5.841195668360766e-05, "loss": 1.9604, "step": 306800 }, { "epoch": 1.1677945844720354, "grad_norm": 0.19100074470043182, "learning_rate": 5.836478099378595e-05, "loss": 1.9528, "step": 306810 }, { "epoch": 1.167832646940158, "grad_norm": 0.1862342208623886, "learning_rate": 5.831761034275623e-05, "loss": 1.9621, "step": 306820 }, { "epoch": 1.1678707094082807, "grad_norm": 0.16333100199699402, "learning_rate": 5.827044472890431e-05, "loss": 1.9546, "step": 306830 }, { "epoch": 1.1679087718764036, "grad_norm": 0.25692513585090637, "learning_rate": 5.822328415061684e-05, "loss": 1.9577, "step": 306840 }, { "epoch": 1.1679468343445263, "grad_norm": 0.19265873730182648, "learning_rate": 5.8176128606281244e-05, "loss": 1.9613, "step": 306850 }, { "epoch": 1.167984896812649, "grad_norm": 0.19025906920433044, "learning_rate": 5.8128978094285967e-05, "loss": 1.9544, "step": 306860 }, { "epoch": 1.1680229592807716, "grad_norm": 0.1604529321193695, "learning_rate": 5.8081832613020246e-05, "loss": 1.9609, "step": 306870 }, { "epoch": 1.1680610217488943, "grad_norm": 0.1947263926267624, "learning_rate": 5.8034692160874095e-05, "loss": 1.9538, "step": 306880 }, { "epoch": 1.168099084217017, "grad_norm": 0.18386346101760864, "learning_rate": 5.798755673623851e-05, "loss": 1.9615, "step": 306890 }, { "epoch": 1.1681371466851396, "grad_norm": 0.1659373790025711, "learning_rate": 5.7940426337505234e-05, "loss": 1.9615, "step": 306900 }, { "epoch": 1.1681752091532625, "grad_norm": 0.18153296411037445, "learning_rate": 5.789330096306688e-05, "loss": 1.962, "step": 306910 }, { "epoch": 1.168213271621385, "grad_norm": 0.15782102942466736, "learning_rate": 5.7846180611317136e-05, "loss": 1.9554, "step": 306920 }, { "epoch": 1.1682513340895078, "grad_norm": 0.235306054353714, "learning_rate": 5.779906528065021e-05, "loss": 1.9783, "step": 306930 }, { "epoch": 1.1682893965576304, "grad_norm": 0.24597300589084625, "learning_rate": 5.775195496946134e-05, "loss": 1.9586, "step": 306940 }, { "epoch": 1.168327459025753, "grad_norm": 0.18952709436416626, "learning_rate": 5.77048496761467e-05, "loss": 1.9446, "step": 306950 }, { "epoch": 1.1683655214938757, "grad_norm": 0.1808987855911255, "learning_rate": 5.765774939910312e-05, "loss": 1.9617, "step": 306960 }, { "epoch": 1.1684035839619984, "grad_norm": 0.21182473003864288, "learning_rate": 5.761065413672839e-05, "loss": 1.964, "step": 306970 }, { "epoch": 1.168441646430121, "grad_norm": 0.19097618758678436, "learning_rate": 5.756356388742123e-05, "loss": 1.9718, "step": 306980 }, { "epoch": 1.1684797088982437, "grad_norm": 0.19493243098258972, "learning_rate": 5.751647864958104e-05, "loss": 1.9593, "step": 306990 }, { "epoch": 1.1685177713663664, "grad_norm": 0.2880631685256958, "learning_rate": 5.74693984216082e-05, "loss": 1.9523, "step": 307000 }, { "epoch": 1.1685558338344892, "grad_norm": 0.21112750470638275, "learning_rate": 5.7422323201903934e-05, "loss": 1.974, "step": 307010 }, { "epoch": 1.168593896302612, "grad_norm": 0.29016757011413574, "learning_rate": 5.7375252988870195e-05, "loss": 1.9517, "step": 307020 }, { "epoch": 1.1686319587707346, "grad_norm": 0.1702297329902649, "learning_rate": 5.7328187780909926e-05, "loss": 1.9525, "step": 307030 }, { "epoch": 1.1686700212388572, "grad_norm": 0.26800990104675293, "learning_rate": 5.7281127576426897e-05, "loss": 1.9656, "step": 307040 }, { "epoch": 1.1687080837069799, "grad_norm": 0.1691807359457016, "learning_rate": 5.7234072373825663e-05, "loss": 1.9621, "step": 307050 }, { "epoch": 1.1687461461751025, "grad_norm": 0.17145681381225586, "learning_rate": 5.7187022171511615e-05, "loss": 1.9594, "step": 307060 }, { "epoch": 1.1687842086432252, "grad_norm": 0.18351706862449646, "learning_rate": 5.713997696789114e-05, "loss": 1.9485, "step": 307070 }, { "epoch": 1.168822271111348, "grad_norm": 0.20628760755062103, "learning_rate": 5.709293676137128e-05, "loss": 1.9634, "step": 307080 }, { "epoch": 1.1688603335794707, "grad_norm": 0.26568111777305603, "learning_rate": 5.704590155036005e-05, "loss": 1.9536, "step": 307090 }, { "epoch": 1.1688983960475934, "grad_norm": 0.1946471780538559, "learning_rate": 5.699887133326625e-05, "loss": 1.9555, "step": 307100 }, { "epoch": 1.168936458515716, "grad_norm": 0.1880960464477539, "learning_rate": 5.695184610849957e-05, "loss": 1.9573, "step": 307110 }, { "epoch": 1.1689745209838387, "grad_norm": 0.20076966285705566, "learning_rate": 5.690482587447049e-05, "loss": 1.9584, "step": 307120 }, { "epoch": 1.1690125834519614, "grad_norm": 0.21330180764198303, "learning_rate": 5.685781062959039e-05, "loss": 1.9517, "step": 307130 }, { "epoch": 1.169050645920084, "grad_norm": 0.18850235641002655, "learning_rate": 5.681080037227143e-05, "loss": 1.9545, "step": 307140 }, { "epoch": 1.1690887083882067, "grad_norm": 0.17607633769512177, "learning_rate": 5.6763795100926666e-05, "loss": 1.9583, "step": 307150 }, { "epoch": 1.1691267708563293, "grad_norm": 0.16168035566806793, "learning_rate": 5.671679481396996e-05, "loss": 1.964, "step": 307160 }, { "epoch": 1.169164833324452, "grad_norm": 0.16363447904586792, "learning_rate": 5.66697995098161e-05, "loss": 1.9628, "step": 307170 }, { "epoch": 1.1692028957925749, "grad_norm": 0.20630410313606262, "learning_rate": 5.662280918688056e-05, "loss": 1.9481, "step": 307180 }, { "epoch": 1.1692409582606975, "grad_norm": 0.18285779654979706, "learning_rate": 5.657582384357979e-05, "loss": 1.9669, "step": 307190 }, { "epoch": 1.1692790207288202, "grad_norm": 0.20902836322784424, "learning_rate": 5.652884347833098e-05, "loss": 1.9534, "step": 307200 }, { "epoch": 1.1693170831969428, "grad_norm": 0.214891254901886, "learning_rate": 5.6481868089552244e-05, "loss": 1.9623, "step": 307210 }, { "epoch": 1.1693551456650655, "grad_norm": 0.20295549929141998, "learning_rate": 5.643489767566251e-05, "loss": 1.9576, "step": 307220 }, { "epoch": 1.1693932081331881, "grad_norm": 0.22516366839408875, "learning_rate": 5.638793223508148e-05, "loss": 1.956, "step": 307230 }, { "epoch": 1.1694312706013108, "grad_norm": 0.22936879098415375, "learning_rate": 5.634097176622977e-05, "loss": 1.9409, "step": 307240 }, { "epoch": 1.1694693330694335, "grad_norm": 0.2069147825241089, "learning_rate": 5.629401626752878e-05, "loss": 1.9515, "step": 307250 }, { "epoch": 1.1695073955375563, "grad_norm": 0.19395551085472107, "learning_rate": 5.6247065737400846e-05, "loss": 1.9583, "step": 307260 }, { "epoch": 1.169545458005679, "grad_norm": 0.18069936335086823, "learning_rate": 5.6200120174268896e-05, "loss": 1.9577, "step": 307270 }, { "epoch": 1.1695835204738017, "grad_norm": 0.17336305975914001, "learning_rate": 5.615317957655708e-05, "loss": 1.9519, "step": 307280 }, { "epoch": 1.1696215829419243, "grad_norm": 0.179020956158638, "learning_rate": 5.610624394268998e-05, "loss": 1.9509, "step": 307290 }, { "epoch": 1.169659645410047, "grad_norm": 0.1873224973678589, "learning_rate": 5.605931327109326e-05, "loss": 1.9532, "step": 307300 }, { "epoch": 1.1696977078781696, "grad_norm": 0.18882738053798676, "learning_rate": 5.601238756019328e-05, "loss": 1.9581, "step": 307310 }, { "epoch": 1.1697357703462923, "grad_norm": 0.233427032828331, "learning_rate": 5.596546680841741e-05, "loss": 1.9634, "step": 307320 }, { "epoch": 1.169773832814415, "grad_norm": 0.17924568057060242, "learning_rate": 5.591855101419363e-05, "loss": 1.9435, "step": 307330 }, { "epoch": 1.1698118952825376, "grad_norm": 0.21077688038349152, "learning_rate": 5.587164017595092e-05, "loss": 1.9613, "step": 307340 }, { "epoch": 1.1698499577506605, "grad_norm": 0.16858820617198944, "learning_rate": 5.582473429211904e-05, "loss": 1.9592, "step": 307350 }, { "epoch": 1.1698880202187831, "grad_norm": 0.23580597341060638, "learning_rate": 5.577783336112846e-05, "loss": 1.9504, "step": 307360 }, { "epoch": 1.1699260826869058, "grad_norm": 0.2029874324798584, "learning_rate": 5.5730937381410716e-05, "loss": 1.948, "step": 307370 }, { "epoch": 1.1699641451550284, "grad_norm": 0.16511982679367065, "learning_rate": 5.568404635139801e-05, "loss": 1.9501, "step": 307380 }, { "epoch": 1.170002207623151, "grad_norm": 0.19717973470687866, "learning_rate": 5.5637160269523255e-05, "loss": 1.9478, "step": 307390 }, { "epoch": 1.1700402700912738, "grad_norm": 0.1704729199409485, "learning_rate": 5.559027913422055e-05, "loss": 1.9724, "step": 307400 }, { "epoch": 1.1700783325593964, "grad_norm": 0.19611027836799622, "learning_rate": 5.554340294392452e-05, "loss": 1.965, "step": 307410 }, { "epoch": 1.170116395027519, "grad_norm": 0.21575294435024261, "learning_rate": 5.549653169707064e-05, "loss": 1.9538, "step": 307420 }, { "epoch": 1.170154457495642, "grad_norm": 0.178140789270401, "learning_rate": 5.5449665392095335e-05, "loss": 1.9614, "step": 307430 }, { "epoch": 1.1701925199637646, "grad_norm": 0.17497879266738892, "learning_rate": 5.5402804027435806e-05, "loss": 1.9578, "step": 307440 }, { "epoch": 1.1702305824318873, "grad_norm": 0.25354599952697754, "learning_rate": 5.535594760153001e-05, "loss": 1.9485, "step": 307450 }, { "epoch": 1.17026864490001, "grad_norm": 0.18424321711063385, "learning_rate": 5.530909611281682e-05, "loss": 1.9586, "step": 307460 }, { "epoch": 1.1703067073681326, "grad_norm": 0.1908847540616989, "learning_rate": 5.526224955973585e-05, "loss": 1.9467, "step": 307470 }, { "epoch": 1.1703447698362552, "grad_norm": 0.18996630609035492, "learning_rate": 5.521540794072766e-05, "loss": 1.9628, "step": 307480 }, { "epoch": 1.170382832304378, "grad_norm": 0.2502085566520691, "learning_rate": 5.5168571254233415e-05, "loss": 1.9529, "step": 307490 }, { "epoch": 1.1704208947725006, "grad_norm": 0.17994414269924164, "learning_rate": 5.512173949869537e-05, "loss": 1.9512, "step": 307500 }, { "epoch": 1.1704589572406232, "grad_norm": 0.1656319797039032, "learning_rate": 5.5074912672556374e-05, "loss": 1.9551, "step": 307510 }, { "epoch": 1.170497019708746, "grad_norm": 0.18443243205547333, "learning_rate": 5.502809077426019e-05, "loss": 1.9529, "step": 307520 }, { "epoch": 1.1705350821768687, "grad_norm": 0.18963739275932312, "learning_rate": 5.498127380225149e-05, "loss": 1.9496, "step": 307530 }, { "epoch": 1.1705731446449914, "grad_norm": 0.22023898363113403, "learning_rate": 5.4934461754975517e-05, "loss": 1.9699, "step": 307540 }, { "epoch": 1.170611207113114, "grad_norm": 0.17199300229549408, "learning_rate": 5.488765463087864e-05, "loss": 1.9604, "step": 307550 }, { "epoch": 1.1706492695812367, "grad_norm": 0.1808360069990158, "learning_rate": 5.484085242840775e-05, "loss": 1.9503, "step": 307560 }, { "epoch": 1.1706873320493594, "grad_norm": 0.15934909880161285, "learning_rate": 5.479405514601077e-05, "loss": 1.9497, "step": 307570 }, { "epoch": 1.170725394517482, "grad_norm": 0.17372657358646393, "learning_rate": 5.4747262782136264e-05, "loss": 1.9602, "step": 307580 }, { "epoch": 1.1707634569856047, "grad_norm": 0.17633219063282013, "learning_rate": 5.470047533523387e-05, "loss": 1.9561, "step": 307590 }, { "epoch": 1.1708015194537276, "grad_norm": 0.2467532753944397, "learning_rate": 5.4653692803753765e-05, "loss": 1.9538, "step": 307600 }, { "epoch": 1.1708395819218502, "grad_norm": 0.19640734791755676, "learning_rate": 5.460691518614708e-05, "loss": 1.9673, "step": 307610 }, { "epoch": 1.1708776443899729, "grad_norm": 0.20919203758239746, "learning_rate": 5.4560142480865715e-05, "loss": 1.9555, "step": 307620 }, { "epoch": 1.1709157068580955, "grad_norm": 0.20885075628757477, "learning_rate": 5.451337468636241e-05, "loss": 1.9728, "step": 307630 }, { "epoch": 1.1709537693262182, "grad_norm": 0.183942511677742, "learning_rate": 5.4466611801090695e-05, "loss": 1.9515, "step": 307640 }, { "epoch": 1.1709918317943409, "grad_norm": 0.1857999712228775, "learning_rate": 5.4419853823505e-05, "loss": 1.9585, "step": 307650 }, { "epoch": 1.1710298942624635, "grad_norm": 0.17464017868041992, "learning_rate": 5.437310075206037e-05, "loss": 1.9645, "step": 307660 }, { "epoch": 1.1710679567305862, "grad_norm": 0.2077675610780716, "learning_rate": 5.4326352585212845e-05, "loss": 1.9431, "step": 307670 }, { "epoch": 1.1711060191987088, "grad_norm": 0.16267137229442596, "learning_rate": 5.4279609321419234e-05, "loss": 1.9511, "step": 307680 }, { "epoch": 1.1711440816668315, "grad_norm": 0.1626701056957245, "learning_rate": 5.423287095913709e-05, "loss": 1.9489, "step": 307690 }, { "epoch": 1.1711821441349544, "grad_norm": 0.16942808032035828, "learning_rate": 5.418613749682477e-05, "loss": 1.9686, "step": 307700 }, { "epoch": 1.171220206603077, "grad_norm": 0.2459169328212738, "learning_rate": 5.413940893294161e-05, "loss": 1.9626, "step": 307710 }, { "epoch": 1.1712582690711997, "grad_norm": 0.175147145986557, "learning_rate": 5.409268526594752e-05, "loss": 1.9528, "step": 307720 }, { "epoch": 1.1712963315393223, "grad_norm": 0.18634077906608582, "learning_rate": 5.404596649430338e-05, "loss": 1.9408, "step": 307730 }, { "epoch": 1.171334394007445, "grad_norm": 0.2112835794687271, "learning_rate": 5.399925261647076e-05, "loss": 1.9431, "step": 307740 }, { "epoch": 1.1713724564755676, "grad_norm": 0.21302802860736847, "learning_rate": 5.395254363091218e-05, "loss": 1.9582, "step": 307750 }, { "epoch": 1.1714105189436903, "grad_norm": 0.18202129006385803, "learning_rate": 5.390583953609074e-05, "loss": 1.9528, "step": 307760 }, { "epoch": 1.1714485814118132, "grad_norm": 0.17398393154144287, "learning_rate": 5.385914033047068e-05, "loss": 1.9627, "step": 307770 }, { "epoch": 1.1714866438799358, "grad_norm": 0.16768161952495575, "learning_rate": 5.3812446012516666e-05, "loss": 1.9476, "step": 307780 }, { "epoch": 1.1715247063480585, "grad_norm": 0.3693370819091797, "learning_rate": 5.376575658069449e-05, "loss": 1.9576, "step": 307790 }, { "epoch": 1.1715627688161812, "grad_norm": 0.18076057732105255, "learning_rate": 5.371907203347054e-05, "loss": 1.9466, "step": 307800 }, { "epoch": 1.1716008312843038, "grad_norm": 0.28043287992477417, "learning_rate": 5.3672392369312094e-05, "loss": 1.961, "step": 307810 }, { "epoch": 1.1716388937524265, "grad_norm": 0.17701946198940277, "learning_rate": 5.362571758668711e-05, "loss": 1.9509, "step": 307820 }, { "epoch": 1.1716769562205491, "grad_norm": 0.17819836735725403, "learning_rate": 5.357904768406457e-05, "loss": 1.9577, "step": 307830 }, { "epoch": 1.1717150186886718, "grad_norm": 0.19525788724422455, "learning_rate": 5.353238265991417e-05, "loss": 1.9572, "step": 307840 }, { "epoch": 1.1717530811567944, "grad_norm": 0.2147541493177414, "learning_rate": 5.3485722512706226e-05, "loss": 1.9744, "step": 307850 }, { "epoch": 1.171791143624917, "grad_norm": 0.18522879481315613, "learning_rate": 5.343906724091208e-05, "loss": 1.938, "step": 307860 }, { "epoch": 1.17182920609304, "grad_norm": 0.1954905092716217, "learning_rate": 5.339241684300378e-05, "loss": 1.9528, "step": 307870 }, { "epoch": 1.1718672685611626, "grad_norm": 0.29039400815963745, "learning_rate": 5.3345771317454105e-05, "loss": 1.9531, "step": 307880 }, { "epoch": 1.1719053310292853, "grad_norm": 0.23559370636940002, "learning_rate": 5.329913066273684e-05, "loss": 1.957, "step": 307890 }, { "epoch": 1.171943393497408, "grad_norm": 0.1852443665266037, "learning_rate": 5.325249487732631e-05, "loss": 1.955, "step": 307900 }, { "epoch": 1.1719814559655306, "grad_norm": 0.2008378654718399, "learning_rate": 5.3205863959697894e-05, "loss": 1.9596, "step": 307910 }, { "epoch": 1.1720195184336533, "grad_norm": 0.17911696434020996, "learning_rate": 5.3159237908327496e-05, "loss": 1.9486, "step": 307920 }, { "epoch": 1.172057580901776, "grad_norm": 0.18760210275650024, "learning_rate": 5.311261672169204e-05, "loss": 1.9505, "step": 307930 }, { "epoch": 1.1720956433698988, "grad_norm": 0.1668512225151062, "learning_rate": 5.306600039826909e-05, "loss": 1.9575, "step": 307940 }, { "epoch": 1.1721337058380215, "grad_norm": 0.17411920428276062, "learning_rate": 5.3019388936537126e-05, "loss": 1.9437, "step": 307950 }, { "epoch": 1.1721717683061441, "grad_norm": 0.18603424727916718, "learning_rate": 5.297278233497532e-05, "loss": 1.97, "step": 307960 }, { "epoch": 1.1722098307742668, "grad_norm": 0.1917838305234909, "learning_rate": 5.2926180592063725e-05, "loss": 1.9572, "step": 307970 }, { "epoch": 1.1722478932423894, "grad_norm": 0.16847239434719086, "learning_rate": 5.287958370628315e-05, "loss": 1.9621, "step": 307980 }, { "epoch": 1.172285955710512, "grad_norm": 0.1744316965341568, "learning_rate": 5.2832991676115105e-05, "loss": 1.9345, "step": 307990 }, { "epoch": 1.1723240181786347, "grad_norm": 0.18387804925441742, "learning_rate": 5.2786404500042076e-05, "loss": 1.9495, "step": 308000 }, { "epoch": 1.1723620806467574, "grad_norm": 0.18154680728912354, "learning_rate": 5.273982217654716e-05, "loss": 1.9527, "step": 308010 }, { "epoch": 1.17240014311488, "grad_norm": 0.18909908831119537, "learning_rate": 5.269324470411435e-05, "loss": 1.9495, "step": 308020 }, { "epoch": 1.1724382055830027, "grad_norm": 0.16833332180976868, "learning_rate": 5.264667208122842e-05, "loss": 1.9584, "step": 308030 }, { "epoch": 1.1724762680511256, "grad_norm": 0.21265791356563568, "learning_rate": 5.2600104306374855e-05, "loss": 1.9436, "step": 308040 }, { "epoch": 1.1725143305192482, "grad_norm": 0.21704767644405365, "learning_rate": 5.255354137804002e-05, "loss": 1.9588, "step": 308050 }, { "epoch": 1.172552392987371, "grad_norm": 0.17958103120326996, "learning_rate": 5.2506983294711044e-05, "loss": 1.9394, "step": 308060 }, { "epoch": 1.1725904554554936, "grad_norm": 0.15790031850337982, "learning_rate": 5.246043005487583e-05, "loss": 1.9654, "step": 308070 }, { "epoch": 1.1726285179236162, "grad_norm": 0.1757218837738037, "learning_rate": 5.24138816570231e-05, "loss": 1.9596, "step": 308080 }, { "epoch": 1.1726665803917389, "grad_norm": 0.1712648719549179, "learning_rate": 5.2367338099642225e-05, "loss": 1.9405, "step": 308090 }, { "epoch": 1.1727046428598615, "grad_norm": 0.18772435188293457, "learning_rate": 5.232079938122358e-05, "loss": 1.9544, "step": 308100 }, { "epoch": 1.1727427053279842, "grad_norm": 0.1672012358903885, "learning_rate": 5.227426550025815e-05, "loss": 1.9627, "step": 308110 }, { "epoch": 1.172780767796107, "grad_norm": 0.17659921944141388, "learning_rate": 5.222773645523776e-05, "loss": 1.9638, "step": 308120 }, { "epoch": 1.1728188302642297, "grad_norm": 0.18108125030994415, "learning_rate": 5.218121224465505e-05, "loss": 1.9514, "step": 308130 }, { "epoch": 1.1728568927323524, "grad_norm": 0.18479257822036743, "learning_rate": 5.2134692867003455e-05, "loss": 1.9452, "step": 308140 }, { "epoch": 1.172894955200475, "grad_norm": 0.19881558418273926, "learning_rate": 5.2088178320777056e-05, "loss": 1.9725, "step": 308150 }, { "epoch": 1.1729330176685977, "grad_norm": 0.2701951265335083, "learning_rate": 5.2041668604470906e-05, "loss": 1.9486, "step": 308160 }, { "epoch": 1.1729710801367204, "grad_norm": 0.16124291718006134, "learning_rate": 5.19951637165807e-05, "loss": 1.9452, "step": 308170 }, { "epoch": 1.173009142604843, "grad_norm": 0.17446215450763702, "learning_rate": 5.194866365560297e-05, "loss": 1.9543, "step": 308180 }, { "epoch": 1.1730472050729657, "grad_norm": 0.18593032658100128, "learning_rate": 5.190216842003498e-05, "loss": 1.9547, "step": 308190 }, { "epoch": 1.1730852675410883, "grad_norm": 0.215800940990448, "learning_rate": 5.185567800837493e-05, "loss": 1.9664, "step": 308200 }, { "epoch": 1.1731233300092112, "grad_norm": 0.16304409503936768, "learning_rate": 5.1809192419121575e-05, "loss": 1.963, "step": 308210 }, { "epoch": 1.1731613924773339, "grad_norm": 0.18333649635314941, "learning_rate": 5.176271165077456e-05, "loss": 1.9536, "step": 308220 }, { "epoch": 1.1731994549454565, "grad_norm": 0.1928211748600006, "learning_rate": 5.171623570183437e-05, "loss": 1.9775, "step": 308230 }, { "epoch": 1.1732375174135792, "grad_norm": 0.22613413631916046, "learning_rate": 5.166976457080208e-05, "loss": 1.9617, "step": 308240 }, { "epoch": 1.1732755798817018, "grad_norm": 0.1687794327735901, "learning_rate": 5.162329825617984e-05, "loss": 1.9512, "step": 308250 }, { "epoch": 1.1733136423498245, "grad_norm": 0.16178713738918304, "learning_rate": 5.157683675647023e-05, "loss": 1.9437, "step": 308260 }, { "epoch": 1.1733517048179471, "grad_norm": 0.2514328062534332, "learning_rate": 5.1530380070176906e-05, "loss": 1.9582, "step": 308270 }, { "epoch": 1.1733897672860698, "grad_norm": 0.20378462970256805, "learning_rate": 5.148392819580405e-05, "loss": 1.962, "step": 308280 }, { "epoch": 1.1734278297541927, "grad_norm": 0.19982340931892395, "learning_rate": 5.143748113185681e-05, "loss": 1.9499, "step": 308290 }, { "epoch": 1.1734658922223153, "grad_norm": 0.19612081348896027, "learning_rate": 5.139103887684099e-05, "loss": 1.9618, "step": 308300 }, { "epoch": 1.173503954690438, "grad_norm": 0.26761767268180847, "learning_rate": 5.1344601429263225e-05, "loss": 1.9622, "step": 308310 }, { "epoch": 1.1735420171585607, "grad_norm": 0.34418386220932007, "learning_rate": 5.129816878763094e-05, "loss": 1.9521, "step": 308320 }, { "epoch": 1.1735800796266833, "grad_norm": 0.19532065093517303, "learning_rate": 5.125174095045226e-05, "loss": 1.9598, "step": 308330 }, { "epoch": 1.173618142094806, "grad_norm": 0.16967004537582397, "learning_rate": 5.120531791623617e-05, "loss": 1.9729, "step": 308340 }, { "epoch": 1.1736562045629286, "grad_norm": 0.20884713530540466, "learning_rate": 5.11588996834923e-05, "loss": 1.9408, "step": 308350 }, { "epoch": 1.1736942670310513, "grad_norm": 0.19751541316509247, "learning_rate": 5.111248625073117e-05, "loss": 1.9677, "step": 308360 }, { "epoch": 1.173732329499174, "grad_norm": 0.2124282270669937, "learning_rate": 5.106607761646403e-05, "loss": 1.9533, "step": 308370 }, { "epoch": 1.1737703919672968, "grad_norm": 0.16762572526931763, "learning_rate": 5.101967377920297e-05, "loss": 1.9353, "step": 308380 }, { "epoch": 1.1738084544354195, "grad_norm": 0.1747930943965912, "learning_rate": 5.097327473746066e-05, "loss": 1.9539, "step": 308390 }, { "epoch": 1.1738465169035421, "grad_norm": 0.274648517370224, "learning_rate": 5.0926880489750697e-05, "loss": 1.9545, "step": 308400 }, { "epoch": 1.1738845793716648, "grad_norm": 0.16737380623817444, "learning_rate": 5.0880491034587415e-05, "loss": 1.9434, "step": 308410 }, { "epoch": 1.1739226418397875, "grad_norm": 0.24273249506950378, "learning_rate": 5.083410637048585e-05, "loss": 1.9465, "step": 308420 }, { "epoch": 1.17396070430791, "grad_norm": 0.22172129154205322, "learning_rate": 5.0787726495961975e-05, "loss": 1.9555, "step": 308430 }, { "epoch": 1.1739987667760328, "grad_norm": 0.20090153813362122, "learning_rate": 5.0741351409532286e-05, "loss": 1.9495, "step": 308440 }, { "epoch": 1.1740368292441554, "grad_norm": 0.2024862915277481, "learning_rate": 5.069498110971427e-05, "loss": 1.9506, "step": 308450 }, { "epoch": 1.1740748917122783, "grad_norm": 0.19691678881645203, "learning_rate": 5.064861559502604e-05, "loss": 1.9438, "step": 308460 }, { "epoch": 1.174112954180401, "grad_norm": 0.16210220754146576, "learning_rate": 5.060225486398651e-05, "loss": 1.9568, "step": 308470 }, { "epoch": 1.1741510166485236, "grad_norm": 0.1802566796541214, "learning_rate": 5.0555898915115365e-05, "loss": 1.9371, "step": 308480 }, { "epoch": 1.1741890791166463, "grad_norm": 0.1668899655342102, "learning_rate": 5.050954774693306e-05, "loss": 1.9509, "step": 308490 }, { "epoch": 1.174227141584769, "grad_norm": 0.23647716641426086, "learning_rate": 5.046320135796073e-05, "loss": 1.9583, "step": 308500 }, { "epoch": 1.1742652040528916, "grad_norm": 0.17502707242965698, "learning_rate": 5.04168597467205e-05, "loss": 1.9474, "step": 308510 }, { "epoch": 1.1743032665210142, "grad_norm": 0.1797052025794983, "learning_rate": 5.0370522911734936e-05, "loss": 1.9501, "step": 308520 }, { "epoch": 1.174341328989137, "grad_norm": 0.1689724177122116, "learning_rate": 5.032419085152767e-05, "loss": 1.9597, "step": 308530 }, { "epoch": 1.1743793914572596, "grad_norm": 0.20890824496746063, "learning_rate": 5.0277863564622826e-05, "loss": 1.9407, "step": 308540 }, { "epoch": 1.1744174539253822, "grad_norm": 0.17047648131847382, "learning_rate": 5.0231541049545526e-05, "loss": 1.9345, "step": 308550 }, { "epoch": 1.174455516393505, "grad_norm": 0.19897052645683289, "learning_rate": 5.018522330482145e-05, "loss": 1.9541, "step": 308560 }, { "epoch": 1.1744935788616278, "grad_norm": 0.18051151931285858, "learning_rate": 5.013891032897722e-05, "loss": 1.9606, "step": 308570 }, { "epoch": 1.1745316413297504, "grad_norm": 0.1798298954963684, "learning_rate": 5.009260212054007e-05, "loss": 1.9582, "step": 308580 }, { "epoch": 1.174569703797873, "grad_norm": 0.22572393715381622, "learning_rate": 5.004629867803806e-05, "loss": 1.9626, "step": 308590 }, { "epoch": 1.1746077662659957, "grad_norm": 0.2541802227497101, "learning_rate": 4.999999999999999e-05, "loss": 1.9604, "step": 308600 }, { "epoch": 1.1746458287341184, "grad_norm": 0.16402609646320343, "learning_rate": 4.995370608495542e-05, "loss": 1.9521, "step": 308610 }, { "epoch": 1.174683891202241, "grad_norm": 0.19087038934230804, "learning_rate": 4.9907416931434745e-05, "loss": 1.9573, "step": 308620 }, { "epoch": 1.174721953670364, "grad_norm": 0.18596763908863068, "learning_rate": 4.9861132537968924e-05, "loss": 1.9596, "step": 308630 }, { "epoch": 1.1747600161384866, "grad_norm": 0.1944790929555893, "learning_rate": 4.981485290308979e-05, "loss": 1.9552, "step": 308640 }, { "epoch": 1.1747980786066092, "grad_norm": 0.18553079664707184, "learning_rate": 4.976857802533002e-05, "loss": 1.9522, "step": 308650 }, { "epoch": 1.1748361410747319, "grad_norm": 0.18692132830619812, "learning_rate": 4.9722307903222906e-05, "loss": 1.9618, "step": 308660 }, { "epoch": 1.1748742035428545, "grad_norm": 0.1631677895784378, "learning_rate": 4.96760425353025e-05, "loss": 1.941, "step": 308670 }, { "epoch": 1.1749122660109772, "grad_norm": 0.1892775148153305, "learning_rate": 4.96297819201037e-05, "loss": 1.9443, "step": 308680 }, { "epoch": 1.1749503284790999, "grad_norm": 0.2765691876411438, "learning_rate": 4.958352605616212e-05, "loss": 1.9605, "step": 308690 }, { "epoch": 1.1749883909472225, "grad_norm": 0.2970679998397827, "learning_rate": 4.9537274942014045e-05, "loss": 1.9544, "step": 308700 }, { "epoch": 1.1750264534153452, "grad_norm": 0.16758288443088531, "learning_rate": 4.949102857619658e-05, "loss": 1.9628, "step": 308710 }, { "epoch": 1.1750645158834678, "grad_norm": 0.1633490025997162, "learning_rate": 4.944478695724758e-05, "loss": 1.949, "step": 308720 }, { "epoch": 1.1751025783515907, "grad_norm": 0.19200217723846436, "learning_rate": 4.9398550083705695e-05, "loss": 1.9588, "step": 308730 }, { "epoch": 1.1751406408197134, "grad_norm": 0.18912263214588165, "learning_rate": 4.935231795411027e-05, "loss": 1.9559, "step": 308740 }, { "epoch": 1.175178703287836, "grad_norm": 0.23634260892868042, "learning_rate": 4.930609056700136e-05, "loss": 1.9593, "step": 308750 }, { "epoch": 1.1752167657559587, "grad_norm": 0.16199201345443726, "learning_rate": 4.9259867920919796e-05, "loss": 1.9608, "step": 308760 }, { "epoch": 1.1752548282240813, "grad_norm": 0.19153495132923126, "learning_rate": 4.9213650014407254e-05, "loss": 1.9599, "step": 308770 }, { "epoch": 1.175292890692204, "grad_norm": 0.20420241355895996, "learning_rate": 4.9167436846005996e-05, "loss": 1.9383, "step": 308780 }, { "epoch": 1.1753309531603267, "grad_norm": 0.16810394823551178, "learning_rate": 4.9121228414259145e-05, "loss": 1.949, "step": 308790 }, { "epoch": 1.1753690156284495, "grad_norm": 0.16946613788604736, "learning_rate": 4.907502471771058e-05, "loss": 1.9318, "step": 308800 }, { "epoch": 1.1754070780965722, "grad_norm": 0.20083031058311462, "learning_rate": 4.90288257549048e-05, "loss": 1.9548, "step": 308810 }, { "epoch": 1.1754451405646948, "grad_norm": 0.16079193353652954, "learning_rate": 4.8982631524387255e-05, "loss": 1.9533, "step": 308820 }, { "epoch": 1.1754832030328175, "grad_norm": 0.21811576187610626, "learning_rate": 4.8936442024703875e-05, "loss": 1.9406, "step": 308830 }, { "epoch": 1.1755212655009402, "grad_norm": 0.16867418587207794, "learning_rate": 4.8890257254401607e-05, "loss": 1.9484, "step": 308840 }, { "epoch": 1.1755593279690628, "grad_norm": 0.1909358650445938, "learning_rate": 4.8844077212027884e-05, "loss": 1.9523, "step": 308850 }, { "epoch": 1.1755973904371855, "grad_norm": 0.1731940060853958, "learning_rate": 4.879790189613109e-05, "loss": 1.9482, "step": 308860 }, { "epoch": 1.1756354529053081, "grad_norm": 0.16265356540679932, "learning_rate": 4.875173130526028e-05, "loss": 1.9485, "step": 308870 }, { "epoch": 1.1756735153734308, "grad_norm": 0.23693326115608215, "learning_rate": 4.8705565437965224e-05, "loss": 1.9544, "step": 308880 }, { "epoch": 1.1757115778415534, "grad_norm": 0.18974530696868896, "learning_rate": 4.865940429279647e-05, "loss": 1.9717, "step": 308890 }, { "epoch": 1.1757496403096763, "grad_norm": 0.22193016111850739, "learning_rate": 4.861324786830529e-05, "loss": 1.945, "step": 308900 }, { "epoch": 1.175787702777799, "grad_norm": 0.19164617359638214, "learning_rate": 4.8567096163043665e-05, "loss": 1.9572, "step": 308910 }, { "epoch": 1.1758257652459216, "grad_norm": 0.18589046597480774, "learning_rate": 4.8520949175564376e-05, "loss": 1.9483, "step": 308920 }, { "epoch": 1.1758638277140443, "grad_norm": 0.18081121146678925, "learning_rate": 4.847480690442091e-05, "loss": 1.9562, "step": 308930 }, { "epoch": 1.175901890182167, "grad_norm": 0.21181713044643402, "learning_rate": 4.842866934816747e-05, "loss": 1.9296, "step": 308940 }, { "epoch": 1.1759399526502896, "grad_norm": 0.18270161747932434, "learning_rate": 4.8382536505359065e-05, "loss": 1.947, "step": 308950 }, { "epoch": 1.1759780151184123, "grad_norm": 0.24431847035884857, "learning_rate": 4.8336408374551446e-05, "loss": 1.9363, "step": 308960 }, { "epoch": 1.176016077586535, "grad_norm": 0.16476482152938843, "learning_rate": 4.829028495430088e-05, "loss": 1.9545, "step": 308970 }, { "epoch": 1.1760541400546578, "grad_norm": 0.16913557052612305, "learning_rate": 4.82441662431648e-05, "loss": 1.9538, "step": 308980 }, { "epoch": 1.1760922025227805, "grad_norm": 0.23285558819770813, "learning_rate": 4.8198052239700916e-05, "loss": 1.962, "step": 308990 }, { "epoch": 1.1761302649909031, "grad_norm": 0.20442961156368256, "learning_rate": 4.815194294246805e-05, "loss": 1.9402, "step": 309000 }, { "epoch": 1.1761683274590258, "grad_norm": 0.2592919170856476, "learning_rate": 4.810583835002547e-05, "loss": 1.9595, "step": 309010 }, { "epoch": 1.1762063899271484, "grad_norm": 0.2001858800649643, "learning_rate": 4.805973846093331e-05, "loss": 1.9597, "step": 309020 }, { "epoch": 1.176244452395271, "grad_norm": 0.2916547656059265, "learning_rate": 4.801364327375246e-05, "loss": 1.9454, "step": 309030 }, { "epoch": 1.1762825148633937, "grad_norm": 0.20770905911922455, "learning_rate": 4.796755278704456e-05, "loss": 1.9377, "step": 309040 }, { "epoch": 1.1763205773315164, "grad_norm": 0.17296180129051208, "learning_rate": 4.792146699937189e-05, "loss": 1.9505, "step": 309050 }, { "epoch": 1.176358639799639, "grad_norm": 0.19396503269672394, "learning_rate": 4.787538590929752e-05, "loss": 1.9531, "step": 309060 }, { "epoch": 1.176396702267762, "grad_norm": 0.17859074473381042, "learning_rate": 4.782930951538516e-05, "loss": 1.9766, "step": 309070 }, { "epoch": 1.1764347647358846, "grad_norm": 0.20435374975204468, "learning_rate": 4.778323781619948e-05, "loss": 1.9643, "step": 309080 }, { "epoch": 1.1764728272040073, "grad_norm": 0.20311976969242096, "learning_rate": 4.77371708103056e-05, "loss": 1.9517, "step": 309090 }, { "epoch": 1.17651088967213, "grad_norm": 0.1713547557592392, "learning_rate": 4.769110849626962e-05, "loss": 1.9468, "step": 309100 }, { "epoch": 1.1765489521402526, "grad_norm": 0.23550809919834137, "learning_rate": 4.764505087265819e-05, "loss": 1.9523, "step": 309110 }, { "epoch": 1.1765870146083752, "grad_norm": 0.26078513264656067, "learning_rate": 4.759899793803874e-05, "loss": 1.9644, "step": 309120 }, { "epoch": 1.1766250770764979, "grad_norm": 0.23422762751579285, "learning_rate": 4.755294969097951e-05, "loss": 1.9594, "step": 309130 }, { "epoch": 1.1766631395446205, "grad_norm": 0.1606295108795166, "learning_rate": 4.750690613004932e-05, "loss": 1.9612, "step": 309140 }, { "epoch": 1.1767012020127434, "grad_norm": 0.18316294252872467, "learning_rate": 4.746086725381782e-05, "loss": 1.9563, "step": 309150 }, { "epoch": 1.176739264480866, "grad_norm": 0.17885041236877441, "learning_rate": 4.741483306085548e-05, "loss": 1.9468, "step": 309160 }, { "epoch": 1.1767773269489887, "grad_norm": 0.18115143477916718, "learning_rate": 4.736880354973322e-05, "loss": 1.9709, "step": 309170 }, { "epoch": 1.1768153894171114, "grad_norm": 0.21183575689792633, "learning_rate": 4.7322778719022965e-05, "loss": 1.9343, "step": 309180 }, { "epoch": 1.176853451885234, "grad_norm": 0.16498693823814392, "learning_rate": 4.7276758567297185e-05, "loss": 1.9554, "step": 309190 }, { "epoch": 1.1768915143533567, "grad_norm": 0.16676482558250427, "learning_rate": 4.723074309312919e-05, "loss": 1.9722, "step": 309200 }, { "epoch": 1.1769295768214794, "grad_norm": 0.18812629580497742, "learning_rate": 4.7184732295092906e-05, "loss": 1.9486, "step": 309210 }, { "epoch": 1.176967639289602, "grad_norm": 0.18773570656776428, "learning_rate": 4.7138726171763124e-05, "loss": 1.9511, "step": 309220 }, { "epoch": 1.1770057017577247, "grad_norm": 0.1896013617515564, "learning_rate": 4.709272472171527e-05, "loss": 1.9579, "step": 309230 }, { "epoch": 1.1770437642258476, "grad_norm": 0.23378810286521912, "learning_rate": 4.7046727943525435e-05, "loss": 1.9618, "step": 309240 }, { "epoch": 1.1770818266939702, "grad_norm": 0.19067463278770447, "learning_rate": 4.700073583577058e-05, "loss": 1.9452, "step": 309250 }, { "epoch": 1.1771198891620929, "grad_norm": 0.35932278633117676, "learning_rate": 4.695474839702823e-05, "loss": 1.9587, "step": 309260 }, { "epoch": 1.1771579516302155, "grad_norm": 0.2065085619688034, "learning_rate": 4.6908765625876756e-05, "loss": 1.9413, "step": 309270 }, { "epoch": 1.1771960140983382, "grad_norm": 0.1663128137588501, "learning_rate": 4.6862787520895236e-05, "loss": 1.9552, "step": 309280 }, { "epoch": 1.1772340765664608, "grad_norm": 0.1832168996334076, "learning_rate": 4.681681408066341e-05, "loss": 1.9498, "step": 309290 }, { "epoch": 1.1772721390345835, "grad_norm": 0.1732766032218933, "learning_rate": 4.677084530376174e-05, "loss": 1.9633, "step": 309300 }, { "epoch": 1.1773102015027062, "grad_norm": 0.17328311502933502, "learning_rate": 4.6724881188771496e-05, "loss": 1.9516, "step": 309310 }, { "epoch": 1.177348263970829, "grad_norm": 0.21596384048461914, "learning_rate": 4.6678921734274514e-05, "loss": 1.9495, "step": 309320 }, { "epoch": 1.1773863264389517, "grad_norm": 0.16711944341659546, "learning_rate": 4.663296693885355e-05, "loss": 1.9413, "step": 309330 }, { "epoch": 1.1774243889070743, "grad_norm": 0.18340061604976654, "learning_rate": 4.6587016801091896e-05, "loss": 1.9523, "step": 309340 }, { "epoch": 1.177462451375197, "grad_norm": 0.25791123509407043, "learning_rate": 4.654107131957363e-05, "loss": 1.9586, "step": 309350 }, { "epoch": 1.1775005138433197, "grad_norm": 0.2621697187423706, "learning_rate": 4.649513049288362e-05, "loss": 1.9548, "step": 309360 }, { "epoch": 1.1775385763114423, "grad_norm": 0.2004096806049347, "learning_rate": 4.6449194319607316e-05, "loss": 1.9617, "step": 309370 }, { "epoch": 1.177576638779565, "grad_norm": 0.20095586776733398, "learning_rate": 4.640326279833096e-05, "loss": 1.9636, "step": 309380 }, { "epoch": 1.1776147012476876, "grad_norm": 0.16082298755645752, "learning_rate": 4.635733592764152e-05, "loss": 1.9451, "step": 309390 }, { "epoch": 1.1776527637158103, "grad_norm": 0.17891064286231995, "learning_rate": 4.631141370612668e-05, "loss": 1.9619, "step": 309400 }, { "epoch": 1.177690826183933, "grad_norm": 0.19131295382976532, "learning_rate": 4.62654961323748e-05, "loss": 1.9525, "step": 309410 }, { "epoch": 1.1777288886520558, "grad_norm": 0.21167497336864471, "learning_rate": 4.621958320497493e-05, "loss": 1.931, "step": 309420 }, { "epoch": 1.1777669511201785, "grad_norm": 0.15909339487552643, "learning_rate": 4.617367492251695e-05, "loss": 1.9569, "step": 309430 }, { "epoch": 1.1778050135883011, "grad_norm": 0.1648852676153183, "learning_rate": 4.61277712835913e-05, "loss": 1.9531, "step": 309440 }, { "epoch": 1.1778430760564238, "grad_norm": 0.17828480899333954, "learning_rate": 4.608187228678928e-05, "loss": 1.961, "step": 309450 }, { "epoch": 1.1778811385245465, "grad_norm": 0.21379193663597107, "learning_rate": 4.60359779307028e-05, "loss": 1.9505, "step": 309460 }, { "epoch": 1.1779192009926691, "grad_norm": 0.16374193131923676, "learning_rate": 4.599008821392459e-05, "loss": 1.9543, "step": 309470 }, { "epoch": 1.1779572634607918, "grad_norm": 0.18641433119773865, "learning_rate": 4.594420313504788e-05, "loss": 1.9334, "step": 309480 }, { "epoch": 1.1779953259289146, "grad_norm": 0.16827502846717834, "learning_rate": 4.589832269266686e-05, "loss": 1.9557, "step": 309490 }, { "epoch": 1.1780333883970373, "grad_norm": 0.17855706810951233, "learning_rate": 4.5852446885376266e-05, "loss": 1.9442, "step": 309500 }, { "epoch": 1.17807145086516, "grad_norm": 0.16724631190299988, "learning_rate": 4.5806575711771606e-05, "loss": 1.941, "step": 309510 }, { "epoch": 1.1781095133332826, "grad_norm": 0.17224138975143433, "learning_rate": 4.576070917044906e-05, "loss": 1.9512, "step": 309520 }, { "epoch": 1.1781475758014053, "grad_norm": 0.16810820996761322, "learning_rate": 4.571484726000563e-05, "loss": 1.946, "step": 309530 }, { "epoch": 1.178185638269528, "grad_norm": 0.24897336959838867, "learning_rate": 4.56689899790389e-05, "loss": 1.9474, "step": 309540 }, { "epoch": 1.1782237007376506, "grad_norm": 0.20070362091064453, "learning_rate": 4.5623137326147144e-05, "loss": 1.9635, "step": 309550 }, { "epoch": 1.1782617632057732, "grad_norm": 0.1960253268480301, "learning_rate": 4.557728929992949e-05, "loss": 1.9475, "step": 309560 }, { "epoch": 1.178299825673896, "grad_norm": 0.20311059057712555, "learning_rate": 4.5531445898985556e-05, "loss": 1.9517, "step": 309570 }, { "epoch": 1.1783378881420186, "grad_norm": 0.19430354237556458, "learning_rate": 4.548560712191596e-05, "loss": 1.9538, "step": 309580 }, { "epoch": 1.1783759506101414, "grad_norm": 0.24194493889808655, "learning_rate": 4.543977296732177e-05, "loss": 1.9608, "step": 309590 }, { "epoch": 1.178414013078264, "grad_norm": 0.22881710529327393, "learning_rate": 4.539394343380482e-05, "loss": 1.9712, "step": 309600 }, { "epoch": 1.1784520755463868, "grad_norm": 0.16793493926525116, "learning_rate": 4.534811851996773e-05, "loss": 1.9553, "step": 309610 }, { "epoch": 1.1784901380145094, "grad_norm": 0.1682312786579132, "learning_rate": 4.530229822441373e-05, "loss": 1.9439, "step": 309620 }, { "epoch": 1.178528200482632, "grad_norm": 0.2150745689868927, "learning_rate": 4.5256482545746825e-05, "loss": 1.9562, "step": 309630 }, { "epoch": 1.1785662629507547, "grad_norm": 0.16091136634349823, "learning_rate": 4.521067148257168e-05, "loss": 1.9522, "step": 309640 }, { "epoch": 1.1786043254188774, "grad_norm": 0.20136001706123352, "learning_rate": 4.5164865033493705e-05, "loss": 1.9651, "step": 309650 }, { "epoch": 1.1786423878870003, "grad_norm": 0.21942678093910217, "learning_rate": 4.511906319711895e-05, "loss": 1.9523, "step": 309660 }, { "epoch": 1.178680450355123, "grad_norm": 0.1780799776315689, "learning_rate": 4.507326597205419e-05, "loss": 1.9447, "step": 309670 }, { "epoch": 1.1787185128232456, "grad_norm": 0.17522697150707245, "learning_rate": 4.502747335690699e-05, "loss": 1.9513, "step": 309680 }, { "epoch": 1.1787565752913682, "grad_norm": 0.19580373167991638, "learning_rate": 4.498168535028541e-05, "loss": 1.9591, "step": 309690 }, { "epoch": 1.178794637759491, "grad_norm": 0.16261278092861176, "learning_rate": 4.493590195079844e-05, "loss": 1.9678, "step": 309700 }, { "epoch": 1.1788327002276136, "grad_norm": 0.17700830101966858, "learning_rate": 4.48901231570556e-05, "loss": 1.9639, "step": 309710 }, { "epoch": 1.1788707626957362, "grad_norm": 0.16604109108448029, "learning_rate": 4.4844348967667256e-05, "loss": 1.9421, "step": 309720 }, { "epoch": 1.1789088251638589, "grad_norm": 0.18127460777759552, "learning_rate": 4.479857938124432e-05, "loss": 1.955, "step": 309730 }, { "epoch": 1.1789468876319815, "grad_norm": 0.17123474180698395, "learning_rate": 4.47528143963985e-05, "loss": 1.9516, "step": 309740 }, { "epoch": 1.1789849501001042, "grad_norm": 0.21618498861789703, "learning_rate": 4.470705401174213e-05, "loss": 1.948, "step": 309750 }, { "epoch": 1.179023012568227, "grad_norm": 0.24029800295829773, "learning_rate": 4.466129822588838e-05, "loss": 1.9471, "step": 309760 }, { "epoch": 1.1790610750363497, "grad_norm": 0.1724097579717636, "learning_rate": 4.461554703745102e-05, "loss": 1.968, "step": 309770 }, { "epoch": 1.1790991375044724, "grad_norm": 0.18432177603244781, "learning_rate": 4.456980044504444e-05, "loss": 1.9507, "step": 309780 }, { "epoch": 1.179137199972595, "grad_norm": 0.22263087332248688, "learning_rate": 4.45240584472838e-05, "loss": 1.9441, "step": 309790 }, { "epoch": 1.1791752624407177, "grad_norm": 0.19197730720043182, "learning_rate": 4.447832104278504e-05, "loss": 1.9578, "step": 309800 }, { "epoch": 1.1792133249088403, "grad_norm": 0.17583264410495758, "learning_rate": 4.4432588230164716e-05, "loss": 1.9589, "step": 309810 }, { "epoch": 1.179251387376963, "grad_norm": 0.20924118161201477, "learning_rate": 4.438686000803999e-05, "loss": 1.9488, "step": 309820 }, { "epoch": 1.1792894498450857, "grad_norm": 0.17755398154258728, "learning_rate": 4.43411363750289e-05, "loss": 1.9513, "step": 309830 }, { "epoch": 1.1793275123132085, "grad_norm": 0.16246619820594788, "learning_rate": 4.4295417329750065e-05, "loss": 1.9466, "step": 309840 }, { "epoch": 1.1793655747813312, "grad_norm": 0.16991542279720306, "learning_rate": 4.424970287082275e-05, "loss": 1.9484, "step": 309850 }, { "epoch": 1.1794036372494539, "grad_norm": 0.19759690761566162, "learning_rate": 4.420399299686706e-05, "loss": 1.9442, "step": 309860 }, { "epoch": 1.1794416997175765, "grad_norm": 0.17393594980239868, "learning_rate": 4.4158287706503654e-05, "loss": 1.9552, "step": 309870 }, { "epoch": 1.1794797621856992, "grad_norm": 0.19019247591495514, "learning_rate": 4.411258699835402e-05, "loss": 1.9498, "step": 309880 }, { "epoch": 1.1795178246538218, "grad_norm": 0.16410709917545319, "learning_rate": 4.4066890871040154e-05, "loss": 1.9563, "step": 309890 }, { "epoch": 1.1795558871219445, "grad_norm": 0.25346609950065613, "learning_rate": 4.4021199323184935e-05, "loss": 1.9568, "step": 309900 }, { "epoch": 1.1795939495900671, "grad_norm": 0.17545926570892334, "learning_rate": 4.3975512353411736e-05, "loss": 1.9559, "step": 309910 }, { "epoch": 1.1796320120581898, "grad_norm": 0.16444379091262817, "learning_rate": 4.3929829960344834e-05, "loss": 1.9442, "step": 309920 }, { "epoch": 1.1796700745263127, "grad_norm": 0.21553288400173187, "learning_rate": 4.388415214260899e-05, "loss": 1.9604, "step": 309930 }, { "epoch": 1.1797081369944353, "grad_norm": 0.19073207676410675, "learning_rate": 4.38384788988298e-05, "loss": 1.9669, "step": 309940 }, { "epoch": 1.179746199462558, "grad_norm": 0.19403015077114105, "learning_rate": 4.379281022763354e-05, "loss": 1.9532, "step": 309950 }, { "epoch": 1.1797842619306806, "grad_norm": 0.20546835660934448, "learning_rate": 4.374714612764702e-05, "loss": 1.9527, "step": 309960 }, { "epoch": 1.1798223243988033, "grad_norm": 0.17869624495506287, "learning_rate": 4.370148659749795e-05, "loss": 1.9535, "step": 309970 }, { "epoch": 1.179860386866926, "grad_norm": 0.2714109718799591, "learning_rate": 4.3655831635814604e-05, "loss": 1.9581, "step": 309980 }, { "epoch": 1.1798984493350486, "grad_norm": 0.1904449611902237, "learning_rate": 4.36101812412259e-05, "loss": 1.9384, "step": 309990 }, { "epoch": 1.1799365118031713, "grad_norm": 0.2278692126274109, "learning_rate": 4.356453541236155e-05, "loss": 1.9505, "step": 310000 }, { "epoch": 1.1799745742712942, "grad_norm": 0.2904459834098816, "learning_rate": 4.351889414785193e-05, "loss": 1.9501, "step": 310010 }, { "epoch": 1.1800126367394168, "grad_norm": 0.16368386149406433, "learning_rate": 4.347325744632807e-05, "loss": 1.9456, "step": 310020 }, { "epoch": 1.1800506992075395, "grad_norm": 0.19859318435192108, "learning_rate": 4.3427625306421635e-05, "loss": 1.9482, "step": 310030 }, { "epoch": 1.1800887616756621, "grad_norm": 0.254438579082489, "learning_rate": 4.338199772676504e-05, "loss": 1.9418, "step": 310040 }, { "epoch": 1.1801268241437848, "grad_norm": 0.16224907338619232, "learning_rate": 4.333637470599144e-05, "loss": 1.9517, "step": 310050 }, { "epoch": 1.1801648866119074, "grad_norm": 0.18530027568340302, "learning_rate": 4.329075624273454e-05, "loss": 1.947, "step": 310060 }, { "epoch": 1.18020294908003, "grad_norm": 0.18948470056056976, "learning_rate": 4.324514233562876e-05, "loss": 1.939, "step": 310070 }, { "epoch": 1.1802410115481528, "grad_norm": 0.17915013432502747, "learning_rate": 4.319953298330937e-05, "loss": 1.9549, "step": 310080 }, { "epoch": 1.1802790740162754, "grad_norm": 0.17633748054504395, "learning_rate": 4.315392818441205e-05, "loss": 1.9545, "step": 310090 }, { "epoch": 1.1803171364843983, "grad_norm": 0.1659269481897354, "learning_rate": 4.310832793757335e-05, "loss": 1.9408, "step": 310100 }, { "epoch": 1.180355198952521, "grad_norm": 0.20943781733512878, "learning_rate": 4.306273224143048e-05, "loss": 1.9491, "step": 310110 }, { "epoch": 1.1803932614206436, "grad_norm": 0.200185164809227, "learning_rate": 4.301714109462118e-05, "loss": 1.95, "step": 310120 }, { "epoch": 1.1804313238887663, "grad_norm": 0.19608174264431, "learning_rate": 4.297155449578416e-05, "loss": 1.9572, "step": 310130 }, { "epoch": 1.180469386356889, "grad_norm": 0.1803928017616272, "learning_rate": 4.2925972443558444e-05, "loss": 1.955, "step": 310140 }, { "epoch": 1.1805074488250116, "grad_norm": 0.19274504482746124, "learning_rate": 4.2880394936584076e-05, "loss": 1.9401, "step": 310150 }, { "epoch": 1.1805455112931342, "grad_norm": 0.18078354001045227, "learning_rate": 4.283482197350158e-05, "loss": 1.9405, "step": 310160 }, { "epoch": 1.1805835737612569, "grad_norm": 0.1925884485244751, "learning_rate": 4.278925355295216e-05, "loss": 1.939, "step": 310170 }, { "epoch": 1.1806216362293798, "grad_norm": 0.301413893699646, "learning_rate": 4.2743689673577776e-05, "loss": 1.9515, "step": 310180 }, { "epoch": 1.1806596986975024, "grad_norm": 0.2065173089504242, "learning_rate": 4.269813033402103e-05, "loss": 1.949, "step": 310190 }, { "epoch": 1.180697761165625, "grad_norm": 0.17483308911323547, "learning_rate": 4.265257553292523e-05, "loss": 1.9521, "step": 310200 }, { "epoch": 1.1807358236337477, "grad_norm": 0.17191389203071594, "learning_rate": 4.2607025268934284e-05, "loss": 1.9715, "step": 310210 }, { "epoch": 1.1807738861018704, "grad_norm": 0.21063725650310516, "learning_rate": 4.2561479540692835e-05, "loss": 1.9425, "step": 310220 }, { "epoch": 1.180811948569993, "grad_norm": 0.19254496693611145, "learning_rate": 4.25159383468462e-05, "loss": 1.9571, "step": 310230 }, { "epoch": 1.1808500110381157, "grad_norm": 0.17268456518650055, "learning_rate": 4.247040168604027e-05, "loss": 1.9453, "step": 310240 }, { "epoch": 1.1808880735062384, "grad_norm": 0.17198698222637177, "learning_rate": 4.242486955692182e-05, "loss": 1.9469, "step": 310250 }, { "epoch": 1.180926135974361, "grad_norm": 0.24686148762702942, "learning_rate": 4.237934195813814e-05, "loss": 1.9493, "step": 310260 }, { "epoch": 1.1809641984424837, "grad_norm": 0.16179026663303375, "learning_rate": 4.233381888833721e-05, "loss": 1.9527, "step": 310270 }, { "epoch": 1.1810022609106066, "grad_norm": 0.20546956360340118, "learning_rate": 4.228830034616771e-05, "loss": 1.9437, "step": 310280 }, { "epoch": 1.1810403233787292, "grad_norm": 0.18097162246704102, "learning_rate": 4.2242786330278946e-05, "loss": 1.9525, "step": 310290 }, { "epoch": 1.1810783858468519, "grad_norm": 0.19285722076892853, "learning_rate": 4.2197276839320954e-05, "loss": 1.9466, "step": 310300 }, { "epoch": 1.1811164483149745, "grad_norm": 0.18017497658729553, "learning_rate": 4.215177187194446e-05, "loss": 1.9542, "step": 310310 }, { "epoch": 1.1811545107830972, "grad_norm": 0.1642260104417801, "learning_rate": 4.210627142680073e-05, "loss": 1.943, "step": 310320 }, { "epoch": 1.1811925732512198, "grad_norm": 0.19965514540672302, "learning_rate": 4.206077550254189e-05, "loss": 1.9523, "step": 310330 }, { "epoch": 1.1812306357193425, "grad_norm": 0.16829894483089447, "learning_rate": 4.201528409782057e-05, "loss": 1.9378, "step": 310340 }, { "epoch": 1.1812686981874654, "grad_norm": 0.21287326514720917, "learning_rate": 4.1969797211290184e-05, "loss": 1.9502, "step": 310350 }, { "epoch": 1.181306760655588, "grad_norm": 0.18998737633228302, "learning_rate": 4.192431484160464e-05, "loss": 1.9561, "step": 310360 }, { "epoch": 1.1813448231237107, "grad_norm": 0.32120609283447266, "learning_rate": 4.187883698741879e-05, "loss": 1.964, "step": 310370 }, { "epoch": 1.1813828855918334, "grad_norm": 0.19673456251621246, "learning_rate": 4.1833363647387936e-05, "loss": 1.9589, "step": 310380 }, { "epoch": 1.181420948059956, "grad_norm": 0.26058077812194824, "learning_rate": 4.1787894820168095e-05, "loss": 1.9454, "step": 310390 }, { "epoch": 1.1814590105280787, "grad_norm": 0.20470654964447021, "learning_rate": 4.1742430504416005e-05, "loss": 1.9355, "step": 310400 }, { "epoch": 1.1814970729962013, "grad_norm": 0.1853208690881729, "learning_rate": 4.1696970698789024e-05, "loss": 1.9549, "step": 310410 }, { "epoch": 1.181535135464324, "grad_norm": 0.2150898277759552, "learning_rate": 4.165151540194512e-05, "loss": 1.9366, "step": 310420 }, { "epoch": 1.1815731979324466, "grad_norm": 0.16874505579471588, "learning_rate": 4.160606461254313e-05, "loss": 1.9421, "step": 310430 }, { "epoch": 1.1816112604005693, "grad_norm": 0.29402124881744385, "learning_rate": 4.1560618329242295e-05, "loss": 1.9467, "step": 310440 }, { "epoch": 1.1816493228686922, "grad_norm": 0.17367145419120789, "learning_rate": 4.151517655070269e-05, "loss": 1.9415, "step": 310450 }, { "epoch": 1.1816873853368148, "grad_norm": 0.23331019282341003, "learning_rate": 4.1469739275585e-05, "loss": 1.9572, "step": 310460 }, { "epoch": 1.1817254478049375, "grad_norm": 0.19143813848495483, "learning_rate": 4.142430650255058e-05, "loss": 1.9506, "step": 310470 }, { "epoch": 1.1817635102730601, "grad_norm": 0.18256507813930511, "learning_rate": 4.1378878230261486e-05, "loss": 1.9535, "step": 310480 }, { "epoch": 1.1818015727411828, "grad_norm": 0.17464636266231537, "learning_rate": 4.133345445738035e-05, "loss": 1.9631, "step": 310490 }, { "epoch": 1.1818396352093055, "grad_norm": 0.23930378258228302, "learning_rate": 4.1288035182570516e-05, "loss": 1.955, "step": 310500 }, { "epoch": 1.1818776976774281, "grad_norm": 0.251952201128006, "learning_rate": 4.1242620404496e-05, "loss": 1.9445, "step": 310510 }, { "epoch": 1.181915760145551, "grad_norm": 0.17035020887851715, "learning_rate": 4.119721012182154e-05, "loss": 1.954, "step": 310520 }, { "epoch": 1.1819538226136737, "grad_norm": 0.17202967405319214, "learning_rate": 4.115180433321236e-05, "loss": 1.9375, "step": 310530 }, { "epoch": 1.1819918850817963, "grad_norm": 0.17133724689483643, "learning_rate": 4.1106403037334425e-05, "loss": 1.9619, "step": 310540 }, { "epoch": 1.182029947549919, "grad_norm": 0.21714834868907928, "learning_rate": 4.106100623285452e-05, "loss": 1.9432, "step": 310550 }, { "epoch": 1.1820680100180416, "grad_norm": 0.2697466015815735, "learning_rate": 4.1015613918439834e-05, "loss": 1.9442, "step": 310560 }, { "epoch": 1.1821060724861643, "grad_norm": 0.22082401812076569, "learning_rate": 4.097022609275841e-05, "loss": 1.9457, "step": 310570 }, { "epoch": 1.182144134954287, "grad_norm": 0.1688980758190155, "learning_rate": 4.092484275447883e-05, "loss": 1.9488, "step": 310580 }, { "epoch": 1.1821821974224096, "grad_norm": 0.17043808102607727, "learning_rate": 4.087946390227038e-05, "loss": 1.9441, "step": 310590 }, { "epoch": 1.1822202598905323, "grad_norm": 0.20406900346279144, "learning_rate": 4.083408953480294e-05, "loss": 1.9499, "step": 310600 }, { "epoch": 1.182258322358655, "grad_norm": 0.16481229662895203, "learning_rate": 4.078871965074726e-05, "loss": 1.9488, "step": 310610 }, { "epoch": 1.1822963848267778, "grad_norm": 0.15880829095840454, "learning_rate": 4.074335424877446e-05, "loss": 1.9451, "step": 310620 }, { "epoch": 1.1823344472949004, "grad_norm": 0.22673694789409637, "learning_rate": 4.069799332755653e-05, "loss": 1.9517, "step": 310630 }, { "epoch": 1.182372509763023, "grad_norm": 0.18625514209270477, "learning_rate": 4.0652636885765924e-05, "loss": 1.9469, "step": 310640 }, { "epoch": 1.1824105722311458, "grad_norm": 0.16471987962722778, "learning_rate": 4.060728492207599e-05, "loss": 1.946, "step": 310650 }, { "epoch": 1.1824486346992684, "grad_norm": 0.20095032453536987, "learning_rate": 4.056193743516051e-05, "loss": 1.9425, "step": 310660 }, { "epoch": 1.182486697167391, "grad_norm": 0.17171847820281982, "learning_rate": 4.0516594423694094e-05, "loss": 1.9447, "step": 310670 }, { "epoch": 1.1825247596355137, "grad_norm": 0.17001047730445862, "learning_rate": 4.047125588635192e-05, "loss": 1.9569, "step": 310680 }, { "epoch": 1.1825628221036366, "grad_norm": 0.21695543825626373, "learning_rate": 4.042592182180976e-05, "loss": 1.9419, "step": 310690 }, { "epoch": 1.1826008845717593, "grad_norm": 0.2746710777282715, "learning_rate": 4.038059222874413e-05, "loss": 1.9388, "step": 310700 }, { "epoch": 1.182638947039882, "grad_norm": 0.16558623313903809, "learning_rate": 4.033526710583218e-05, "loss": 1.9475, "step": 310710 }, { "epoch": 1.1826770095080046, "grad_norm": 0.18636375665664673, "learning_rate": 4.028994645175166e-05, "loss": 1.9405, "step": 310720 }, { "epoch": 1.1827150719761272, "grad_norm": 0.1795215904712677, "learning_rate": 4.0244630265181106e-05, "loss": 1.9454, "step": 310730 }, { "epoch": 1.18275313444425, "grad_norm": 0.17834648489952087, "learning_rate": 4.019931854479958e-05, "loss": 1.9597, "step": 310740 }, { "epoch": 1.1827911969123726, "grad_norm": 0.20142875611782074, "learning_rate": 4.015401128928686e-05, "loss": 1.9508, "step": 310750 }, { "epoch": 1.1828292593804952, "grad_norm": 0.320974737405777, "learning_rate": 4.010870849732323e-05, "loss": 1.947, "step": 310760 }, { "epoch": 1.1828673218486179, "grad_norm": 0.19955416023731232, "learning_rate": 4.006341016758991e-05, "loss": 1.9422, "step": 310770 }, { "epoch": 1.1829053843167405, "grad_norm": 0.2023458629846573, "learning_rate": 4.0018116298768395e-05, "loss": 1.9383, "step": 310780 }, { "epoch": 1.1829434467848634, "grad_norm": 0.1666051596403122, "learning_rate": 3.997282688954124e-05, "loss": 1.9527, "step": 310790 }, { "epoch": 1.182981509252986, "grad_norm": 0.19157986342906952, "learning_rate": 3.992754193859127e-05, "loss": 1.9395, "step": 310800 }, { "epoch": 1.1830195717211087, "grad_norm": 0.2026226967573166, "learning_rate": 3.988226144460228e-05, "loss": 1.9464, "step": 310810 }, { "epoch": 1.1830576341892314, "grad_norm": 0.2299056202173233, "learning_rate": 3.9836985406258466e-05, "loss": 1.9761, "step": 310820 }, { "epoch": 1.183095696657354, "grad_norm": 0.19825097918510437, "learning_rate": 3.979171382224478e-05, "loss": 1.9366, "step": 310830 }, { "epoch": 1.1831337591254767, "grad_norm": 0.20664182305335999, "learning_rate": 3.974644669124677e-05, "loss": 1.9402, "step": 310840 }, { "epoch": 1.1831718215935993, "grad_norm": 0.1697346717119217, "learning_rate": 3.9701184011950776e-05, "loss": 1.9308, "step": 310850 }, { "epoch": 1.183209884061722, "grad_norm": 0.20206105709075928, "learning_rate": 3.965592578304361e-05, "loss": 1.9618, "step": 310860 }, { "epoch": 1.1832479465298449, "grad_norm": 0.17904360592365265, "learning_rate": 3.9610672003212776e-05, "loss": 1.9319, "step": 310870 }, { "epoch": 1.1832860089979675, "grad_norm": 0.1792462319135666, "learning_rate": 3.956542267114649e-05, "loss": 1.958, "step": 310880 }, { "epoch": 1.1833240714660902, "grad_norm": 0.1927379071712494, "learning_rate": 3.9520177785533516e-05, "loss": 1.9403, "step": 310890 }, { "epoch": 1.1833621339342129, "grad_norm": 0.3455449342727661, "learning_rate": 3.947493734506335e-05, "loss": 1.9403, "step": 310900 }, { "epoch": 1.1834001964023355, "grad_norm": 0.2741151750087738, "learning_rate": 3.942970134842605e-05, "loss": 1.956, "step": 310910 }, { "epoch": 1.1834382588704582, "grad_norm": 0.1895100325345993, "learning_rate": 3.938446979431243e-05, "loss": 1.9479, "step": 310920 }, { "epoch": 1.1834763213385808, "grad_norm": 0.2061176300048828, "learning_rate": 3.933924268141387e-05, "loss": 1.9514, "step": 310930 }, { "epoch": 1.1835143838067035, "grad_norm": 0.21165236830711365, "learning_rate": 3.92940200084223e-05, "loss": 1.9597, "step": 310940 }, { "epoch": 1.1835524462748261, "grad_norm": 0.1790468394756317, "learning_rate": 3.924880177403051e-05, "loss": 1.9593, "step": 310950 }, { "epoch": 1.183590508742949, "grad_norm": 0.17601050436496735, "learning_rate": 3.920358797693169e-05, "loss": 1.9462, "step": 310960 }, { "epoch": 1.1836285712110717, "grad_norm": 0.17719541490077972, "learning_rate": 3.915837861581989e-05, "loss": 1.9449, "step": 310970 }, { "epoch": 1.1836666336791943, "grad_norm": 0.17760923504829407, "learning_rate": 3.911317368938972e-05, "loss": 1.9537, "step": 310980 }, { "epoch": 1.183704696147317, "grad_norm": 0.22732368111610413, "learning_rate": 3.9067973196336314e-05, "loss": 1.956, "step": 310990 }, { "epoch": 1.1837427586154396, "grad_norm": 0.23127682507038116, "learning_rate": 3.902277713535563e-05, "loss": 1.9452, "step": 311000 }, { "epoch": 1.1837808210835623, "grad_norm": 0.21646471321582794, "learning_rate": 3.897758550514419e-05, "loss": 1.9532, "step": 311010 }, { "epoch": 1.183818883551685, "grad_norm": 0.1624293029308319, "learning_rate": 3.8932398304399045e-05, "loss": 1.9427, "step": 311020 }, { "epoch": 1.1838569460198076, "grad_norm": 0.1677110195159912, "learning_rate": 3.888721553181806e-05, "loss": 1.9521, "step": 311030 }, { "epoch": 1.1838950084879305, "grad_norm": 0.17499026656150818, "learning_rate": 3.884203718609969e-05, "loss": 1.9535, "step": 311040 }, { "epoch": 1.1839330709560532, "grad_norm": 0.2630564272403717, "learning_rate": 3.879686326594295e-05, "loss": 1.9484, "step": 311050 }, { "epoch": 1.1839711334241758, "grad_norm": 0.18801864981651306, "learning_rate": 3.875169377004756e-05, "loss": 1.9652, "step": 311060 }, { "epoch": 1.1840091958922985, "grad_norm": 0.16058778762817383, "learning_rate": 3.870652869711383e-05, "loss": 1.956, "step": 311070 }, { "epoch": 1.1840472583604211, "grad_norm": 0.2152181714773178, "learning_rate": 3.8661368045842746e-05, "loss": 1.955, "step": 311080 }, { "epoch": 1.1840853208285438, "grad_norm": 0.20467229187488556, "learning_rate": 3.861621181493602e-05, "loss": 1.9561, "step": 311090 }, { "epoch": 1.1841233832966664, "grad_norm": 0.24097484350204468, "learning_rate": 3.8571060003095734e-05, "loss": 1.9561, "step": 311100 }, { "epoch": 1.184161445764789, "grad_norm": 0.19084009528160095, "learning_rate": 3.8525912609024884e-05, "loss": 1.9436, "step": 311110 }, { "epoch": 1.1841995082329118, "grad_norm": 0.18321532011032104, "learning_rate": 3.848076963142694e-05, "loss": 1.9416, "step": 311120 }, { "epoch": 1.1842375707010344, "grad_norm": 0.20860257744789124, "learning_rate": 3.843563106900611e-05, "loss": 1.9656, "step": 311130 }, { "epoch": 1.1842756331691573, "grad_norm": 0.16122521460056305, "learning_rate": 3.839049692046703e-05, "loss": 1.9442, "step": 311140 }, { "epoch": 1.18431369563728, "grad_norm": 0.1690884232521057, "learning_rate": 3.834536718451531e-05, "loss": 1.9653, "step": 311150 }, { "epoch": 1.1843517581054026, "grad_norm": 0.17913898825645447, "learning_rate": 3.8300241859856865e-05, "loss": 1.9435, "step": 311160 }, { "epoch": 1.1843898205735253, "grad_norm": 0.22345957159996033, "learning_rate": 3.825512094519845e-05, "loss": 1.9274, "step": 311170 }, { "epoch": 1.184427883041648, "grad_norm": 0.1835993528366089, "learning_rate": 3.8210004439247325e-05, "loss": 1.9451, "step": 311180 }, { "epoch": 1.1844659455097706, "grad_norm": 0.2462897002696991, "learning_rate": 3.8164892340711464e-05, "loss": 1.9447, "step": 311190 }, { "epoch": 1.1845040079778932, "grad_norm": 0.2044731229543686, "learning_rate": 3.811978464829935e-05, "loss": 1.9594, "step": 311200 }, { "epoch": 1.1845420704460161, "grad_norm": 0.19596067070960999, "learning_rate": 3.8074681360720396e-05, "loss": 1.9486, "step": 311210 }, { "epoch": 1.1845801329141388, "grad_norm": 0.16007938981056213, "learning_rate": 3.8029582476684254e-05, "loss": 1.9511, "step": 311220 }, { "epoch": 1.1846181953822614, "grad_norm": 0.17175820469856262, "learning_rate": 3.798448799490151e-05, "loss": 1.945, "step": 311230 }, { "epoch": 1.184656257850384, "grad_norm": 0.1731996387243271, "learning_rate": 3.793939791408313e-05, "loss": 1.9533, "step": 311240 }, { "epoch": 1.1846943203185067, "grad_norm": 0.1798241287469864, "learning_rate": 3.7894312232940986e-05, "loss": 1.9383, "step": 311250 }, { "epoch": 1.1847323827866294, "grad_norm": 0.22271887958049774, "learning_rate": 3.7849230950187275e-05, "loss": 1.9644, "step": 311260 }, { "epoch": 1.184770445254752, "grad_norm": 0.2261335551738739, "learning_rate": 3.780415406453508e-05, "loss": 1.9441, "step": 311270 }, { "epoch": 1.1848085077228747, "grad_norm": 0.20029690861701965, "learning_rate": 3.775908157469804e-05, "loss": 1.9342, "step": 311280 }, { "epoch": 1.1848465701909974, "grad_norm": 0.23057448863983154, "learning_rate": 3.77140134793903e-05, "loss": 1.9427, "step": 311290 }, { "epoch": 1.18488463265912, "grad_norm": 0.1756334900856018, "learning_rate": 3.7668949777326766e-05, "loss": 1.932, "step": 311300 }, { "epoch": 1.184922695127243, "grad_norm": 0.18955238163471222, "learning_rate": 3.762389046722292e-05, "loss": 1.9363, "step": 311310 }, { "epoch": 1.1849607575953656, "grad_norm": 0.18324987590312958, "learning_rate": 3.75788355477949e-05, "loss": 1.9385, "step": 311320 }, { "epoch": 1.1849988200634882, "grad_norm": 0.3313455283641815, "learning_rate": 3.753378501775939e-05, "loss": 1.9337, "step": 311330 }, { "epoch": 1.1850368825316109, "grad_norm": 0.16478504240512848, "learning_rate": 3.748873887583376e-05, "loss": 1.939, "step": 311340 }, { "epoch": 1.1850749449997335, "grad_norm": 0.19854886829853058, "learning_rate": 3.7443697120736084e-05, "loss": 1.9477, "step": 311350 }, { "epoch": 1.1851130074678562, "grad_norm": 0.1801048219203949, "learning_rate": 3.7398659751184894e-05, "loss": 1.9489, "step": 311360 }, { "epoch": 1.1851510699359789, "grad_norm": 0.1807958483695984, "learning_rate": 3.735362676589948e-05, "loss": 1.9418, "step": 311370 }, { "epoch": 1.1851891324041017, "grad_norm": 0.1839410960674286, "learning_rate": 3.730859816359961e-05, "loss": 1.9475, "step": 311380 }, { "epoch": 1.1852271948722244, "grad_norm": 0.1806168407201767, "learning_rate": 3.726357394300583e-05, "loss": 1.955, "step": 311390 }, { "epoch": 1.185265257340347, "grad_norm": 0.25393858551979065, "learning_rate": 3.721855410283931e-05, "loss": 1.9487, "step": 311400 }, { "epoch": 1.1853033198084697, "grad_norm": 0.17133808135986328, "learning_rate": 3.7173538641821655e-05, "loss": 1.9371, "step": 311410 }, { "epoch": 1.1853413822765924, "grad_norm": 0.19476234912872314, "learning_rate": 3.7128527558675294e-05, "loss": 1.9417, "step": 311420 }, { "epoch": 1.185379444744715, "grad_norm": 0.17833714187145233, "learning_rate": 3.708352085212319e-05, "loss": 1.9461, "step": 311430 }, { "epoch": 1.1854175072128377, "grad_norm": 0.17181991040706635, "learning_rate": 3.703851852088891e-05, "loss": 1.955, "step": 311440 }, { "epoch": 1.1854555696809603, "grad_norm": 0.19017015397548676, "learning_rate": 3.699352056369665e-05, "loss": 1.9537, "step": 311450 }, { "epoch": 1.185493632149083, "grad_norm": 0.1896897703409195, "learning_rate": 3.6948526979271255e-05, "loss": 1.956, "step": 311460 }, { "epoch": 1.1855316946172056, "grad_norm": 0.283040851354599, "learning_rate": 3.69035377663382e-05, "loss": 1.9381, "step": 311470 }, { "epoch": 1.1855697570853285, "grad_norm": 0.17958037555217743, "learning_rate": 3.685855292362356e-05, "loss": 1.9513, "step": 311480 }, { "epoch": 1.1856078195534512, "grad_norm": 0.23295582830905914, "learning_rate": 3.681357244985401e-05, "loss": 1.9404, "step": 311490 }, { "epoch": 1.1856458820215738, "grad_norm": 0.17441517114639282, "learning_rate": 3.676859634375679e-05, "loss": 1.9539, "step": 311500 }, { "epoch": 1.1856839444896965, "grad_norm": 0.26736241579055786, "learning_rate": 3.672362460405992e-05, "loss": 1.9524, "step": 311510 }, { "epoch": 1.1857220069578192, "grad_norm": 0.1684333086013794, "learning_rate": 3.6678657229491875e-05, "loss": 1.9404, "step": 311520 }, { "epoch": 1.1857600694259418, "grad_norm": 0.22045107185840607, "learning_rate": 3.663369421878188e-05, "loss": 1.9431, "step": 311530 }, { "epoch": 1.1857981318940645, "grad_norm": 0.16330958902835846, "learning_rate": 3.658873557065967e-05, "loss": 1.9505, "step": 311540 }, { "epoch": 1.1858361943621873, "grad_norm": 0.16610394418239594, "learning_rate": 3.65437812838556e-05, "loss": 1.9432, "step": 311550 }, { "epoch": 1.18587425683031, "grad_norm": 0.17661607265472412, "learning_rate": 3.649883135710075e-05, "loss": 1.9375, "step": 311560 }, { "epoch": 1.1859123192984327, "grad_norm": 0.18251265585422516, "learning_rate": 3.645388578912667e-05, "loss": 1.9434, "step": 311570 }, { "epoch": 1.1859503817665553, "grad_norm": 0.21383360028266907, "learning_rate": 3.640894457866567e-05, "loss": 1.9609, "step": 311580 }, { "epoch": 1.185988444234678, "grad_norm": 0.21023496985435486, "learning_rate": 3.636400772445059e-05, "loss": 1.9483, "step": 311590 }, { "epoch": 1.1860265067028006, "grad_norm": 0.18044514954090118, "learning_rate": 3.6319075225214827e-05, "loss": 1.934, "step": 311600 }, { "epoch": 1.1860645691709233, "grad_norm": 0.18245013058185577, "learning_rate": 3.627414707969251e-05, "loss": 1.938, "step": 311610 }, { "epoch": 1.186102631639046, "grad_norm": 0.18348990380764008, "learning_rate": 3.6229223286618316e-05, "loss": 1.9436, "step": 311620 }, { "epoch": 1.1861406941071686, "grad_norm": 0.18171392381191254, "learning_rate": 3.618430384472754e-05, "loss": 1.9557, "step": 311630 }, { "epoch": 1.1861787565752913, "grad_norm": 0.16605229675769806, "learning_rate": 3.613938875275618e-05, "loss": 1.9441, "step": 311640 }, { "epoch": 1.1862168190434141, "grad_norm": 0.19410014152526855, "learning_rate": 3.6094478009440654e-05, "loss": 1.96, "step": 311650 }, { "epoch": 1.1862548815115368, "grad_norm": 0.18071909248828888, "learning_rate": 3.604957161351818e-05, "loss": 1.9384, "step": 311660 }, { "epoch": 1.1862929439796595, "grad_norm": 0.28662827610969543, "learning_rate": 3.60046695637265e-05, "loss": 1.94, "step": 311670 }, { "epoch": 1.186331006447782, "grad_norm": 0.19265218079090118, "learning_rate": 3.5959771858803895e-05, "loss": 1.9385, "step": 311680 }, { "epoch": 1.1863690689159048, "grad_norm": 0.22032389044761658, "learning_rate": 3.591487849748942e-05, "loss": 1.9678, "step": 311690 }, { "epoch": 1.1864071313840274, "grad_norm": 0.184286430478096, "learning_rate": 3.5869989478522655e-05, "loss": 1.941, "step": 311700 }, { "epoch": 1.18644519385215, "grad_norm": 0.18217714130878448, "learning_rate": 3.582510480064377e-05, "loss": 1.9475, "step": 311710 }, { "epoch": 1.1864832563202727, "grad_norm": 0.18673953413963318, "learning_rate": 3.57802244625936e-05, "loss": 1.9584, "step": 311720 }, { "epoch": 1.1865213187883956, "grad_norm": 0.23944932222366333, "learning_rate": 3.573534846311349e-05, "loss": 1.956, "step": 311730 }, { "epoch": 1.1865593812565183, "grad_norm": 0.18649595975875854, "learning_rate": 3.5690476800945506e-05, "loss": 1.9469, "step": 311740 }, { "epoch": 1.186597443724641, "grad_norm": 0.20941564440727234, "learning_rate": 3.564560947483225e-05, "loss": 1.943, "step": 311750 }, { "epoch": 1.1866355061927636, "grad_norm": 0.21465104818344116, "learning_rate": 3.5600746483516966e-05, "loss": 1.9346, "step": 311760 }, { "epoch": 1.1866735686608862, "grad_norm": 0.2321460098028183, "learning_rate": 3.555588782574354e-05, "loss": 1.9361, "step": 311770 }, { "epoch": 1.186711631129009, "grad_norm": 0.20696842670440674, "learning_rate": 3.5511033500256306e-05, "loss": 1.9359, "step": 311780 }, { "epoch": 1.1867496935971316, "grad_norm": 0.18398690223693848, "learning_rate": 3.546618350580044e-05, "loss": 1.9403, "step": 311790 }, { "epoch": 1.1867877560652542, "grad_norm": 0.2288619577884674, "learning_rate": 3.542133784112156e-05, "loss": 1.9522, "step": 311800 }, { "epoch": 1.1868258185333769, "grad_norm": 0.2083914875984192, "learning_rate": 3.5376496504965825e-05, "loss": 1.9457, "step": 311810 }, { "epoch": 1.1868638810014998, "grad_norm": 0.252368301153183, "learning_rate": 3.53316594960803e-05, "loss": 1.9451, "step": 311820 }, { "epoch": 1.1869019434696224, "grad_norm": 0.18113096058368683, "learning_rate": 3.5286826813212324e-05, "loss": 1.9409, "step": 311830 }, { "epoch": 1.186940005937745, "grad_norm": 0.16996903717517853, "learning_rate": 3.524199845511e-05, "loss": 1.9494, "step": 311840 }, { "epoch": 1.1869780684058677, "grad_norm": 0.16921067237854004, "learning_rate": 3.519717442052201e-05, "loss": 1.9507, "step": 311850 }, { "epoch": 1.1870161308739904, "grad_norm": 0.19410954415798187, "learning_rate": 3.5152354708197606e-05, "loss": 1.9522, "step": 311860 }, { "epoch": 1.187054193342113, "grad_norm": 0.17984320223331451, "learning_rate": 3.510753931688676e-05, "loss": 1.9501, "step": 311870 }, { "epoch": 1.1870922558102357, "grad_norm": 0.17801634967327118, "learning_rate": 3.5062728245339904e-05, "loss": 1.9544, "step": 311880 }, { "epoch": 1.1871303182783584, "grad_norm": 0.16607418656349182, "learning_rate": 3.5017921492308146e-05, "loss": 1.9503, "step": 311890 }, { "epoch": 1.1871683807464812, "grad_norm": 0.24475719034671783, "learning_rate": 3.4973119056543153e-05, "loss": 1.9596, "step": 311900 }, { "epoch": 1.187206443214604, "grad_norm": 0.23632270097732544, "learning_rate": 3.492832093679727e-05, "loss": 1.9537, "step": 311910 }, { "epoch": 1.1872445056827265, "grad_norm": 0.17317236959934235, "learning_rate": 3.488352713182336e-05, "loss": 1.9299, "step": 311920 }, { "epoch": 1.1872825681508492, "grad_norm": 0.20168110728263855, "learning_rate": 3.4838737640374876e-05, "loss": 1.955, "step": 311930 }, { "epoch": 1.1873206306189719, "grad_norm": 0.20480570197105408, "learning_rate": 3.479395246120598e-05, "loss": 1.9501, "step": 311940 }, { "epoch": 1.1873586930870945, "grad_norm": 0.17610032856464386, "learning_rate": 3.474917159307139e-05, "loss": 1.9372, "step": 311950 }, { "epoch": 1.1873967555552172, "grad_norm": 0.18562458455562592, "learning_rate": 3.470439503472633e-05, "loss": 1.9624, "step": 311960 }, { "epoch": 1.1874348180233398, "grad_norm": 0.18058809638023376, "learning_rate": 3.465962278492674e-05, "loss": 1.9325, "step": 311970 }, { "epoch": 1.1874728804914625, "grad_norm": 0.20649303495883942, "learning_rate": 3.46148548424291e-05, "loss": 1.9451, "step": 311980 }, { "epoch": 1.1875109429595851, "grad_norm": 0.19297026097774506, "learning_rate": 3.457009120599047e-05, "loss": 1.9423, "step": 311990 }, { "epoch": 1.187549005427708, "grad_norm": 0.21698220074176788, "learning_rate": 3.452533187436863e-05, "loss": 1.9408, "step": 312000 }, { "epoch": 1.1875870678958307, "grad_norm": 0.16666904091835022, "learning_rate": 3.4480576846321776e-05, "loss": 1.947, "step": 312010 }, { "epoch": 1.1876251303639533, "grad_norm": 0.17687274515628815, "learning_rate": 3.44358261206088e-05, "loss": 1.9673, "step": 312020 }, { "epoch": 1.187663192832076, "grad_norm": 0.16840630769729614, "learning_rate": 3.4391079695989205e-05, "loss": 1.9574, "step": 312030 }, { "epoch": 1.1877012553001987, "grad_norm": 0.18446031212806702, "learning_rate": 3.4346337571223076e-05, "loss": 1.9394, "step": 312040 }, { "epoch": 1.1877393177683213, "grad_norm": 0.16199266910552979, "learning_rate": 3.430159974507102e-05, "loss": 1.9382, "step": 312050 }, { "epoch": 1.187777380236444, "grad_norm": 0.1947520673274994, "learning_rate": 3.4256866216294424e-05, "loss": 1.9438, "step": 312060 }, { "epoch": 1.1878154427045668, "grad_norm": 0.17160986363887787, "learning_rate": 3.4212136983655054e-05, "loss": 1.9554, "step": 312070 }, { "epoch": 1.1878535051726895, "grad_norm": 0.165254607796669, "learning_rate": 3.416741204591539e-05, "loss": 1.947, "step": 312080 }, { "epoch": 1.1878915676408122, "grad_norm": 0.1807015836238861, "learning_rate": 3.4122691401838434e-05, "loss": 1.946, "step": 312090 }, { "epoch": 1.1879296301089348, "grad_norm": 0.18120074272155762, "learning_rate": 3.4077975050187894e-05, "loss": 1.9529, "step": 312100 }, { "epoch": 1.1879676925770575, "grad_norm": 0.16498395800590515, "learning_rate": 3.4033262989727974e-05, "loss": 1.9464, "step": 312110 }, { "epoch": 1.1880057550451801, "grad_norm": 0.16716605424880981, "learning_rate": 3.398855521922356e-05, "loss": 1.9524, "step": 312120 }, { "epoch": 1.1880438175133028, "grad_norm": 0.19660872220993042, "learning_rate": 3.394385173743997e-05, "loss": 1.9343, "step": 312130 }, { "epoch": 1.1880818799814254, "grad_norm": 0.17590086162090302, "learning_rate": 3.3899152543143294e-05, "loss": 1.9378, "step": 312140 }, { "epoch": 1.188119942449548, "grad_norm": 0.23901164531707764, "learning_rate": 3.385445763510014e-05, "loss": 1.9423, "step": 312150 }, { "epoch": 1.1881580049176708, "grad_norm": 0.18615446984767914, "learning_rate": 3.3809767012077666e-05, "loss": 1.9621, "step": 312160 }, { "epoch": 1.1881960673857936, "grad_norm": 0.22386965155601501, "learning_rate": 3.3765080672843615e-05, "loss": 1.9405, "step": 312170 }, { "epoch": 1.1882341298539163, "grad_norm": 0.16738446056842804, "learning_rate": 3.372039861616649e-05, "loss": 1.9445, "step": 312180 }, { "epoch": 1.188272192322039, "grad_norm": 0.17396967113018036, "learning_rate": 3.367572084081516e-05, "loss": 1.9474, "step": 312190 }, { "epoch": 1.1883102547901616, "grad_norm": 0.24288448691368103, "learning_rate": 3.3631047345559273e-05, "loss": 1.9487, "step": 312200 }, { "epoch": 1.1883483172582843, "grad_norm": 0.18360836803913116, "learning_rate": 3.358637812916887e-05, "loss": 1.932, "step": 312210 }, { "epoch": 1.188386379726407, "grad_norm": 0.24498017132282257, "learning_rate": 3.3541713190414714e-05, "loss": 1.9589, "step": 312220 }, { "epoch": 1.1884244421945296, "grad_norm": 0.25882506370544434, "learning_rate": 3.349705252806812e-05, "loss": 1.9496, "step": 312230 }, { "epoch": 1.1884625046626525, "grad_norm": 0.2343091368675232, "learning_rate": 3.345239614090112e-05, "loss": 1.929, "step": 312240 }, { "epoch": 1.1885005671307751, "grad_norm": 0.16979889571666718, "learning_rate": 3.340774402768604e-05, "loss": 1.9386, "step": 312250 }, { "epoch": 1.1885386295988978, "grad_norm": 0.2081635296344757, "learning_rate": 3.336309618719607e-05, "loss": 1.9534, "step": 312260 }, { "epoch": 1.1885766920670204, "grad_norm": 0.1663932502269745, "learning_rate": 3.331845261820493e-05, "loss": 1.9548, "step": 312270 }, { "epoch": 1.188614754535143, "grad_norm": 0.2160760760307312, "learning_rate": 3.3273813319486755e-05, "loss": 1.9401, "step": 312280 }, { "epoch": 1.1886528170032657, "grad_norm": 0.1771797239780426, "learning_rate": 3.322917828981642e-05, "loss": 1.9519, "step": 312290 }, { "epoch": 1.1886908794713884, "grad_norm": 0.2006005495786667, "learning_rate": 3.31845475279694e-05, "loss": 1.9413, "step": 312300 }, { "epoch": 1.188728941939511, "grad_norm": 0.18026748299598694, "learning_rate": 3.313992103272168e-05, "loss": 1.9469, "step": 312310 }, { "epoch": 1.1887670044076337, "grad_norm": 0.1619405448436737, "learning_rate": 3.3095298802849895e-05, "loss": 1.9395, "step": 312320 }, { "epoch": 1.1888050668757564, "grad_norm": 0.1686805933713913, "learning_rate": 3.305068083713125e-05, "loss": 1.9428, "step": 312330 }, { "epoch": 1.1888431293438793, "grad_norm": 0.24637804925441742, "learning_rate": 3.3006067134343456e-05, "loss": 1.9311, "step": 312340 }, { "epoch": 1.188881191812002, "grad_norm": 0.1797938495874405, "learning_rate": 3.2961457693264865e-05, "loss": 1.9514, "step": 312350 }, { "epoch": 1.1889192542801246, "grad_norm": 0.18646450340747833, "learning_rate": 3.291685251267451e-05, "loss": 1.9462, "step": 312360 }, { "epoch": 1.1889573167482472, "grad_norm": 0.1743801236152649, "learning_rate": 3.287225159135182e-05, "loss": 1.9308, "step": 312370 }, { "epoch": 1.1889953792163699, "grad_norm": 0.17725461721420288, "learning_rate": 3.2827654928076875e-05, "loss": 1.9493, "step": 312380 }, { "epoch": 1.1890334416844925, "grad_norm": 0.17935842275619507, "learning_rate": 3.278306252163049e-05, "loss": 1.9426, "step": 312390 }, { "epoch": 1.1890715041526152, "grad_norm": 0.16997292637825012, "learning_rate": 3.27384743707938e-05, "loss": 1.9395, "step": 312400 }, { "epoch": 1.189109566620738, "grad_norm": 0.17147625982761383, "learning_rate": 3.269389047434868e-05, "loss": 1.9554, "step": 312410 }, { "epoch": 1.1891476290888607, "grad_norm": 0.1671450138092041, "learning_rate": 3.264931083107764e-05, "loss": 1.9463, "step": 312420 }, { "epoch": 1.1891856915569834, "grad_norm": 0.18187834322452545, "learning_rate": 3.260473543976367e-05, "loss": 1.9425, "step": 312430 }, { "epoch": 1.189223754025106, "grad_norm": 0.17444400489330292, "learning_rate": 3.256016429919029e-05, "loss": 1.965, "step": 312440 }, { "epoch": 1.1892618164932287, "grad_norm": 0.20525328814983368, "learning_rate": 3.25155974081417e-05, "loss": 1.9463, "step": 312450 }, { "epoch": 1.1892998789613514, "grad_norm": 0.21237628161907196, "learning_rate": 3.247103476540264e-05, "loss": 1.9546, "step": 312460 }, { "epoch": 1.189337941429474, "grad_norm": 0.2117680311203003, "learning_rate": 3.2426476369758484e-05, "loss": 1.9507, "step": 312470 }, { "epoch": 1.1893760038975967, "grad_norm": 0.1771915704011917, "learning_rate": 3.238192221999514e-05, "loss": 1.926, "step": 312480 }, { "epoch": 1.1894140663657193, "grad_norm": 0.21037092804908752, "learning_rate": 3.2337372314899084e-05, "loss": 1.9375, "step": 312490 }, { "epoch": 1.189452128833842, "grad_norm": 0.23058432340621948, "learning_rate": 3.2292826653257336e-05, "loss": 1.9567, "step": 312500 }, { "epoch": 1.1894901913019649, "grad_norm": 0.23512980341911316, "learning_rate": 3.224828523385759e-05, "loss": 1.9493, "step": 312510 }, { "epoch": 1.1895282537700875, "grad_norm": 0.26605743169784546, "learning_rate": 3.220374805548803e-05, "loss": 1.9509, "step": 312520 }, { "epoch": 1.1895663162382102, "grad_norm": 0.1657746434211731, "learning_rate": 3.215921511693748e-05, "loss": 1.9511, "step": 312530 }, { "epoch": 1.1896043787063328, "grad_norm": 0.20481301844120026, "learning_rate": 3.2114686416995276e-05, "loss": 1.9573, "step": 312540 }, { "epoch": 1.1896424411744555, "grad_norm": 0.16254596412181854, "learning_rate": 3.207016195445139e-05, "loss": 1.9208, "step": 312550 }, { "epoch": 1.1896805036425782, "grad_norm": 0.1650143563747406, "learning_rate": 3.202564172809641e-05, "loss": 1.9371, "step": 312560 }, { "epoch": 1.1897185661107008, "grad_norm": 0.18093803524971008, "learning_rate": 3.19811257367213e-05, "loss": 1.9401, "step": 312570 }, { "epoch": 1.1897566285788235, "grad_norm": 0.16682739555835724, "learning_rate": 3.1936613979117846e-05, "loss": 1.9504, "step": 312580 }, { "epoch": 1.1897946910469464, "grad_norm": 0.19406753778457642, "learning_rate": 3.189210645407825e-05, "loss": 1.9407, "step": 312590 }, { "epoch": 1.189832753515069, "grad_norm": 0.17175889015197754, "learning_rate": 3.184760316039536e-05, "loss": 1.9384, "step": 312600 }, { "epoch": 1.1898708159831917, "grad_norm": 0.18349462747573853, "learning_rate": 3.1803104096862546e-05, "loss": 1.931, "step": 312610 }, { "epoch": 1.1899088784513143, "grad_norm": 0.18410073220729828, "learning_rate": 3.175860926227381e-05, "loss": 1.9407, "step": 312620 }, { "epoch": 1.189946940919437, "grad_norm": 0.21190586686134338, "learning_rate": 3.1714118655423616e-05, "loss": 1.9425, "step": 312630 }, { "epoch": 1.1899850033875596, "grad_norm": 0.1733289361000061, "learning_rate": 3.166963227510716e-05, "loss": 1.9366, "step": 312640 }, { "epoch": 1.1900230658556823, "grad_norm": 0.18026097118854523, "learning_rate": 3.162515012012013e-05, "loss": 1.9445, "step": 312650 }, { "epoch": 1.190061128323805, "grad_norm": 0.17617754638195038, "learning_rate": 3.1580672189258756e-05, "loss": 1.9361, "step": 312660 }, { "epoch": 1.1900991907919276, "grad_norm": 0.20476016402244568, "learning_rate": 3.153619848131989e-05, "loss": 1.9575, "step": 312670 }, { "epoch": 1.1901372532600505, "grad_norm": 0.19444191455841064, "learning_rate": 3.1491728995100885e-05, "loss": 1.9584, "step": 312680 }, { "epoch": 1.1901753157281731, "grad_norm": 0.18735110759735107, "learning_rate": 3.144726372939977e-05, "loss": 1.9506, "step": 312690 }, { "epoch": 1.1902133781962958, "grad_norm": 0.2517315447330475, "learning_rate": 3.140280268301504e-05, "loss": 1.9419, "step": 312700 }, { "epoch": 1.1902514406644185, "grad_norm": 0.1806691586971283, "learning_rate": 3.13583458547459e-05, "loss": 1.9516, "step": 312710 }, { "epoch": 1.1902895031325411, "grad_norm": 0.16991178691387177, "learning_rate": 3.131389324339185e-05, "loss": 1.9428, "step": 312720 }, { "epoch": 1.1903275656006638, "grad_norm": 0.20859868824481964, "learning_rate": 3.126944484775335e-05, "loss": 1.9492, "step": 312730 }, { "epoch": 1.1903656280687864, "grad_norm": 0.17317542433738708, "learning_rate": 3.1225000666631084e-05, "loss": 1.94, "step": 312740 }, { "epoch": 1.190403690536909, "grad_norm": 0.16412481665611267, "learning_rate": 3.118056069882652e-05, "loss": 1.9554, "step": 312750 }, { "epoch": 1.190441753005032, "grad_norm": 0.18478161096572876, "learning_rate": 3.113612494314161e-05, "loss": 1.9357, "step": 312760 }, { "epoch": 1.1904798154731546, "grad_norm": 0.18147684633731842, "learning_rate": 3.1091693398378806e-05, "loss": 1.9553, "step": 312770 }, { "epoch": 1.1905178779412773, "grad_norm": 0.15987755358219147, "learning_rate": 3.104726606334124e-05, "loss": 1.9422, "step": 312780 }, { "epoch": 1.1905559404094, "grad_norm": 0.2043231874704361, "learning_rate": 3.1002842936832584e-05, "loss": 1.9432, "step": 312790 }, { "epoch": 1.1905940028775226, "grad_norm": 0.22990889847278595, "learning_rate": 3.095842401765703e-05, "loss": 1.9499, "step": 312800 }, { "epoch": 1.1906320653456453, "grad_norm": 0.16571252048015594, "learning_rate": 3.091400930461946e-05, "loss": 1.9481, "step": 312810 }, { "epoch": 1.190670127813768, "grad_norm": 0.20370744168758392, "learning_rate": 3.086959879652512e-05, "loss": 1.9402, "step": 312820 }, { "epoch": 1.1907081902818906, "grad_norm": 0.21053428947925568, "learning_rate": 3.082519249217997e-05, "loss": 1.9536, "step": 312830 }, { "epoch": 1.1907462527500132, "grad_norm": 0.21404720842838287, "learning_rate": 3.07807903903905e-05, "loss": 1.9445, "step": 312840 }, { "epoch": 1.1907843152181359, "grad_norm": 0.16291579604148865, "learning_rate": 3.073639248996374e-05, "loss": 1.9357, "step": 312850 }, { "epoch": 1.1908223776862588, "grad_norm": 0.1799086183309555, "learning_rate": 3.069199878970741e-05, "loss": 1.957, "step": 312860 }, { "epoch": 1.1908604401543814, "grad_norm": 0.16775865852832794, "learning_rate": 3.064760928842958e-05, "loss": 1.947, "step": 312870 }, { "epoch": 1.190898502622504, "grad_norm": 0.18161626160144806, "learning_rate": 3.0603223984939034e-05, "loss": 1.9356, "step": 312880 }, { "epoch": 1.1909365650906267, "grad_norm": 0.20884421467781067, "learning_rate": 3.055884287804506e-05, "loss": 1.9548, "step": 312890 }, { "epoch": 1.1909746275587494, "grad_norm": 0.21153324842453003, "learning_rate": 3.0514465966557548e-05, "loss": 1.9429, "step": 312900 }, { "epoch": 1.191012690026872, "grad_norm": 0.18954439461231232, "learning_rate": 3.04700932492869e-05, "loss": 1.9546, "step": 312910 }, { "epoch": 1.1910507524949947, "grad_norm": 0.18886590003967285, "learning_rate": 3.042572472504418e-05, "loss": 1.935, "step": 312920 }, { "epoch": 1.1910888149631176, "grad_norm": 0.18550170958042145, "learning_rate": 3.038136039264089e-05, "loss": 1.9363, "step": 312930 }, { "epoch": 1.1911268774312402, "grad_norm": 0.18696042895317078, "learning_rate": 3.0337000250889148e-05, "loss": 1.9352, "step": 312940 }, { "epoch": 1.191164939899363, "grad_norm": 0.2550421953201294, "learning_rate": 3.0292644298601624e-05, "loss": 1.9396, "step": 312950 }, { "epoch": 1.1912030023674856, "grad_norm": 0.23013365268707275, "learning_rate": 3.0248292534591547e-05, "loss": 1.9346, "step": 312960 }, { "epoch": 1.1912410648356082, "grad_norm": 0.18774259090423584, "learning_rate": 3.0203944957672812e-05, "loss": 1.9354, "step": 312970 }, { "epoch": 1.1912791273037309, "grad_norm": 0.20159746706485748, "learning_rate": 3.0159601566659646e-05, "loss": 1.9391, "step": 312980 }, { "epoch": 1.1913171897718535, "grad_norm": 0.21470539271831512, "learning_rate": 3.01152623603671e-05, "loss": 1.939, "step": 312990 }, { "epoch": 1.1913552522399762, "grad_norm": 0.1757175177335739, "learning_rate": 3.0070927337610522e-05, "loss": 1.9355, "step": 313000 }, { "epoch": 1.1913933147080988, "grad_norm": 0.23895221948623657, "learning_rate": 3.0026596497206016e-05, "loss": 1.9392, "step": 313010 }, { "epoch": 1.1914313771762215, "grad_norm": 0.16913849115371704, "learning_rate": 2.99822698379702e-05, "loss": 1.9458, "step": 313020 }, { "epoch": 1.1914694396443444, "grad_norm": 0.1750878542661667, "learning_rate": 2.9937947358720187e-05, "loss": 1.9409, "step": 313030 }, { "epoch": 1.191507502112467, "grad_norm": 0.1695113629102707, "learning_rate": 2.989362905827364e-05, "loss": 1.9365, "step": 313040 }, { "epoch": 1.1915455645805897, "grad_norm": 0.20208555459976196, "learning_rate": 2.9849314935448957e-05, "loss": 1.9481, "step": 313050 }, { "epoch": 1.1915836270487123, "grad_norm": 0.1992395520210266, "learning_rate": 2.9805004989064856e-05, "loss": 1.9431, "step": 313060 }, { "epoch": 1.191621689516835, "grad_norm": 0.18389198184013367, "learning_rate": 2.976069921794078e-05, "loss": 1.9201, "step": 313070 }, { "epoch": 1.1916597519849577, "grad_norm": 0.21777984499931335, "learning_rate": 2.9716397620896564e-05, "loss": 1.9528, "step": 313080 }, { "epoch": 1.1916978144530803, "grad_norm": 0.17072694003582, "learning_rate": 2.9672100196752814e-05, "loss": 1.929, "step": 313090 }, { "epoch": 1.1917358769212032, "grad_norm": 0.20009645819664001, "learning_rate": 2.962780694433054e-05, "loss": 1.962, "step": 313100 }, { "epoch": 1.1917739393893259, "grad_norm": 0.2427327036857605, "learning_rate": 2.9583517862451338e-05, "loss": 1.9386, "step": 313110 }, { "epoch": 1.1918120018574485, "grad_norm": 0.2267937809228897, "learning_rate": 2.9539232949937324e-05, "loss": 1.9516, "step": 313120 }, { "epoch": 1.1918500643255712, "grad_norm": 0.2037351280450821, "learning_rate": 2.9494952205611326e-05, "loss": 1.9547, "step": 313130 }, { "epoch": 1.1918881267936938, "grad_norm": 0.18010416626930237, "learning_rate": 2.9450675628296454e-05, "loss": 1.9486, "step": 313140 }, { "epoch": 1.1919261892618165, "grad_norm": 0.1703222244977951, "learning_rate": 2.9406403216816645e-05, "loss": 1.9436, "step": 313150 }, { "epoch": 1.1919642517299391, "grad_norm": 0.17430193722248077, "learning_rate": 2.936213496999629e-05, "loss": 1.937, "step": 313160 }, { "epoch": 1.1920023141980618, "grad_norm": 0.21456824243068695, "learning_rate": 2.9317870886660215e-05, "loss": 1.9408, "step": 313170 }, { "epoch": 1.1920403766661845, "grad_norm": 0.18208642303943634, "learning_rate": 2.9273610965633966e-05, "loss": 1.9482, "step": 313180 }, { "epoch": 1.192078439134307, "grad_norm": 0.23920206725597382, "learning_rate": 2.9229355205743546e-05, "loss": 1.9449, "step": 313190 }, { "epoch": 1.19211650160243, "grad_norm": 0.16931700706481934, "learning_rate": 2.91851036058155e-05, "loss": 1.9462, "step": 313200 }, { "epoch": 1.1921545640705526, "grad_norm": 0.18483975529670715, "learning_rate": 2.9140856164677043e-05, "loss": 1.9318, "step": 313210 }, { "epoch": 1.1921926265386753, "grad_norm": 0.21092703938484192, "learning_rate": 2.9096612881155837e-05, "loss": 1.9254, "step": 313220 }, { "epoch": 1.192230689006798, "grad_norm": 0.21394091844558716, "learning_rate": 2.9052373754080096e-05, "loss": 1.9402, "step": 313230 }, { "epoch": 1.1922687514749206, "grad_norm": 0.20954100787639618, "learning_rate": 2.900813878227859e-05, "loss": 1.9382, "step": 313240 }, { "epoch": 1.1923068139430433, "grad_norm": 0.17000707983970642, "learning_rate": 2.8963907964580705e-05, "loss": 1.9459, "step": 313250 }, { "epoch": 1.192344876411166, "grad_norm": 0.24068133533000946, "learning_rate": 2.8919681299816258e-05, "loss": 1.9385, "step": 313260 }, { "epoch": 1.1923829388792888, "grad_norm": 0.20357105135917664, "learning_rate": 2.8875458786815745e-05, "loss": 1.936, "step": 313270 }, { "epoch": 1.1924210013474115, "grad_norm": 0.16180942952632904, "learning_rate": 2.8831240424410153e-05, "loss": 1.9408, "step": 313280 }, { "epoch": 1.1924590638155341, "grad_norm": 0.19413597881793976, "learning_rate": 2.878702621143092e-05, "loss": 1.9403, "step": 313290 }, { "epoch": 1.1924971262836568, "grad_norm": 0.17977626621723175, "learning_rate": 2.874281614671026e-05, "loss": 1.9362, "step": 313300 }, { "epoch": 1.1925351887517794, "grad_norm": 0.18004660308361053, "learning_rate": 2.8698610229080712e-05, "loss": 1.9421, "step": 313310 }, { "epoch": 1.192573251219902, "grad_norm": 0.1666060835123062, "learning_rate": 2.8654408457375436e-05, "loss": 1.951, "step": 313320 }, { "epoch": 1.1926113136880248, "grad_norm": 0.1712903380393982, "learning_rate": 2.8610210830428194e-05, "loss": 1.9472, "step": 313330 }, { "epoch": 1.1926493761561474, "grad_norm": 0.1879504919052124, "learning_rate": 2.8566017347073316e-05, "loss": 1.9448, "step": 313340 }, { "epoch": 1.19268743862427, "grad_norm": 0.20638255774974823, "learning_rate": 2.85218280061455e-05, "loss": 1.9425, "step": 313350 }, { "epoch": 1.1927255010923927, "grad_norm": 0.20174163579940796, "learning_rate": 2.847764280648013e-05, "loss": 1.9428, "step": 313360 }, { "epoch": 1.1927635635605156, "grad_norm": 0.16518354415893555, "learning_rate": 2.843346174691319e-05, "loss": 1.9474, "step": 313370 }, { "epoch": 1.1928016260286383, "grad_norm": 0.174319326877594, "learning_rate": 2.838928482628106e-05, "loss": 1.9429, "step": 313380 }, { "epoch": 1.192839688496761, "grad_norm": 0.177846297621727, "learning_rate": 2.834511204342072e-05, "loss": 1.9572, "step": 313390 }, { "epoch": 1.1928777509648836, "grad_norm": 0.1643485277891159, "learning_rate": 2.8300943397169822e-05, "loss": 1.9542, "step": 313400 }, { "epoch": 1.1929158134330062, "grad_norm": 0.2616300880908966, "learning_rate": 2.8256778886366352e-05, "loss": 1.9307, "step": 313410 }, { "epoch": 1.192953875901129, "grad_norm": 0.1790495663881302, "learning_rate": 2.8212618509848962e-05, "loss": 1.9297, "step": 313420 }, { "epoch": 1.1929919383692515, "grad_norm": 0.2060040533542633, "learning_rate": 2.8168462266456808e-05, "loss": 1.9326, "step": 313430 }, { "epoch": 1.1930300008373742, "grad_norm": 0.1828863024711609, "learning_rate": 2.8124310155029644e-05, "loss": 1.9327, "step": 313440 }, { "epoch": 1.193068063305497, "grad_norm": 0.20013552904129028, "learning_rate": 2.8080162174407674e-05, "loss": 1.9428, "step": 313450 }, { "epoch": 1.1931061257736197, "grad_norm": 0.18757100403308868, "learning_rate": 2.803601832343178e-05, "loss": 1.9338, "step": 313460 }, { "epoch": 1.1931441882417424, "grad_norm": 0.2295101135969162, "learning_rate": 2.7991878600943264e-05, "loss": 1.9467, "step": 313470 }, { "epoch": 1.193182250709865, "grad_norm": 0.20257122814655304, "learning_rate": 2.794774300578401e-05, "loss": 1.9423, "step": 313480 }, { "epoch": 1.1932203131779877, "grad_norm": 0.16395969688892365, "learning_rate": 2.7903611536796436e-05, "loss": 1.9381, "step": 313490 }, { "epoch": 1.1932583756461104, "grad_norm": 0.19475962221622467, "learning_rate": 2.7859484192823526e-05, "loss": 1.9327, "step": 313500 }, { "epoch": 1.193296438114233, "grad_norm": 0.18422411382198334, "learning_rate": 2.781536097270876e-05, "loss": 1.9462, "step": 313510 }, { "epoch": 1.1933345005823557, "grad_norm": 0.2034992128610611, "learning_rate": 2.7771241875296226e-05, "loss": 1.9518, "step": 313520 }, { "epoch": 1.1933725630504783, "grad_norm": 0.20844464004039764, "learning_rate": 2.7727126899430465e-05, "loss": 1.927, "step": 313530 }, { "epoch": 1.1934106255186012, "grad_norm": 0.26188117265701294, "learning_rate": 2.7683016043956622e-05, "loss": 1.9378, "step": 313540 }, { "epoch": 1.1934486879867239, "grad_norm": 0.19190119206905365, "learning_rate": 2.7638909307720394e-05, "loss": 1.9423, "step": 313550 }, { "epoch": 1.1934867504548465, "grad_norm": 0.2001878321170807, "learning_rate": 2.7594806689567932e-05, "loss": 1.9494, "step": 313560 }, { "epoch": 1.1935248129229692, "grad_norm": 0.18613621592521667, "learning_rate": 2.7550708188346042e-05, "loss": 1.9432, "step": 313570 }, { "epoch": 1.1935628753910918, "grad_norm": 0.17761319875717163, "learning_rate": 2.750661380290198e-05, "loss": 1.9453, "step": 313580 }, { "epoch": 1.1936009378592145, "grad_norm": 0.1721399426460266, "learning_rate": 2.746252353208356e-05, "loss": 1.9284, "step": 313590 }, { "epoch": 1.1936390003273372, "grad_norm": 0.21422426402568817, "learning_rate": 2.7418437374739146e-05, "loss": 1.9455, "step": 313600 }, { "epoch": 1.1936770627954598, "grad_norm": 0.1898270547389984, "learning_rate": 2.7374355329717658e-05, "loss": 1.9361, "step": 313610 }, { "epoch": 1.1937151252635827, "grad_norm": 0.17938223481178284, "learning_rate": 2.7330277395868463e-05, "loss": 1.9244, "step": 313620 }, { "epoch": 1.1937531877317054, "grad_norm": 0.1766996681690216, "learning_rate": 2.7286203572041534e-05, "loss": 1.94, "step": 313630 }, { "epoch": 1.193791250199828, "grad_norm": 0.20516465604305267, "learning_rate": 2.724213385708746e-05, "loss": 1.9421, "step": 313640 }, { "epoch": 1.1938293126679507, "grad_norm": 0.19619545340538025, "learning_rate": 2.7198068249857212e-05, "loss": 1.9409, "step": 313650 }, { "epoch": 1.1938673751360733, "grad_norm": 0.16490577161312103, "learning_rate": 2.7154006749202374e-05, "loss": 1.9446, "step": 313660 }, { "epoch": 1.193905437604196, "grad_norm": 0.1935669481754303, "learning_rate": 2.7109949353975093e-05, "loss": 1.952, "step": 313670 }, { "epoch": 1.1939435000723186, "grad_norm": 0.18772977590560913, "learning_rate": 2.706589606302795e-05, "loss": 1.9411, "step": 313680 }, { "epoch": 1.1939815625404413, "grad_norm": 0.16066856682300568, "learning_rate": 2.7021846875214196e-05, "loss": 1.9517, "step": 313690 }, { "epoch": 1.194019625008564, "grad_norm": 0.22064299881458282, "learning_rate": 2.6977801789387467e-05, "loss": 1.9477, "step": 313700 }, { "epoch": 1.1940576874766868, "grad_norm": 0.16915158927440643, "learning_rate": 2.6933760804402074e-05, "loss": 1.9415, "step": 313710 }, { "epoch": 1.1940957499448095, "grad_norm": 0.17556576430797577, "learning_rate": 2.688972391911276e-05, "loss": 1.9364, "step": 313720 }, { "epoch": 1.1941338124129321, "grad_norm": 0.16539603471755981, "learning_rate": 2.6845691132374893e-05, "loss": 1.9474, "step": 313730 }, { "epoch": 1.1941718748810548, "grad_norm": 0.22699984908103943, "learning_rate": 2.6801662443044216e-05, "loss": 1.9348, "step": 313740 }, { "epoch": 1.1942099373491775, "grad_norm": 0.1771809309720993, "learning_rate": 2.67576378499772e-05, "loss": 1.9524, "step": 313750 }, { "epoch": 1.1942479998173001, "grad_norm": 0.2590922713279724, "learning_rate": 2.6713617352030706e-05, "loss": 1.9448, "step": 313760 }, { "epoch": 1.1942860622854228, "grad_norm": 0.19297786056995392, "learning_rate": 2.6669600948062202e-05, "loss": 1.9471, "step": 313770 }, { "epoch": 1.1943241247535454, "grad_norm": 0.2514785826206207, "learning_rate": 2.6625588636929655e-05, "loss": 1.9442, "step": 313780 }, { "epoch": 1.1943621872216683, "grad_norm": 0.1699245721101761, "learning_rate": 2.658158041749159e-05, "loss": 1.9483, "step": 313790 }, { "epoch": 1.194400249689791, "grad_norm": 0.19253213703632355, "learning_rate": 2.653757628860698e-05, "loss": 1.9327, "step": 313800 }, { "epoch": 1.1944383121579136, "grad_norm": 0.1772114783525467, "learning_rate": 2.64935762491354e-05, "loss": 1.9418, "step": 313810 }, { "epoch": 1.1944763746260363, "grad_norm": 0.1950380653142929, "learning_rate": 2.6449580297937036e-05, "loss": 1.9528, "step": 313820 }, { "epoch": 1.194514437094159, "grad_norm": 0.17496085166931152, "learning_rate": 2.6405588433872363e-05, "loss": 1.9434, "step": 313830 }, { "epoch": 1.1945524995622816, "grad_norm": 0.18000604212284088, "learning_rate": 2.6361600655802674e-05, "loss": 1.9442, "step": 313840 }, { "epoch": 1.1945905620304043, "grad_norm": 0.19483064115047455, "learning_rate": 2.6317616962589607e-05, "loss": 1.9437, "step": 313850 }, { "epoch": 1.194628624498527, "grad_norm": 0.18538741767406464, "learning_rate": 2.6273637353095293e-05, "loss": 1.9333, "step": 313860 }, { "epoch": 1.1946666869666496, "grad_norm": 0.1824004203081131, "learning_rate": 2.622966182618258e-05, "loss": 1.943, "step": 313870 }, { "epoch": 1.1947047494347722, "grad_norm": 0.16097493469715118, "learning_rate": 2.6185690380714667e-05, "loss": 1.9396, "step": 313880 }, { "epoch": 1.194742811902895, "grad_norm": 0.1737389713525772, "learning_rate": 2.614172301555534e-05, "loss": 1.9427, "step": 313890 }, { "epoch": 1.1947808743710178, "grad_norm": 0.2533680498600006, "learning_rate": 2.609775972956896e-05, "loss": 1.9475, "step": 313900 }, { "epoch": 1.1948189368391404, "grad_norm": 0.21982336044311523, "learning_rate": 2.6053800521620373e-05, "loss": 1.9465, "step": 313910 }, { "epoch": 1.194856999307263, "grad_norm": 0.16938644647598267, "learning_rate": 2.6009845390574938e-05, "loss": 1.9566, "step": 313920 }, { "epoch": 1.1948950617753857, "grad_norm": 0.17015881836414337, "learning_rate": 2.5965894335298556e-05, "loss": 1.9493, "step": 313930 }, { "epoch": 1.1949331242435084, "grad_norm": 0.21307599544525146, "learning_rate": 2.5921947354657637e-05, "loss": 1.9352, "step": 313940 }, { "epoch": 1.194971186711631, "grad_norm": 0.23158085346221924, "learning_rate": 2.5878004447519144e-05, "loss": 1.9418, "step": 313950 }, { "epoch": 1.195009249179754, "grad_norm": 0.20640724897384644, "learning_rate": 2.583406561275059e-05, "loss": 1.9333, "step": 313960 }, { "epoch": 1.1950473116478766, "grad_norm": 0.20745491981506348, "learning_rate": 2.5790130849219882e-05, "loss": 1.9252, "step": 313970 }, { "epoch": 1.1950853741159992, "grad_norm": 0.21391427516937256, "learning_rate": 2.5746200155795652e-05, "loss": 1.933, "step": 313980 }, { "epoch": 1.195123436584122, "grad_norm": 0.19119727611541748, "learning_rate": 2.5702273531346853e-05, "loss": 1.9459, "step": 313990 }, { "epoch": 1.1951614990522446, "grad_norm": 0.17244857549667358, "learning_rate": 2.5658350974743118e-05, "loss": 1.9333, "step": 314000 }, { "epoch": 1.1951995615203672, "grad_norm": 0.18785542249679565, "learning_rate": 2.561443248485451e-05, "loss": 1.9409, "step": 314010 }, { "epoch": 1.1952376239884899, "grad_norm": 0.17021456360816956, "learning_rate": 2.5570518060551662e-05, "loss": 1.9427, "step": 314020 }, { "epoch": 1.1952756864566125, "grad_norm": 0.17522631585597992, "learning_rate": 2.5526607700705752e-05, "loss": 1.9404, "step": 314030 }, { "epoch": 1.1953137489247352, "grad_norm": 0.2075173407793045, "learning_rate": 2.5482701404188346e-05, "loss": 1.9444, "step": 314040 }, { "epoch": 1.1953518113928578, "grad_norm": 0.17764155566692352, "learning_rate": 2.543879916987174e-05, "loss": 1.9352, "step": 314050 }, { "epoch": 1.1953898738609807, "grad_norm": 0.17194154858589172, "learning_rate": 2.5394900996628557e-05, "loss": 1.9521, "step": 314060 }, { "epoch": 1.1954279363291034, "grad_norm": 0.16174156963825226, "learning_rate": 2.5351006883332085e-05, "loss": 1.9441, "step": 314070 }, { "epoch": 1.195465998797226, "grad_norm": 0.17151190340518951, "learning_rate": 2.5307116828856058e-05, "loss": 1.9378, "step": 314080 }, { "epoch": 1.1955040612653487, "grad_norm": 0.20262061059474945, "learning_rate": 2.5263230832074712e-05, "loss": 1.9404, "step": 314090 }, { "epoch": 1.1955421237334714, "grad_norm": 0.2190033197402954, "learning_rate": 2.5219348891862836e-05, "loss": 1.9277, "step": 314100 }, { "epoch": 1.195580186201594, "grad_norm": 0.2151559442281723, "learning_rate": 2.5175471007095775e-05, "loss": 1.9598, "step": 314110 }, { "epoch": 1.1956182486697167, "grad_norm": 0.3101893961429596, "learning_rate": 2.513159717664937e-05, "loss": 1.9379, "step": 314120 }, { "epoch": 1.1956563111378395, "grad_norm": 0.19195280969142914, "learning_rate": 2.5087727399399963e-05, "loss": 1.9548, "step": 314130 }, { "epoch": 1.1956943736059622, "grad_norm": 0.19188041985034943, "learning_rate": 2.5043861674224344e-05, "loss": 1.9448, "step": 314140 }, { "epoch": 1.1957324360740849, "grad_norm": 0.16497381031513214, "learning_rate": 2.500000000000002e-05, "loss": 1.9446, "step": 314150 }, { "epoch": 1.1957704985422075, "grad_norm": 0.1741824448108673, "learning_rate": 2.4956142375604785e-05, "loss": 1.9393, "step": 314160 }, { "epoch": 1.1958085610103302, "grad_norm": 0.19525335729122162, "learning_rate": 2.491228879991714e-05, "loss": 1.9339, "step": 314170 }, { "epoch": 1.1958466234784528, "grad_norm": 0.23857097327709198, "learning_rate": 2.486843927181598e-05, "loss": 1.9416, "step": 314180 }, { "epoch": 1.1958846859465755, "grad_norm": 0.21334300935268402, "learning_rate": 2.4824593790180814e-05, "loss": 1.9469, "step": 314190 }, { "epoch": 1.1959227484146981, "grad_norm": 0.1724768429994583, "learning_rate": 2.47807523538916e-05, "loss": 1.9473, "step": 314200 }, { "epoch": 1.1959608108828208, "grad_norm": 0.17365297675132751, "learning_rate": 2.4736914961828783e-05, "loss": 1.9458, "step": 314210 }, { "epoch": 1.1959988733509435, "grad_norm": 0.18585903942584991, "learning_rate": 2.469308161287337e-05, "loss": 1.9241, "step": 314220 }, { "epoch": 1.1960369358190663, "grad_norm": 0.1692325323820114, "learning_rate": 2.464925230590692e-05, "loss": 1.9273, "step": 314230 }, { "epoch": 1.196074998287189, "grad_norm": 0.1854136884212494, "learning_rate": 2.46054270398115e-05, "loss": 1.9353, "step": 314240 }, { "epoch": 1.1961130607553117, "grad_norm": 0.17451083660125732, "learning_rate": 2.456160581346961e-05, "loss": 1.9396, "step": 314250 }, { "epoch": 1.1961511232234343, "grad_norm": 0.23344092071056366, "learning_rate": 2.4517788625764314e-05, "loss": 1.9489, "step": 314260 }, { "epoch": 1.196189185691557, "grad_norm": 0.17425473034381866, "learning_rate": 2.447397547557928e-05, "loss": 1.944, "step": 314270 }, { "epoch": 1.1962272481596796, "grad_norm": 0.18360301852226257, "learning_rate": 2.443016636179851e-05, "loss": 1.9276, "step": 314280 }, { "epoch": 1.1962653106278023, "grad_norm": 0.16981305181980133, "learning_rate": 2.4386361283306623e-05, "loss": 1.9358, "step": 314290 }, { "epoch": 1.196303373095925, "grad_norm": 0.16044333577156067, "learning_rate": 2.4342560238988788e-05, "loss": 1.9372, "step": 314300 }, { "epoch": 1.1963414355640478, "grad_norm": 0.21252797544002533, "learning_rate": 2.429876322773067e-05, "loss": 1.9511, "step": 314310 }, { "epoch": 1.1963794980321705, "grad_norm": 0.17850229144096375, "learning_rate": 2.425497024841833e-05, "loss": 1.9317, "step": 314320 }, { "epoch": 1.1964175605002931, "grad_norm": 0.16965432465076447, "learning_rate": 2.421118129993849e-05, "loss": 1.9541, "step": 314330 }, { "epoch": 1.1964556229684158, "grad_norm": 0.17304007709026337, "learning_rate": 2.416739638117832e-05, "loss": 1.942, "step": 314340 }, { "epoch": 1.1964936854365384, "grad_norm": 0.19845229387283325, "learning_rate": 2.4123615491025486e-05, "loss": 1.939, "step": 314350 }, { "epoch": 1.196531747904661, "grad_norm": 0.17881131172180176, "learning_rate": 2.4079838628368268e-05, "loss": 1.9412, "step": 314360 }, { "epoch": 1.1965698103727838, "grad_norm": 0.21299217641353607, "learning_rate": 2.4036065792095274e-05, "loss": 1.9476, "step": 314370 }, { "epoch": 1.1966078728409064, "grad_norm": 0.24780753254890442, "learning_rate": 2.3992296981095786e-05, "loss": 1.9381, "step": 314380 }, { "epoch": 1.196645935309029, "grad_norm": 0.1839221566915512, "learning_rate": 2.3948532194259465e-05, "loss": 1.9472, "step": 314390 }, { "epoch": 1.196683997777152, "grad_norm": 0.2312176525592804, "learning_rate": 2.3904771430476702e-05, "loss": 1.9393, "step": 314400 }, { "epoch": 1.1967220602452746, "grad_norm": 0.1622578650712967, "learning_rate": 2.386101468863805e-05, "loss": 1.9414, "step": 314410 }, { "epoch": 1.1967601227133973, "grad_norm": 0.16811257600784302, "learning_rate": 2.381726196763495e-05, "loss": 1.9453, "step": 314420 }, { "epoch": 1.19679818518152, "grad_norm": 0.17034026980400085, "learning_rate": 2.377351326635907e-05, "loss": 1.9404, "step": 314430 }, { "epoch": 1.1968362476496426, "grad_norm": 0.1621735841035843, "learning_rate": 2.3729768583702737e-05, "loss": 1.959, "step": 314440 }, { "epoch": 1.1968743101177652, "grad_norm": 0.20885998010635376, "learning_rate": 2.3686027918558727e-05, "loss": 1.9579, "step": 314450 }, { "epoch": 1.196912372585888, "grad_norm": 0.18931780755519867, "learning_rate": 2.364229126982037e-05, "loss": 1.9541, "step": 314460 }, { "epoch": 1.1969504350540106, "grad_norm": 0.1982506960630417, "learning_rate": 2.3598558636381386e-05, "loss": 1.9369, "step": 314470 }, { "epoch": 1.1969884975221334, "grad_norm": 0.21738344430923462, "learning_rate": 2.3554830017136153e-05, "loss": 1.9427, "step": 314480 }, { "epoch": 1.197026559990256, "grad_norm": 0.18937720358371735, "learning_rate": 2.3511105410979505e-05, "loss": 1.9383, "step": 314490 }, { "epoch": 1.1970646224583787, "grad_norm": 0.2373971939086914, "learning_rate": 2.346738481680677e-05, "loss": 1.9378, "step": 314500 }, { "epoch": 1.1971026849265014, "grad_norm": 0.21745845675468445, "learning_rate": 2.342366823351372e-05, "loss": 1.9449, "step": 314510 }, { "epoch": 1.197140747394624, "grad_norm": 0.17506791651248932, "learning_rate": 2.3379955659996732e-05, "loss": 1.9456, "step": 314520 }, { "epoch": 1.1971788098627467, "grad_norm": 0.17458729445934296, "learning_rate": 2.3336247095152697e-05, "loss": 1.9456, "step": 314530 }, { "epoch": 1.1972168723308694, "grad_norm": 0.17563146352767944, "learning_rate": 2.329254253787888e-05, "loss": 1.9325, "step": 314540 }, { "epoch": 1.197254934798992, "grad_norm": 0.18956497311592102, "learning_rate": 2.3248841987073222e-05, "loss": 1.9448, "step": 314550 }, { "epoch": 1.1972929972671147, "grad_norm": 0.17544913291931152, "learning_rate": 2.3205145441634046e-05, "loss": 1.9445, "step": 314560 }, { "epoch": 1.1973310597352376, "grad_norm": 0.2312907725572586, "learning_rate": 2.3161452900460235e-05, "loss": 1.9474, "step": 314570 }, { "epoch": 1.1973691222033602, "grad_norm": 0.1695864498615265, "learning_rate": 2.3117764362451167e-05, "loss": 1.9376, "step": 314580 }, { "epoch": 1.1974071846714829, "grad_norm": 0.2052493542432785, "learning_rate": 2.3074079826506668e-05, "loss": 1.9466, "step": 314590 }, { "epoch": 1.1974452471396055, "grad_norm": 0.1754942685365677, "learning_rate": 2.303039929152717e-05, "loss": 1.9307, "step": 314600 }, { "epoch": 1.1974833096077282, "grad_norm": 0.16554835438728333, "learning_rate": 2.298672275641356e-05, "loss": 1.9401, "step": 314610 }, { "epoch": 1.1975213720758509, "grad_norm": 0.1665649712085724, "learning_rate": 2.2943050220067207e-05, "loss": 1.9384, "step": 314620 }, { "epoch": 1.1975594345439735, "grad_norm": 0.17551663517951965, "learning_rate": 2.2899381681389997e-05, "loss": 1.9471, "step": 314630 }, { "epoch": 1.1975974970120962, "grad_norm": 0.1639355570077896, "learning_rate": 2.2855717139284304e-05, "loss": 1.9447, "step": 314640 }, { "epoch": 1.197635559480219, "grad_norm": 0.19529812037944794, "learning_rate": 2.2812056592653064e-05, "loss": 1.9564, "step": 314650 }, { "epoch": 1.1976736219483417, "grad_norm": 0.21838751435279846, "learning_rate": 2.276840004039965e-05, "loss": 1.9413, "step": 314660 }, { "epoch": 1.1977116844164644, "grad_norm": 0.16338661313056946, "learning_rate": 2.2724747481428e-05, "loss": 1.9572, "step": 314670 }, { "epoch": 1.197749746884587, "grad_norm": 0.2040199339389801, "learning_rate": 2.2681098914642483e-05, "loss": 1.9375, "step": 314680 }, { "epoch": 1.1977878093527097, "grad_norm": 0.2077571004629135, "learning_rate": 2.263745433894798e-05, "loss": 1.9341, "step": 314690 }, { "epoch": 1.1978258718208323, "grad_norm": 0.16745197772979736, "learning_rate": 2.259381375324987e-05, "loss": 1.9454, "step": 314700 }, { "epoch": 1.197863934288955, "grad_norm": 0.22090986371040344, "learning_rate": 2.255017715645413e-05, "loss": 1.9298, "step": 314710 }, { "epoch": 1.1979019967570776, "grad_norm": 0.2772158086299896, "learning_rate": 2.250654454746709e-05, "loss": 1.9289, "step": 314720 }, { "epoch": 1.1979400592252003, "grad_norm": 0.2125716656446457, "learning_rate": 2.2462915925195727e-05, "loss": 1.9379, "step": 314730 }, { "epoch": 1.197978121693323, "grad_norm": 0.21410690248012543, "learning_rate": 2.2419291288547417e-05, "loss": 1.9402, "step": 314740 }, { "epoch": 1.1980161841614458, "grad_norm": 0.25055932998657227, "learning_rate": 2.237567063642998e-05, "loss": 1.953, "step": 314750 }, { "epoch": 1.1980542466295685, "grad_norm": 0.2538086473941803, "learning_rate": 2.233205396775195e-05, "loss": 1.9401, "step": 314760 }, { "epoch": 1.1980923090976912, "grad_norm": 0.19334672391414642, "learning_rate": 2.2288441281422045e-05, "loss": 1.9182, "step": 314770 }, { "epoch": 1.1981303715658138, "grad_norm": 0.18246856331825256, "learning_rate": 2.224483257634985e-05, "loss": 1.9641, "step": 314780 }, { "epoch": 1.1981684340339365, "grad_norm": 0.16561700403690338, "learning_rate": 2.2201227851445126e-05, "loss": 1.956, "step": 314790 }, { "epoch": 1.1982064965020591, "grad_norm": 0.182077556848526, "learning_rate": 2.2157627105618307e-05, "loss": 1.9251, "step": 314800 }, { "epoch": 1.1982445589701818, "grad_norm": 0.21854285895824432, "learning_rate": 2.2114030337780257e-05, "loss": 1.9317, "step": 314810 }, { "epoch": 1.1982826214383047, "grad_norm": 0.1683615893125534, "learning_rate": 2.2070437546842403e-05, "loss": 1.9418, "step": 314820 }, { "epoch": 1.1983206839064273, "grad_norm": 0.20173421502113342, "learning_rate": 2.2026848731716566e-05, "loss": 1.9415, "step": 314830 }, { "epoch": 1.19835874637455, "grad_norm": 0.17751389741897583, "learning_rate": 2.198326389131522e-05, "loss": 1.9401, "step": 314840 }, { "epoch": 1.1983968088426726, "grad_norm": 0.1780787855386734, "learning_rate": 2.1939683024551128e-05, "loss": 1.9293, "step": 314850 }, { "epoch": 1.1984348713107953, "grad_norm": 0.1698933094739914, "learning_rate": 2.1896106130337657e-05, "loss": 1.9442, "step": 314860 }, { "epoch": 1.198472933778918, "grad_norm": 0.18079432845115662, "learning_rate": 2.1852533207588786e-05, "loss": 1.9448, "step": 314870 }, { "epoch": 1.1985109962470406, "grad_norm": 0.19679990410804749, "learning_rate": 2.1808964255218778e-05, "loss": 1.9552, "step": 314880 }, { "epoch": 1.1985490587151633, "grad_norm": 0.19070780277252197, "learning_rate": 2.176539927214244e-05, "loss": 1.9346, "step": 314890 }, { "epoch": 1.198587121183286, "grad_norm": 0.1800595223903656, "learning_rate": 2.1721838257275195e-05, "loss": 1.9378, "step": 314900 }, { "epoch": 1.1986251836514086, "grad_norm": 0.22656163573265076, "learning_rate": 2.1678281209532912e-05, "loss": 1.9489, "step": 314910 }, { "epoch": 1.1986632461195315, "grad_norm": 0.16311343014240265, "learning_rate": 2.163472812783185e-05, "loss": 1.942, "step": 314920 }, { "epoch": 1.1987013085876541, "grad_norm": 0.1718357354402542, "learning_rate": 2.1591179011088868e-05, "loss": 1.9385, "step": 314930 }, { "epoch": 1.1987393710557768, "grad_norm": 0.18059194087982178, "learning_rate": 2.1547633858221284e-05, "loss": 1.9283, "step": 314940 }, { "epoch": 1.1987774335238994, "grad_norm": 0.2025146633386612, "learning_rate": 2.15040926681469e-05, "loss": 1.9472, "step": 314950 }, { "epoch": 1.198815495992022, "grad_norm": 0.15839137136936188, "learning_rate": 2.1460555439784036e-05, "loss": 1.9309, "step": 314960 }, { "epoch": 1.1988535584601447, "grad_norm": 0.2715093195438385, "learning_rate": 2.1417022172051492e-05, "loss": 1.945, "step": 314970 }, { "epoch": 1.1988916209282674, "grad_norm": 0.18683266639709473, "learning_rate": 2.1373492863868526e-05, "loss": 1.9467, "step": 314980 }, { "epoch": 1.1989296833963903, "grad_norm": 0.17632196843624115, "learning_rate": 2.132996751415489e-05, "loss": 1.9365, "step": 314990 }, { "epoch": 1.198967745864513, "grad_norm": 0.17075899243354797, "learning_rate": 2.128644612183095e-05, "loss": 1.9374, "step": 315000 }, { "epoch": 1.1990058083326356, "grad_norm": 0.1725279837846756, "learning_rate": 2.1242928685817396e-05, "loss": 1.9407, "step": 315010 }, { "epoch": 1.1990438708007582, "grad_norm": 0.19900698959827423, "learning_rate": 2.119941520503549e-05, "loss": 1.9349, "step": 315020 }, { "epoch": 1.199081933268881, "grad_norm": 0.30478909611701965, "learning_rate": 2.1155905678406972e-05, "loss": 1.9411, "step": 315030 }, { "epoch": 1.1991199957370036, "grad_norm": 0.21382011473178864, "learning_rate": 2.11124001048541e-05, "loss": 1.9448, "step": 315040 }, { "epoch": 1.1991580582051262, "grad_norm": 0.19819001853466034, "learning_rate": 2.106889848329957e-05, "loss": 1.9446, "step": 315050 }, { "epoch": 1.1991961206732489, "grad_norm": 0.16067779064178467, "learning_rate": 2.102540081266663e-05, "loss": 1.9528, "step": 315060 }, { "epoch": 1.1992341831413715, "grad_norm": 0.24102060496807098, "learning_rate": 2.0981907091878916e-05, "loss": 1.9434, "step": 315070 }, { "epoch": 1.1992722456094942, "grad_norm": 0.1644870638847351, "learning_rate": 2.0938417319860626e-05, "loss": 1.9393, "step": 315080 }, { "epoch": 1.199310308077617, "grad_norm": 0.25682348012924194, "learning_rate": 2.0894931495536507e-05, "loss": 1.9235, "step": 315090 }, { "epoch": 1.1993483705457397, "grad_norm": 0.16999438405036926, "learning_rate": 2.085144961783164e-05, "loss": 1.9349, "step": 315100 }, { "epoch": 1.1993864330138624, "grad_norm": 0.1793372631072998, "learning_rate": 2.0807971685671723e-05, "loss": 1.9546, "step": 315110 }, { "epoch": 1.199424495481985, "grad_norm": 0.20978040993213654, "learning_rate": 2.0764497697982886e-05, "loss": 1.9386, "step": 315120 }, { "epoch": 1.1994625579501077, "grad_norm": 0.21455638110637665, "learning_rate": 2.0721027653691715e-05, "loss": 1.9365, "step": 315130 }, { "epoch": 1.1995006204182304, "grad_norm": 0.20593143999576569, "learning_rate": 2.06775615517254e-05, "loss": 1.9368, "step": 315140 }, { "epoch": 1.199538682886353, "grad_norm": 0.18800471723079681, "learning_rate": 2.0634099391011462e-05, "loss": 1.9266, "step": 315150 }, { "epoch": 1.1995767453544757, "grad_norm": 0.165223628282547, "learning_rate": 2.0590641170478043e-05, "loss": 1.9438, "step": 315160 }, { "epoch": 1.1996148078225986, "grad_norm": 0.16987620294094086, "learning_rate": 2.0547186889053713e-05, "loss": 1.9355, "step": 315170 }, { "epoch": 1.1996528702907212, "grad_norm": 0.17159295082092285, "learning_rate": 2.0503736545667505e-05, "loss": 1.9462, "step": 315180 }, { "epoch": 1.1996909327588439, "grad_norm": 0.16801120340824127, "learning_rate": 2.0460290139248937e-05, "loss": 1.9341, "step": 315190 }, { "epoch": 1.1997289952269665, "grad_norm": 0.16506770253181458, "learning_rate": 2.0416847668728032e-05, "loss": 1.9476, "step": 315200 }, { "epoch": 1.1997670576950892, "grad_norm": 0.194406196475029, "learning_rate": 2.037340913303537e-05, "loss": 1.932, "step": 315210 }, { "epoch": 1.1998051201632118, "grad_norm": 0.272256463766098, "learning_rate": 2.0329974531101915e-05, "loss": 1.944, "step": 315220 }, { "epoch": 1.1998431826313345, "grad_norm": 0.2028186172246933, "learning_rate": 2.028654386185913e-05, "loss": 1.9352, "step": 315230 }, { "epoch": 1.1998812450994571, "grad_norm": 0.21224938333034515, "learning_rate": 2.0243117124238987e-05, "loss": 1.9453, "step": 315240 }, { "epoch": 1.1999193075675798, "grad_norm": 0.1703551709651947, "learning_rate": 2.0199694317173943e-05, "loss": 1.947, "step": 315250 }, { "epoch": 1.1999573700357027, "grad_norm": 0.16464319825172424, "learning_rate": 2.015627543959686e-05, "loss": 1.9359, "step": 315260 }, { "epoch": 1.1999954325038253, "grad_norm": 0.16439057886600494, "learning_rate": 2.0112860490441308e-05, "loss": 1.9366, "step": 315270 }, { "epoch": 1.200033494971948, "grad_norm": 0.1883467137813568, "learning_rate": 2.0069449468640977e-05, "loss": 1.939, "step": 315280 }, { "epoch": 1.2000715574400707, "grad_norm": 0.1818651407957077, "learning_rate": 2.0026042373130437e-05, "loss": 1.9201, "step": 315290 }, { "epoch": 1.2001096199081933, "grad_norm": 0.18975751101970673, "learning_rate": 1.9982639202844378e-05, "loss": 1.9409, "step": 315300 }, { "epoch": 1.200147682376316, "grad_norm": 0.2573046088218689, "learning_rate": 1.993923995671826e-05, "loss": 1.9434, "step": 315310 }, { "epoch": 1.2001857448444386, "grad_norm": 0.1690666824579239, "learning_rate": 1.9895844633687822e-05, "loss": 1.9301, "step": 315320 }, { "epoch": 1.2002238073125613, "grad_norm": 0.18057064712047577, "learning_rate": 1.9852453232689473e-05, "loss": 1.9557, "step": 315330 }, { "epoch": 1.2002618697806842, "grad_norm": 0.16391849517822266, "learning_rate": 1.9809065752659895e-05, "loss": 1.9417, "step": 315340 }, { "epoch": 1.2002999322488068, "grad_norm": 0.18901868164539337, "learning_rate": 1.976568219253633e-05, "loss": 1.9536, "step": 315350 }, { "epoch": 1.2003379947169295, "grad_norm": 0.19878660142421722, "learning_rate": 1.9722302551256623e-05, "loss": 1.9449, "step": 315360 }, { "epoch": 1.2003760571850521, "grad_norm": 0.17215828597545624, "learning_rate": 1.967892682775896e-05, "loss": 1.9358, "step": 315370 }, { "epoch": 1.2004141196531748, "grad_norm": 0.19715379178524017, "learning_rate": 1.963555502098202e-05, "loss": 1.9249, "step": 315380 }, { "epoch": 1.2004521821212975, "grad_norm": 0.2076154500246048, "learning_rate": 1.959218712986499e-05, "loss": 1.9369, "step": 315390 }, { "epoch": 1.20049024458942, "grad_norm": 0.17226295173168182, "learning_rate": 1.9548823153347493e-05, "loss": 1.9362, "step": 315400 }, { "epoch": 1.2005283070575428, "grad_norm": 0.20354777574539185, "learning_rate": 1.950546309036977e-05, "loss": 1.9439, "step": 315410 }, { "epoch": 1.2005663695256654, "grad_norm": 0.20497560501098633, "learning_rate": 1.946210693987238e-05, "loss": 1.9236, "step": 315420 }, { "epoch": 1.2006044319937883, "grad_norm": 0.16953223943710327, "learning_rate": 1.9418754700796404e-05, "loss": 1.9513, "step": 315430 }, { "epoch": 1.200642494461911, "grad_norm": 0.16467490792274475, "learning_rate": 1.937540637208335e-05, "loss": 1.9453, "step": 315440 }, { "epoch": 1.2006805569300336, "grad_norm": 0.23944929242134094, "learning_rate": 1.9332061952675395e-05, "loss": 1.9557, "step": 315450 }, { "epoch": 1.2007186193981563, "grad_norm": 0.26693135499954224, "learning_rate": 1.9288721441515e-05, "loss": 1.9552, "step": 315460 }, { "epoch": 1.200756681866279, "grad_norm": 0.1673591136932373, "learning_rate": 1.9245384837545233e-05, "loss": 1.9439, "step": 315470 }, { "epoch": 1.2007947443344016, "grad_norm": 0.17863279581069946, "learning_rate": 1.9202052139709437e-05, "loss": 1.9347, "step": 315480 }, { "epoch": 1.2008328068025242, "grad_norm": 0.20780551433563232, "learning_rate": 1.9158723346951735e-05, "loss": 1.9419, "step": 315490 }, { "epoch": 1.200870869270647, "grad_norm": 0.19351142644882202, "learning_rate": 1.911539845821636e-05, "loss": 1.9557, "step": 315500 }, { "epoch": 1.2009089317387698, "grad_norm": 0.2197602540254593, "learning_rate": 1.9072077472448378e-05, "loss": 1.9619, "step": 315510 }, { "epoch": 1.2009469942068924, "grad_norm": 0.17918086051940918, "learning_rate": 1.9028760388593136e-05, "loss": 1.9573, "step": 315520 }, { "epoch": 1.200985056675015, "grad_norm": 0.2039628028869629, "learning_rate": 1.8985447205596473e-05, "loss": 1.9322, "step": 315530 }, { "epoch": 1.2010231191431378, "grad_norm": 0.24598252773284912, "learning_rate": 1.8942137922404735e-05, "loss": 1.9402, "step": 315540 }, { "epoch": 1.2010611816112604, "grad_norm": 0.3049358129501343, "learning_rate": 1.8898832537964706e-05, "loss": 1.918, "step": 315550 }, { "epoch": 1.201099244079383, "grad_norm": 0.1963425576686859, "learning_rate": 1.8855531051223675e-05, "loss": 1.9256, "step": 315560 }, { "epoch": 1.2011373065475057, "grad_norm": 0.164651021361351, "learning_rate": 1.8812233461129368e-05, "loss": 1.9371, "step": 315570 }, { "epoch": 1.2011753690156284, "grad_norm": 0.1651751697063446, "learning_rate": 1.876893976663008e-05, "loss": 1.9283, "step": 315580 }, { "epoch": 1.201213431483751, "grad_norm": 0.16817796230316162, "learning_rate": 1.872564996667442e-05, "loss": 1.944, "step": 315590 }, { "epoch": 1.2012514939518737, "grad_norm": 0.16912603378295898, "learning_rate": 1.868236406021162e-05, "loss": 1.9374, "step": 315600 }, { "epoch": 1.2012895564199966, "grad_norm": 0.16702499985694885, "learning_rate": 1.863908204619136e-05, "loss": 1.9432, "step": 315610 }, { "epoch": 1.2013276188881192, "grad_norm": 0.167423278093338, "learning_rate": 1.8595803923563635e-05, "loss": 1.941, "step": 315620 }, { "epoch": 1.2013656813562419, "grad_norm": 0.17401781678199768, "learning_rate": 1.855252969127913e-05, "loss": 1.9322, "step": 315630 }, { "epoch": 1.2014037438243645, "grad_norm": 0.16266636550426483, "learning_rate": 1.85092593482889e-05, "loss": 1.9245, "step": 315640 }, { "epoch": 1.2014418062924872, "grad_norm": 0.17383623123168945, "learning_rate": 1.846599289354445e-05, "loss": 1.9491, "step": 315650 }, { "epoch": 1.2014798687606099, "grad_norm": 0.24540668725967407, "learning_rate": 1.842273032599778e-05, "loss": 1.9434, "step": 315660 }, { "epoch": 1.2015179312287325, "grad_norm": 0.24183866381645203, "learning_rate": 1.837947164460141e-05, "loss": 1.9423, "step": 315670 }, { "epoch": 1.2015559936968554, "grad_norm": 0.16616332530975342, "learning_rate": 1.833621684830816e-05, "loss": 1.9295, "step": 315680 }, { "epoch": 1.201594056164978, "grad_norm": 0.15895366668701172, "learning_rate": 1.8292965936071604e-05, "loss": 1.9401, "step": 315690 }, { "epoch": 1.2016321186331007, "grad_norm": 0.18763354420661926, "learning_rate": 1.8249718906845513e-05, "loss": 1.9384, "step": 315700 }, { "epoch": 1.2016701811012234, "grad_norm": 0.18516993522644043, "learning_rate": 1.820647575958434e-05, "loss": 1.9294, "step": 315710 }, { "epoch": 1.201708243569346, "grad_norm": 0.17786172032356262, "learning_rate": 1.8163236493242807e-05, "loss": 1.9444, "step": 315720 }, { "epoch": 1.2017463060374687, "grad_norm": 0.17752547562122345, "learning_rate": 1.812000110677625e-05, "loss": 1.9276, "step": 315730 }, { "epoch": 1.2017843685055913, "grad_norm": 0.17008447647094727, "learning_rate": 1.8076769599140398e-05, "loss": 1.9465, "step": 315740 }, { "epoch": 1.201822430973714, "grad_norm": 0.1759907305240631, "learning_rate": 1.8033541969291523e-05, "loss": 1.9282, "step": 315750 }, { "epoch": 1.2018604934418367, "grad_norm": 0.22066645324230194, "learning_rate": 1.7990318216186297e-05, "loss": 1.9298, "step": 315760 }, { "epoch": 1.2018985559099593, "grad_norm": 0.1596367508172989, "learning_rate": 1.7947098338781887e-05, "loss": 1.9404, "step": 315770 }, { "epoch": 1.2019366183780822, "grad_norm": 0.18029053509235382, "learning_rate": 1.790388233603596e-05, "loss": 1.9486, "step": 315780 }, { "epoch": 1.2019746808462048, "grad_norm": 0.22112157940864563, "learning_rate": 1.7860670206906572e-05, "loss": 1.9393, "step": 315790 }, { "epoch": 1.2020127433143275, "grad_norm": 0.17528939247131348, "learning_rate": 1.781746195035222e-05, "loss": 1.9224, "step": 315800 }, { "epoch": 1.2020508057824502, "grad_norm": 0.1923305094242096, "learning_rate": 1.7774257565332075e-05, "loss": 1.929, "step": 315810 }, { "epoch": 1.2020888682505728, "grad_norm": 0.17303921282291412, "learning_rate": 1.7731057050805578e-05, "loss": 1.9316, "step": 315820 }, { "epoch": 1.2021269307186955, "grad_norm": 0.21711373329162598, "learning_rate": 1.768786040573267e-05, "loss": 1.9342, "step": 315830 }, { "epoch": 1.2021649931868181, "grad_norm": 0.2056269496679306, "learning_rate": 1.764466762907385e-05, "loss": 1.9473, "step": 315840 }, { "epoch": 1.202203055654941, "grad_norm": 0.23204374313354492, "learning_rate": 1.76014787197899e-05, "loss": 1.9336, "step": 315850 }, { "epoch": 1.2022411181230637, "grad_norm": 0.17051061987876892, "learning_rate": 1.755829367684225e-05, "loss": 1.9504, "step": 315860 }, { "epoch": 1.2022791805911863, "grad_norm": 0.19815026223659515, "learning_rate": 1.7515112499192688e-05, "loss": 1.9396, "step": 315870 }, { "epoch": 1.202317243059309, "grad_norm": 0.16430552303791046, "learning_rate": 1.747193518580359e-05, "loss": 1.95, "step": 315880 }, { "epoch": 1.2023553055274316, "grad_norm": 0.2549295723438263, "learning_rate": 1.7428761735637623e-05, "loss": 1.9507, "step": 315890 }, { "epoch": 1.2023933679955543, "grad_norm": 0.17700010538101196, "learning_rate": 1.738559214765806e-05, "loss": 1.9427, "step": 315900 }, { "epoch": 1.202431430463677, "grad_norm": 0.17616471648216248, "learning_rate": 1.7342426420828506e-05, "loss": 1.9553, "step": 315910 }, { "epoch": 1.2024694929317996, "grad_norm": 0.18350853025913239, "learning_rate": 1.729926455411318e-05, "loss": 1.928, "step": 315920 }, { "epoch": 1.2025075553999223, "grad_norm": 0.1687181144952774, "learning_rate": 1.725610654647669e-05, "loss": 1.9386, "step": 315930 }, { "epoch": 1.202545617868045, "grad_norm": 0.1649000197649002, "learning_rate": 1.7212952396884085e-05, "loss": 1.935, "step": 315940 }, { "epoch": 1.2025836803361678, "grad_norm": 0.1732434183359146, "learning_rate": 1.7169802104300857e-05, "loss": 1.9515, "step": 315950 }, { "epoch": 1.2026217428042905, "grad_norm": 0.20426325500011444, "learning_rate": 1.7126655667693057e-05, "loss": 1.9453, "step": 315960 }, { "epoch": 1.2026598052724131, "grad_norm": 0.23240119218826294, "learning_rate": 1.7083513086027125e-05, "loss": 1.945, "step": 315970 }, { "epoch": 1.2026978677405358, "grad_norm": 0.16769737005233765, "learning_rate": 1.7040374358269996e-05, "loss": 1.9309, "step": 315980 }, { "epoch": 1.2027359302086584, "grad_norm": 0.16312843561172485, "learning_rate": 1.6997239483388995e-05, "loss": 1.9313, "step": 315990 }, { "epoch": 1.202773992676781, "grad_norm": 0.21969033777713776, "learning_rate": 1.695410846035206e-05, "loss": 1.9374, "step": 316000 }, { "epoch": 1.2028120551449037, "grad_norm": 0.18756617605686188, "learning_rate": 1.6910981288127404e-05, "loss": 1.9189, "step": 316010 }, { "epoch": 1.2028501176130264, "grad_norm": 0.16086409986019135, "learning_rate": 1.68678579656838e-05, "loss": 1.9408, "step": 316020 }, { "epoch": 1.2028881800811493, "grad_norm": 0.16711248457431793, "learning_rate": 1.6824738491990565e-05, "loss": 1.9363, "step": 316030 }, { "epoch": 1.202926242549272, "grad_norm": 0.16341838240623474, "learning_rate": 1.6781622866017197e-05, "loss": 1.9355, "step": 316040 }, { "epoch": 1.2029643050173946, "grad_norm": 0.19947198033332825, "learning_rate": 1.673851108673402e-05, "loss": 1.9331, "step": 316050 }, { "epoch": 1.2030023674855173, "grad_norm": 0.17706602811813354, "learning_rate": 1.6695403153111577e-05, "loss": 1.9516, "step": 316060 }, { "epoch": 1.20304042995364, "grad_norm": 0.1658812314271927, "learning_rate": 1.6652299064120913e-05, "loss": 1.9349, "step": 316070 }, { "epoch": 1.2030784924217626, "grad_norm": 0.1825098842382431, "learning_rate": 1.6609198818733574e-05, "loss": 1.95, "step": 316080 }, { "epoch": 1.2031165548898852, "grad_norm": 0.17820632457733154, "learning_rate": 1.656610241592149e-05, "loss": 1.9301, "step": 316090 }, { "epoch": 1.2031546173580079, "grad_norm": 0.16276079416275024, "learning_rate": 1.65230098546571e-05, "loss": 1.9461, "step": 316100 }, { "epoch": 1.2031926798261305, "grad_norm": 0.19121406972408295, "learning_rate": 1.6479921133913333e-05, "loss": 1.9316, "step": 316110 }, { "epoch": 1.2032307422942534, "grad_norm": 0.1845954954624176, "learning_rate": 1.643683625266351e-05, "loss": 1.943, "step": 316120 }, { "epoch": 1.203268804762376, "grad_norm": 0.20090362429618835, "learning_rate": 1.6393755209881512e-05, "loss": 1.9367, "step": 316130 }, { "epoch": 1.2033068672304987, "grad_norm": 0.1640109121799469, "learning_rate": 1.6350678004541542e-05, "loss": 1.9381, "step": 316140 }, { "epoch": 1.2033449296986214, "grad_norm": 0.17727604508399963, "learning_rate": 1.630760463561831e-05, "loss": 1.9326, "step": 316150 }, { "epoch": 1.203382992166744, "grad_norm": 0.16008159518241882, "learning_rate": 1.626453510208703e-05, "loss": 1.9412, "step": 316160 }, { "epoch": 1.2034210546348667, "grad_norm": 0.16219596564769745, "learning_rate": 1.6221469402923296e-05, "loss": 1.9379, "step": 316170 }, { "epoch": 1.2034591171029894, "grad_norm": 0.18563158810138702, "learning_rate": 1.6178407537103256e-05, "loss": 1.942, "step": 316180 }, { "epoch": 1.203497179571112, "grad_norm": 0.2555886507034302, "learning_rate": 1.6135349503603458e-05, "loss": 1.934, "step": 316190 }, { "epoch": 1.203535242039235, "grad_norm": 0.15692013502120972, "learning_rate": 1.609229530140083e-05, "loss": 1.9368, "step": 316200 }, { "epoch": 1.2035733045073576, "grad_norm": 0.19800697267055511, "learning_rate": 1.6049244929472905e-05, "loss": 1.9496, "step": 316210 }, { "epoch": 1.2036113669754802, "grad_norm": 0.18161258101463318, "learning_rate": 1.600619838679751e-05, "loss": 1.9358, "step": 316220 }, { "epoch": 1.2036494294436029, "grad_norm": 0.20629428327083588, "learning_rate": 1.5963155672353124e-05, "loss": 1.9272, "step": 316230 }, { "epoch": 1.2036874919117255, "grad_norm": 0.20691783726215363, "learning_rate": 1.5920116785118455e-05, "loss": 1.9311, "step": 316240 }, { "epoch": 1.2037255543798482, "grad_norm": 0.1592143326997757, "learning_rate": 1.5877081724072873e-05, "loss": 1.9388, "step": 316250 }, { "epoch": 1.2037636168479708, "grad_norm": 0.16132837533950806, "learning_rate": 1.5834050488196084e-05, "loss": 1.9406, "step": 316260 }, { "epoch": 1.2038016793160935, "grad_norm": 0.1728985458612442, "learning_rate": 1.5791023076468235e-05, "loss": 1.9434, "step": 316270 }, { "epoch": 1.2038397417842162, "grad_norm": 0.17591522634029388, "learning_rate": 1.574799948786998e-05, "loss": 1.9307, "step": 316280 }, { "epoch": 1.203877804252339, "grad_norm": 0.17040970921516418, "learning_rate": 1.570497972138235e-05, "loss": 1.9486, "step": 316290 }, { "epoch": 1.2039158667204617, "grad_norm": 0.1899997591972351, "learning_rate": 1.5661963775986997e-05, "loss": 1.9467, "step": 316300 }, { "epoch": 1.2039539291885843, "grad_norm": 0.18535642325878143, "learning_rate": 1.5618951650665848e-05, "loss": 1.9334, "step": 316310 }, { "epoch": 1.203991991656707, "grad_norm": 0.16790741682052612, "learning_rate": 1.557594334440132e-05, "loss": 1.923, "step": 316320 }, { "epoch": 1.2040300541248297, "grad_norm": 0.1611746996641159, "learning_rate": 1.5532938856176403e-05, "loss": 1.9311, "step": 316330 }, { "epoch": 1.2040681165929523, "grad_norm": 0.16700397431850433, "learning_rate": 1.548993818497435e-05, "loss": 1.9496, "step": 316340 }, { "epoch": 1.204106179061075, "grad_norm": 0.1745607554912567, "learning_rate": 1.5446941329778974e-05, "loss": 1.9348, "step": 316350 }, { "epoch": 1.2041442415291976, "grad_norm": 0.15827342867851257, "learning_rate": 1.540394828957453e-05, "loss": 1.9396, "step": 316360 }, { "epoch": 1.2041823039973205, "grad_norm": 0.19324856996536255, "learning_rate": 1.536095906334578e-05, "loss": 1.918, "step": 316370 }, { "epoch": 1.2042203664654432, "grad_norm": 0.16694240272045135, "learning_rate": 1.531797365007781e-05, "loss": 1.9436, "step": 316380 }, { "epoch": 1.2042584289335658, "grad_norm": 0.17361164093017578, "learning_rate": 1.5274992048756263e-05, "loss": 1.9374, "step": 316390 }, { "epoch": 1.2042964914016885, "grad_norm": 0.17740680277347565, "learning_rate": 1.5232014258367122e-05, "loss": 1.9351, "step": 316400 }, { "epoch": 1.2043345538698111, "grad_norm": 0.16227102279663086, "learning_rate": 1.5189040277896915e-05, "loss": 1.9475, "step": 316410 }, { "epoch": 1.2043726163379338, "grad_norm": 0.18371431529521942, "learning_rate": 1.5146070106332622e-05, "loss": 1.9432, "step": 316420 }, { "epoch": 1.2044106788060565, "grad_norm": 0.21148449182510376, "learning_rate": 1.5103103742661606e-05, "loss": 1.9433, "step": 316430 }, { "epoch": 1.2044487412741791, "grad_norm": 0.1894027441740036, "learning_rate": 1.5060141185871735e-05, "loss": 1.9346, "step": 316440 }, { "epoch": 1.2044868037423018, "grad_norm": 0.17949220538139343, "learning_rate": 1.5017182434951259e-05, "loss": 1.9331, "step": 316450 }, { "epoch": 1.2045248662104244, "grad_norm": 0.18078577518463135, "learning_rate": 1.4974227488888991e-05, "loss": 1.9371, "step": 316460 }, { "epoch": 1.2045629286785473, "grad_norm": 0.18450722098350525, "learning_rate": 1.4931276346674071e-05, "loss": 1.9365, "step": 316470 }, { "epoch": 1.20460099114667, "grad_norm": 0.17150239646434784, "learning_rate": 1.4888329007296142e-05, "loss": 1.9435, "step": 316480 }, { "epoch": 1.2046390536147926, "grad_norm": 0.18355433642864227, "learning_rate": 1.4845385469745342e-05, "loss": 1.9367, "step": 316490 }, { "epoch": 1.2046771160829153, "grad_norm": 0.16779930889606476, "learning_rate": 1.4802445733012093e-05, "loss": 1.9297, "step": 316500 }, { "epoch": 1.204715178551038, "grad_norm": 0.1708436906337738, "learning_rate": 1.4759509796087478e-05, "loss": 1.937, "step": 316510 }, { "epoch": 1.2047532410191606, "grad_norm": 0.18960228562355042, "learning_rate": 1.471657765796286e-05, "loss": 1.9386, "step": 316520 }, { "epoch": 1.2047913034872832, "grad_norm": 0.17748971283435822, "learning_rate": 1.46736493176301e-05, "loss": 1.9326, "step": 316530 }, { "epoch": 1.2048293659554061, "grad_norm": 0.16371700167655945, "learning_rate": 1.4630724774081561e-05, "loss": 1.9442, "step": 316540 }, { "epoch": 1.2048674284235288, "grad_norm": 0.17727847397327423, "learning_rate": 1.4587804026310048e-05, "loss": 1.9279, "step": 316550 }, { "epoch": 1.2049054908916514, "grad_norm": 0.17282675206661224, "learning_rate": 1.4544887073308643e-05, "loss": 1.9324, "step": 316560 }, { "epoch": 1.204943553359774, "grad_norm": 0.18812990188598633, "learning_rate": 1.4501973914071098e-05, "loss": 1.9275, "step": 316570 }, { "epoch": 1.2049816158278968, "grad_norm": 0.16910743713378906, "learning_rate": 1.4459064547591439e-05, "loss": 1.9572, "step": 316580 }, { "epoch": 1.2050196782960194, "grad_norm": 0.18709997832775116, "learning_rate": 1.441615897286419e-05, "loss": 1.9381, "step": 316590 }, { "epoch": 1.205057740764142, "grad_norm": 0.1978847086429596, "learning_rate": 1.4373257188884492e-05, "loss": 1.954, "step": 316600 }, { "epoch": 1.2050958032322647, "grad_norm": 0.1758776754140854, "learning_rate": 1.4330359194647647e-05, "loss": 1.9405, "step": 316610 }, { "epoch": 1.2051338657003874, "grad_norm": 0.1603417992591858, "learning_rate": 1.4287464989149512e-05, "loss": 1.952, "step": 316620 }, { "epoch": 1.20517192816851, "grad_norm": 0.1598341017961502, "learning_rate": 1.4244574571386449e-05, "loss": 1.948, "step": 316630 }, { "epoch": 1.205209990636633, "grad_norm": 0.16874103248119354, "learning_rate": 1.4201687940355257e-05, "loss": 1.934, "step": 316640 }, { "epoch": 1.2052480531047556, "grad_norm": 0.21132296323776245, "learning_rate": 1.4158805095053019e-05, "loss": 1.937, "step": 316650 }, { "epoch": 1.2052861155728782, "grad_norm": 0.2193910777568817, "learning_rate": 1.4115926034477478e-05, "loss": 1.9226, "step": 316660 }, { "epoch": 1.205324178041001, "grad_norm": 0.16305066645145416, "learning_rate": 1.4073050757626715e-05, "loss": 1.9384, "step": 316670 }, { "epoch": 1.2053622405091236, "grad_norm": 0.20663656294345856, "learning_rate": 1.4030179263499254e-05, "loss": 1.9442, "step": 316680 }, { "epoch": 1.2054003029772462, "grad_norm": 0.18724772334098816, "learning_rate": 1.3987311551094061e-05, "loss": 1.9262, "step": 316690 }, { "epoch": 1.2054383654453689, "grad_norm": 0.1713886857032776, "learning_rate": 1.3944447619410495e-05, "loss": 1.957, "step": 316700 }, { "epoch": 1.2054764279134917, "grad_norm": 0.18026380240917206, "learning_rate": 1.3901587467448463e-05, "loss": 1.943, "step": 316710 }, { "epoch": 1.2055144903816144, "grad_norm": 0.21686388552188873, "learning_rate": 1.385873109420821e-05, "loss": 1.9225, "step": 316720 }, { "epoch": 1.205552552849737, "grad_norm": 0.17331081628799438, "learning_rate": 1.3815878498690593e-05, "loss": 1.9402, "step": 316730 }, { "epoch": 1.2055906153178597, "grad_norm": 0.162716805934906, "learning_rate": 1.377302967989663e-05, "loss": 1.9288, "step": 316740 }, { "epoch": 1.2056286777859824, "grad_norm": 0.1783205270767212, "learning_rate": 1.373018463682807e-05, "loss": 1.9269, "step": 316750 }, { "epoch": 1.205666740254105, "grad_norm": 0.22573402523994446, "learning_rate": 1.368734336848687e-05, "loss": 1.9314, "step": 316760 }, { "epoch": 1.2057048027222277, "grad_norm": 0.1547260284423828, "learning_rate": 1.3644505873875557e-05, "loss": 1.9366, "step": 316770 }, { "epoch": 1.2057428651903503, "grad_norm": 0.2504291534423828, "learning_rate": 1.3601672151997092e-05, "loss": 1.9327, "step": 316780 }, { "epoch": 1.205780927658473, "grad_norm": 0.17203214764595032, "learning_rate": 1.3558842201854771e-05, "loss": 1.924, "step": 316790 }, { "epoch": 1.2058189901265957, "grad_norm": 0.1697988212108612, "learning_rate": 1.3516016022452503e-05, "loss": 1.9553, "step": 316800 }, { "epoch": 1.2058570525947185, "grad_norm": 0.1626092493534088, "learning_rate": 1.3473193612794531e-05, "loss": 1.9418, "step": 316810 }, { "epoch": 1.2058951150628412, "grad_norm": 0.15964530408382416, "learning_rate": 1.3430374971885483e-05, "loss": 1.9268, "step": 316820 }, { "epoch": 1.2059331775309639, "grad_norm": 0.15963692963123322, "learning_rate": 1.3387560098730434e-05, "loss": 1.9271, "step": 316830 }, { "epoch": 1.2059712399990865, "grad_norm": 0.17314162850379944, "learning_rate": 1.3344748992335065e-05, "loss": 1.9507, "step": 316840 }, { "epoch": 1.2060093024672092, "grad_norm": 0.17278534173965454, "learning_rate": 1.3301941651705396e-05, "loss": 1.9598, "step": 316850 }, { "epoch": 1.2060473649353318, "grad_norm": 0.15973657369613647, "learning_rate": 1.3259138075847722e-05, "loss": 1.9439, "step": 316860 }, { "epoch": 1.2060854274034545, "grad_norm": 0.1658315509557724, "learning_rate": 1.3216338263769056e-05, "loss": 1.9355, "step": 316870 }, { "epoch": 1.2061234898715771, "grad_norm": 0.1809733659029007, "learning_rate": 1.3173542214476586e-05, "loss": 1.9396, "step": 316880 }, { "epoch": 1.2061615523397, "grad_norm": 0.16338767111301422, "learning_rate": 1.3130749926978158e-05, "loss": 1.946, "step": 316890 }, { "epoch": 1.2061996148078227, "grad_norm": 0.17688487470149994, "learning_rate": 1.3087961400281956e-05, "loss": 1.9438, "step": 316900 }, { "epoch": 1.2062376772759453, "grad_norm": 0.19036678969860077, "learning_rate": 1.3045176633396494e-05, "loss": 1.9236, "step": 316910 }, { "epoch": 1.206275739744068, "grad_norm": 0.17270910739898682, "learning_rate": 1.3002395625330954e-05, "loss": 1.9296, "step": 316920 }, { "epoch": 1.2063138022121906, "grad_norm": 0.16987460851669312, "learning_rate": 1.2959618375094795e-05, "loss": 1.943, "step": 316930 }, { "epoch": 1.2063518646803133, "grad_norm": 0.2615673840045929, "learning_rate": 1.2916844881697864e-05, "loss": 1.9325, "step": 316940 }, { "epoch": 1.206389927148436, "grad_norm": 0.2283545732498169, "learning_rate": 1.2874075144150565e-05, "loss": 1.9469, "step": 316950 }, { "epoch": 1.2064279896165586, "grad_norm": 0.18416893482208252, "learning_rate": 1.2831309161463745e-05, "loss": 1.9421, "step": 316960 }, { "epoch": 1.2064660520846813, "grad_norm": 0.20439530909061432, "learning_rate": 1.2788546932648526e-05, "loss": 1.9314, "step": 316970 }, { "epoch": 1.2065041145528042, "grad_norm": 0.1733761429786682, "learning_rate": 1.27457884567167e-05, "loss": 1.9307, "step": 316980 }, { "epoch": 1.2065421770209268, "grad_norm": 0.16197511553764343, "learning_rate": 1.2703033732680281e-05, "loss": 1.9357, "step": 316990 }, { "epoch": 1.2065802394890495, "grad_norm": 0.1635819524526596, "learning_rate": 1.2660282759551834e-05, "loss": 1.9314, "step": 317000 }, { "epoch": 1.2066183019571721, "grad_norm": 0.15866439044475555, "learning_rate": 1.2617535536344205e-05, "loss": 1.9365, "step": 317010 }, { "epoch": 1.2066563644252948, "grad_norm": 0.16617876291275024, "learning_rate": 1.257479206207096e-05, "loss": 1.9362, "step": 317020 }, { "epoch": 1.2066944268934174, "grad_norm": 0.168635293841362, "learning_rate": 1.2532052335745892e-05, "loss": 1.9368, "step": 317030 }, { "epoch": 1.20673248936154, "grad_norm": 0.1748170703649521, "learning_rate": 1.2489316356383173e-05, "loss": 1.9439, "step": 317040 }, { "epoch": 1.2067705518296628, "grad_norm": 0.1713894158601761, "learning_rate": 1.2446584122997539e-05, "loss": 1.9278, "step": 317050 }, { "epoch": 1.2068086142977856, "grad_norm": 0.17230573296546936, "learning_rate": 1.2403855634604111e-05, "loss": 1.9289, "step": 317060 }, { "epoch": 1.2068466767659083, "grad_norm": 0.16374553740024567, "learning_rate": 1.2361130890218452e-05, "loss": 1.9377, "step": 317070 }, { "epoch": 1.206884739234031, "grad_norm": 0.1656305491924286, "learning_rate": 1.2318409888856519e-05, "loss": 1.941, "step": 317080 }, { "epoch": 1.2069228017021536, "grad_norm": 0.16557244956493378, "learning_rate": 1.2275692629534819e-05, "loss": 1.933, "step": 317090 }, { "epoch": 1.2069608641702763, "grad_norm": 0.2730533480644226, "learning_rate": 1.2232979111270082e-05, "loss": 1.947, "step": 317100 }, { "epoch": 1.206998926638399, "grad_norm": 0.20291326940059662, "learning_rate": 1.2190269333079707e-05, "loss": 1.9453, "step": 317110 }, { "epoch": 1.2070369891065216, "grad_norm": 0.16012758016586304, "learning_rate": 1.2147563293981311e-05, "loss": 1.9241, "step": 317120 }, { "epoch": 1.2070750515746442, "grad_norm": 0.22131392359733582, "learning_rate": 1.2104860992993016e-05, "loss": 1.9247, "step": 317130 }, { "epoch": 1.2071131140427669, "grad_norm": 0.1807316094636917, "learning_rate": 1.206216242913344e-05, "loss": 1.929, "step": 317140 }, { "epoch": 1.2071511765108898, "grad_norm": 0.16896076500415802, "learning_rate": 1.2019467601421642e-05, "loss": 1.9314, "step": 317150 }, { "epoch": 1.2071892389790124, "grad_norm": 0.18690729141235352, "learning_rate": 1.1976776508876908e-05, "loss": 1.9534, "step": 317160 }, { "epoch": 1.207227301447135, "grad_norm": 0.16740208864212036, "learning_rate": 1.193408915051919e-05, "loss": 1.9324, "step": 317170 }, { "epoch": 1.2072653639152577, "grad_norm": 0.17816956341266632, "learning_rate": 1.189140552536877e-05, "loss": 1.9526, "step": 317180 }, { "epoch": 1.2073034263833804, "grad_norm": 0.16290433704853058, "learning_rate": 1.1848725632446322e-05, "loss": 1.9398, "step": 317190 }, { "epoch": 1.207341488851503, "grad_norm": 0.17890475690364838, "learning_rate": 1.1806049470773017e-05, "loss": 1.9397, "step": 317200 }, { "epoch": 1.2073795513196257, "grad_norm": 0.20076799392700195, "learning_rate": 1.176337703937036e-05, "loss": 1.9346, "step": 317210 }, { "epoch": 1.2074176137877484, "grad_norm": 0.1811753809452057, "learning_rate": 1.1720708337260466e-05, "loss": 1.9452, "step": 317220 }, { "epoch": 1.2074556762558712, "grad_norm": 0.1892351508140564, "learning_rate": 1.1678043363465673e-05, "loss": 1.9416, "step": 317230 }, { "epoch": 1.207493738723994, "grad_norm": 0.17329028248786926, "learning_rate": 1.1635382117008819e-05, "loss": 1.9457, "step": 317240 }, { "epoch": 1.2075318011921166, "grad_norm": 0.17507338523864746, "learning_rate": 1.1592724596913185e-05, "loss": 1.9367, "step": 317250 }, { "epoch": 1.2075698636602392, "grad_norm": 0.15674608945846558, "learning_rate": 1.155007080220255e-05, "loss": 1.9333, "step": 317260 }, { "epoch": 1.2076079261283619, "grad_norm": 0.18256492912769318, "learning_rate": 1.1507420731900975e-05, "loss": 1.9415, "step": 317270 }, { "epoch": 1.2076459885964845, "grad_norm": 0.1951712816953659, "learning_rate": 1.1464774385033017e-05, "loss": 1.9341, "step": 317280 }, { "epoch": 1.2076840510646072, "grad_norm": 0.18389761447906494, "learning_rate": 1.1422131760623733e-05, "loss": 1.9462, "step": 317290 }, { "epoch": 1.2077221135327298, "grad_norm": 0.18417753279209137, "learning_rate": 1.1379492857698403e-05, "loss": 1.9332, "step": 317300 }, { "epoch": 1.2077601760008525, "grad_norm": 0.1719813197851181, "learning_rate": 1.133685767528292e-05, "loss": 1.9373, "step": 317310 }, { "epoch": 1.2077982384689752, "grad_norm": 0.16601203382015228, "learning_rate": 1.1294226212403614e-05, "loss": 1.9414, "step": 317320 }, { "epoch": 1.207836300937098, "grad_norm": 0.2003100961446762, "learning_rate": 1.1251598468087043e-05, "loss": 1.933, "step": 317330 }, { "epoch": 1.2078743634052207, "grad_norm": 0.21693551540374756, "learning_rate": 1.1208974441360432e-05, "loss": 1.9375, "step": 317340 }, { "epoch": 1.2079124258733434, "grad_norm": 0.1749928891658783, "learning_rate": 1.1166354131251167e-05, "loss": 1.9329, "step": 317350 }, { "epoch": 1.207950488341466, "grad_norm": 0.20150145888328552, "learning_rate": 1.112373753678736e-05, "loss": 1.9366, "step": 317360 }, { "epoch": 1.2079885508095887, "grad_norm": 0.16071544587612152, "learning_rate": 1.1081124656997232e-05, "loss": 1.9243, "step": 317370 }, { "epoch": 1.2080266132777113, "grad_norm": 0.18917936086654663, "learning_rate": 1.1038515490909729e-05, "loss": 1.9473, "step": 317380 }, { "epoch": 1.208064675745834, "grad_norm": 0.16255995631217957, "learning_rate": 1.0995910037553958e-05, "loss": 1.9363, "step": 317390 }, { "epoch": 1.2081027382139569, "grad_norm": 0.16722454130649567, "learning_rate": 1.0953308295959696e-05, "loss": 1.9399, "step": 317400 }, { "epoch": 1.2081408006820795, "grad_norm": 0.16331323981285095, "learning_rate": 1.0910710265156887e-05, "loss": 1.9211, "step": 317410 }, { "epoch": 1.2081788631502022, "grad_norm": 0.160036101937294, "learning_rate": 1.0868115944176082e-05, "loss": 1.9367, "step": 317420 }, { "epoch": 1.2082169256183248, "grad_norm": 0.18118809163570404, "learning_rate": 1.0825525332048115e-05, "loss": 1.9514, "step": 317430 }, { "epoch": 1.2082549880864475, "grad_norm": 0.21691325306892395, "learning_rate": 1.0782938427804478e-05, "loss": 1.9334, "step": 317440 }, { "epoch": 1.2082930505545701, "grad_norm": 0.17789939045906067, "learning_rate": 1.074035523047684e-05, "loss": 1.9348, "step": 317450 }, { "epoch": 1.2083311130226928, "grad_norm": 0.20680665969848633, "learning_rate": 1.069777573909736e-05, "loss": 1.9311, "step": 317460 }, { "epoch": 1.2083691754908155, "grad_norm": 0.17139819264411926, "learning_rate": 1.0655199952698647e-05, "loss": 1.9313, "step": 317470 }, { "epoch": 1.2084072379589381, "grad_norm": 0.17623241245746613, "learning_rate": 1.061262787031375e-05, "loss": 1.9437, "step": 317480 }, { "epoch": 1.2084453004270608, "grad_norm": 0.18453365564346313, "learning_rate": 1.0570059490976114e-05, "loss": 1.9378, "step": 317490 }, { "epoch": 1.2084833628951837, "grad_norm": 0.16772012412548065, "learning_rate": 1.0527494813719562e-05, "loss": 1.9325, "step": 317500 }, { "epoch": 1.2085214253633063, "grad_norm": 0.19086311757564545, "learning_rate": 1.048493383757837e-05, "loss": 1.937, "step": 317510 }, { "epoch": 1.208559487831429, "grad_norm": 0.16531312465667725, "learning_rate": 1.0442376561587307e-05, "loss": 1.9429, "step": 317520 }, { "epoch": 1.2085975502995516, "grad_norm": 0.16466625034809113, "learning_rate": 1.0399822984781482e-05, "loss": 1.9408, "step": 317530 }, { "epoch": 1.2086356127676743, "grad_norm": 0.16177676618099213, "learning_rate": 1.035727310619633e-05, "loss": 1.9397, "step": 317540 }, { "epoch": 1.208673675235797, "grad_norm": 0.21583956480026245, "learning_rate": 1.0314726924867956e-05, "loss": 1.9376, "step": 317550 }, { "epoch": 1.2087117377039196, "grad_norm": 0.16004690527915955, "learning_rate": 1.0272184439832577e-05, "loss": 1.9389, "step": 317560 }, { "epoch": 1.2087498001720425, "grad_norm": 0.17332518100738525, "learning_rate": 1.0229645650127183e-05, "loss": 1.918, "step": 317570 }, { "epoch": 1.2087878626401651, "grad_norm": 0.16747303307056427, "learning_rate": 1.0187110554788825e-05, "loss": 1.9181, "step": 317580 }, { "epoch": 1.2088259251082878, "grad_norm": 0.16373571753501892, "learning_rate": 1.0144579152855215e-05, "loss": 1.934, "step": 317590 }, { "epoch": 1.2088639875764104, "grad_norm": 0.16885121166706085, "learning_rate": 1.0102051443364402e-05, "loss": 1.9454, "step": 317600 }, { "epoch": 1.208902050044533, "grad_norm": 0.1702057272195816, "learning_rate": 1.005952742535482e-05, "loss": 1.9385, "step": 317610 }, { "epoch": 1.2089401125126558, "grad_norm": 0.17671136558055878, "learning_rate": 1.001700709786535e-05, "loss": 1.9336, "step": 317620 }, { "epoch": 1.2089781749807784, "grad_norm": 0.1710839569568634, "learning_rate": 9.974490459935314e-06, "loss": 1.9421, "step": 317630 }, { "epoch": 1.209016237448901, "grad_norm": 0.167668417096138, "learning_rate": 9.931977510604484e-06, "loss": 1.9408, "step": 317640 }, { "epoch": 1.2090542999170237, "grad_norm": 0.17699985206127167, "learning_rate": 9.889468248912903e-06, "loss": 1.927, "step": 317650 }, { "epoch": 1.2090923623851464, "grad_norm": 0.17739124596118927, "learning_rate": 9.846962673901227e-06, "loss": 1.9175, "step": 317660 }, { "epoch": 1.2091304248532693, "grad_norm": 0.19273975491523743, "learning_rate": 9.804460784610281e-06, "loss": 1.9332, "step": 317670 }, { "epoch": 1.209168487321392, "grad_norm": 0.16854619979858398, "learning_rate": 9.76196258008155e-06, "loss": 1.9269, "step": 317680 }, { "epoch": 1.2092065497895146, "grad_norm": 0.1591169238090515, "learning_rate": 9.719468059356862e-06, "loss": 1.9375, "step": 317690 }, { "epoch": 1.2092446122576372, "grad_norm": 0.170506551861763, "learning_rate": 9.676977221478366e-06, "loss": 1.9252, "step": 317700 }, { "epoch": 1.20928267472576, "grad_norm": 0.19091635942459106, "learning_rate": 9.634490065488666e-06, "loss": 1.9325, "step": 317710 }, { "epoch": 1.2093207371938826, "grad_norm": 0.16340164840221405, "learning_rate": 9.592006590430857e-06, "loss": 1.9413, "step": 317720 }, { "epoch": 1.2093587996620052, "grad_norm": 0.16731935739517212, "learning_rate": 9.549526795348428e-06, "loss": 1.9398, "step": 317730 }, { "epoch": 1.2093968621301279, "grad_norm": 0.18606775999069214, "learning_rate": 9.507050679285146e-06, "loss": 1.925, "step": 317740 }, { "epoch": 1.2094349245982507, "grad_norm": 0.17653732001781464, "learning_rate": 9.464578241285382e-06, "loss": 1.9292, "step": 317750 }, { "epoch": 1.2094729870663734, "grad_norm": 0.16764487326145172, "learning_rate": 9.422109480393847e-06, "loss": 1.9397, "step": 317760 }, { "epoch": 1.209511049534496, "grad_norm": 0.1619822233915329, "learning_rate": 9.37964439565564e-06, "loss": 1.929, "step": 317770 }, { "epoch": 1.2095491120026187, "grad_norm": 0.1595163494348526, "learning_rate": 9.337182986116243e-06, "loss": 1.9218, "step": 317780 }, { "epoch": 1.2095871744707414, "grad_norm": 0.25330692529678345, "learning_rate": 9.29472525082159e-06, "loss": 1.9222, "step": 317790 }, { "epoch": 1.209625236938864, "grad_norm": 0.1648254096508026, "learning_rate": 9.252271188818106e-06, "loss": 1.9195, "step": 317800 }, { "epoch": 1.2096632994069867, "grad_norm": 0.19968004524707794, "learning_rate": 9.209820799152502e-06, "loss": 1.9254, "step": 317810 }, { "epoch": 1.2097013618751093, "grad_norm": 0.17890578508377075, "learning_rate": 9.167374080871982e-06, "loss": 1.9243, "step": 317820 }, { "epoch": 1.209739424343232, "grad_norm": 0.18902572989463806, "learning_rate": 9.124931033024142e-06, "loss": 1.9313, "step": 317830 }, { "epoch": 1.2097774868113549, "grad_norm": 0.21188783645629883, "learning_rate": 9.08249165465691e-06, "loss": 1.9444, "step": 317840 }, { "epoch": 1.2098155492794775, "grad_norm": 0.2646874189376831, "learning_rate": 9.040055944818825e-06, "loss": 1.9169, "step": 317850 }, { "epoch": 1.2098536117476002, "grad_norm": 0.19424621760845184, "learning_rate": 8.997623902558538e-06, "loss": 1.9423, "step": 317860 }, { "epoch": 1.2098916742157229, "grad_norm": 0.17482692003250122, "learning_rate": 8.955195526925474e-06, "loss": 1.9476, "step": 317870 }, { "epoch": 1.2099297366838455, "grad_norm": 0.17090508341789246, "learning_rate": 8.912770816969173e-06, "loss": 1.9295, "step": 317880 }, { "epoch": 1.2099677991519682, "grad_norm": 0.1726713925600052, "learning_rate": 8.870349771739671e-06, "loss": 1.9363, "step": 317890 }, { "epoch": 1.2100058616200908, "grad_norm": 0.23577260971069336, "learning_rate": 8.827932390287508e-06, "loss": 1.938, "step": 317900 }, { "epoch": 1.2100439240882135, "grad_norm": 0.1568213552236557, "learning_rate": 8.785518671663495e-06, "loss": 1.9339, "step": 317910 }, { "epoch": 1.2100819865563364, "grad_norm": 0.17128297686576843, "learning_rate": 8.743108614918948e-06, "loss": 1.9275, "step": 317920 }, { "epoch": 1.210120049024459, "grad_norm": 0.1653999388217926, "learning_rate": 8.700702219105516e-06, "loss": 1.9453, "step": 317930 }, { "epoch": 1.2101581114925817, "grad_norm": 0.23350019752979279, "learning_rate": 8.6582994832754e-06, "loss": 1.9327, "step": 317940 }, { "epoch": 1.2101961739607043, "grad_norm": 0.19468331336975098, "learning_rate": 8.61590040648108e-06, "loss": 1.9386, "step": 317950 }, { "epoch": 1.210234236428827, "grad_norm": 0.16924616694450378, "learning_rate": 8.573504987775426e-06, "loss": 1.9449, "step": 317960 }, { "epoch": 1.2102722988969496, "grad_norm": 0.18172939121723175, "learning_rate": 8.531113226211806e-06, "loss": 1.9302, "step": 317970 }, { "epoch": 1.2103103613650723, "grad_norm": 0.18186110258102417, "learning_rate": 8.488725120843921e-06, "loss": 1.9338, "step": 317980 }, { "epoch": 1.210348423833195, "grad_norm": 0.21057625114917755, "learning_rate": 8.446340670725972e-06, "loss": 1.9433, "step": 317990 }, { "epoch": 1.2103864863013176, "grad_norm": 0.19673345983028412, "learning_rate": 8.403959874912492e-06, "loss": 1.9254, "step": 318000 }, { "epoch": 1.2104245487694405, "grad_norm": 0.16198976337909698, "learning_rate": 8.361582732458406e-06, "loss": 1.9523, "step": 318010 }, { "epoch": 1.2104626112375632, "grad_norm": 0.17466185986995697, "learning_rate": 8.319209242419135e-06, "loss": 1.9346, "step": 318020 }, { "epoch": 1.2105006737056858, "grad_norm": 0.16120444238185883, "learning_rate": 8.27683940385049e-06, "loss": 1.9469, "step": 318030 }, { "epoch": 1.2105387361738085, "grad_norm": 0.15718050301074982, "learning_rate": 8.234473215808503e-06, "loss": 1.9322, "step": 318040 }, { "epoch": 1.2105767986419311, "grad_norm": 0.18450264632701874, "learning_rate": 8.192110677349928e-06, "loss": 1.9446, "step": 318050 }, { "epoch": 1.2106148611100538, "grad_norm": 0.18494777381420135, "learning_rate": 8.14975178753169e-06, "loss": 1.9359, "step": 318060 }, { "epoch": 1.2106529235781764, "grad_norm": 0.15343858301639557, "learning_rate": 8.107396545411205e-06, "loss": 1.932, "step": 318070 }, { "epoch": 1.210690986046299, "grad_norm": 0.1697983294725418, "learning_rate": 8.065044950046286e-06, "loss": 1.9562, "step": 318080 }, { "epoch": 1.210729048514422, "grad_norm": 0.18019534647464752, "learning_rate": 8.02269700049507e-06, "loss": 1.9273, "step": 318090 }, { "epoch": 1.2107671109825446, "grad_norm": 0.16635341942310333, "learning_rate": 7.980352695816263e-06, "loss": 1.9335, "step": 318100 }, { "epoch": 1.2108051734506673, "grad_norm": 0.17308969795703888, "learning_rate": 7.938012035068887e-06, "loss": 1.9377, "step": 318110 }, { "epoch": 1.21084323591879, "grad_norm": 0.2071884721517563, "learning_rate": 7.895675017312365e-06, "loss": 1.9307, "step": 318120 }, { "epoch": 1.2108812983869126, "grad_norm": 0.18859954178333282, "learning_rate": 7.853341641606448e-06, "loss": 1.9303, "step": 318130 }, { "epoch": 1.2109193608550353, "grad_norm": 0.18228723108768463, "learning_rate": 7.8110119070115e-06, "loss": 1.9243, "step": 318140 }, { "epoch": 1.210957423323158, "grad_norm": 0.17649097740650177, "learning_rate": 7.76868581258805e-06, "loss": 1.9338, "step": 318150 }, { "epoch": 1.2109954857912806, "grad_norm": 0.1678033173084259, "learning_rate": 7.726363357397182e-06, "loss": 1.9327, "step": 318160 }, { "epoch": 1.2110335482594032, "grad_norm": 0.17184267938137054, "learning_rate": 7.684044540500368e-06, "loss": 1.9444, "step": 318170 }, { "epoch": 1.211071610727526, "grad_norm": 0.20127668976783752, "learning_rate": 7.641729360959471e-06, "loss": 1.9327, "step": 318180 }, { "epoch": 1.2111096731956488, "grad_norm": 0.17560744285583496, "learning_rate": 7.59941781783674e-06, "loss": 1.937, "step": 318190 }, { "epoch": 1.2111477356637714, "grad_norm": 0.1622043251991272, "learning_rate": 7.55710991019476e-06, "loss": 1.9276, "step": 318200 }, { "epoch": 1.211185798131894, "grad_norm": 0.19448207318782806, "learning_rate": 7.514805637096666e-06, "loss": 1.9358, "step": 318210 }, { "epoch": 1.2112238606000167, "grad_norm": 0.1590512990951538, "learning_rate": 7.472504997605878e-06, "loss": 1.9456, "step": 318220 }, { "epoch": 1.2112619230681394, "grad_norm": 0.16298441588878632, "learning_rate": 7.430207990786364e-06, "loss": 1.9235, "step": 318230 }, { "epoch": 1.211299985536262, "grad_norm": 0.16473017632961273, "learning_rate": 7.3879146157022625e-06, "loss": 1.9291, "step": 318240 }, { "epoch": 1.2113380480043847, "grad_norm": 0.15773583948612213, "learning_rate": 7.345624871418266e-06, "loss": 1.9402, "step": 318250 }, { "epoch": 1.2113761104725076, "grad_norm": 0.1632518619298935, "learning_rate": 7.303338756999511e-06, "loss": 1.9394, "step": 318260 }, { "epoch": 1.2114141729406303, "grad_norm": 0.17342713475227356, "learning_rate": 7.261056271511412e-06, "loss": 1.9373, "step": 318270 }, { "epoch": 1.211452235408753, "grad_norm": 0.18714912235736847, "learning_rate": 7.218777414019828e-06, "loss": 1.945, "step": 318280 }, { "epoch": 1.2114902978768756, "grad_norm": 0.17201866209506989, "learning_rate": 7.17650218359106e-06, "loss": 1.9261, "step": 318290 }, { "epoch": 1.2115283603449982, "grad_norm": 0.20901884138584137, "learning_rate": 7.134230579291856e-06, "loss": 1.9428, "step": 318300 }, { "epoch": 1.2115664228131209, "grad_norm": 0.20689956843852997, "learning_rate": 7.091962600189128e-06, "loss": 1.946, "step": 318310 }, { "epoch": 1.2116044852812435, "grad_norm": 0.22000889480113983, "learning_rate": 7.049698245350511e-06, "loss": 1.932, "step": 318320 }, { "epoch": 1.2116425477493662, "grad_norm": 0.1608712524175644, "learning_rate": 7.007437513843751e-06, "loss": 1.9325, "step": 318330 }, { "epoch": 1.2116806102174889, "grad_norm": 0.16387580335140228, "learning_rate": 6.965180404737204e-06, "loss": 1.9485, "step": 318340 }, { "epoch": 1.2117186726856115, "grad_norm": 0.17372748255729675, "learning_rate": 6.922926917099504e-06, "loss": 1.9167, "step": 318350 }, { "epoch": 1.2117567351537344, "grad_norm": 0.1678519994020462, "learning_rate": 6.8806770499997285e-06, "loss": 1.9423, "step": 318360 }, { "epoch": 1.211794797621857, "grad_norm": 0.16828025877475739, "learning_rate": 6.838430802507345e-06, "loss": 1.9371, "step": 318370 }, { "epoch": 1.2118328600899797, "grad_norm": 0.159216970205307, "learning_rate": 6.7961881736922635e-06, "loss": 1.9414, "step": 318380 }, { "epoch": 1.2118709225581024, "grad_norm": 0.17254793643951416, "learning_rate": 6.753949162624673e-06, "loss": 1.9395, "step": 318390 }, { "epoch": 1.211908985026225, "grad_norm": 0.18198195099830627, "learning_rate": 6.711713768375261e-06, "loss": 1.9348, "step": 318400 }, { "epoch": 1.2119470474943477, "grad_norm": 0.17052502930164337, "learning_rate": 6.669481990015103e-06, "loss": 1.9217, "step": 318410 }, { "epoch": 1.2119851099624703, "grad_norm": 0.18030428886413574, "learning_rate": 6.62725382661572e-06, "loss": 1.9321, "step": 318420 }, { "epoch": 1.2120231724305932, "grad_norm": 0.16270743310451508, "learning_rate": 6.585029277248911e-06, "loss": 1.9319, "step": 318430 }, { "epoch": 1.2120612348987159, "grad_norm": 0.15881286561489105, "learning_rate": 6.542808340986917e-06, "loss": 1.9458, "step": 318440 }, { "epoch": 1.2120992973668385, "grad_norm": 0.22062478959560394, "learning_rate": 6.5005910169023684e-06, "loss": 1.9293, "step": 318450 }, { "epoch": 1.2121373598349612, "grad_norm": 0.15809518098831177, "learning_rate": 6.458377304068342e-06, "loss": 1.9403, "step": 318460 }, { "epoch": 1.2121754223030838, "grad_norm": 0.15915168821811676, "learning_rate": 6.4161672015583535e-06, "loss": 1.9463, "step": 318470 }, { "epoch": 1.2122134847712065, "grad_norm": 0.1819572001695633, "learning_rate": 6.373960708446147e-06, "loss": 1.9396, "step": 318480 }, { "epoch": 1.2122515472393292, "grad_norm": 0.16173192858695984, "learning_rate": 6.331757823806017e-06, "loss": 1.9395, "step": 318490 }, { "epoch": 1.2122896097074518, "grad_norm": 0.18848130106925964, "learning_rate": 6.289558546712537e-06, "loss": 1.9447, "step": 318500 }, { "epoch": 1.2123276721755745, "grad_norm": 0.16205497086048126, "learning_rate": 6.247362876240781e-06, "loss": 1.9263, "step": 318510 }, { "epoch": 1.2123657346436971, "grad_norm": 0.1631089150905609, "learning_rate": 6.205170811466099e-06, "loss": 1.9276, "step": 318520 }, { "epoch": 1.21240379711182, "grad_norm": 0.1711810976266861, "learning_rate": 6.162982351464452e-06, "loss": 1.9488, "step": 318530 }, { "epoch": 1.2124418595799427, "grad_norm": 0.15897248685359955, "learning_rate": 6.12079749531197e-06, "loss": 1.9305, "step": 318540 }, { "epoch": 1.2124799220480653, "grad_norm": 0.1594250500202179, "learning_rate": 6.078616242085222e-06, "loss": 1.9329, "step": 318550 }, { "epoch": 1.212517984516188, "grad_norm": 0.1622227430343628, "learning_rate": 6.036438590861227e-06, "loss": 1.933, "step": 318560 }, { "epoch": 1.2125560469843106, "grad_norm": 0.17288058996200562, "learning_rate": 5.994264540717442e-06, "loss": 1.9237, "step": 318570 }, { "epoch": 1.2125941094524333, "grad_norm": 0.16310521960258484, "learning_rate": 5.952094090731552e-06, "loss": 1.9314, "step": 318580 }, { "epoch": 1.212632171920556, "grad_norm": 0.1626407355070114, "learning_rate": 5.9099272399818494e-06, "loss": 1.9348, "step": 318590 }, { "epoch": 1.2126702343886786, "grad_norm": 0.1794476956129074, "learning_rate": 5.867763987546904e-06, "loss": 1.9306, "step": 318600 }, { "epoch": 1.2127082968568015, "grad_norm": 0.17908766865730286, "learning_rate": 5.825604332505563e-06, "loss": 1.9346, "step": 318610 }, { "epoch": 1.2127463593249241, "grad_norm": 0.16136077046394348, "learning_rate": 5.7834482739372865e-06, "loss": 1.9268, "step": 318620 }, { "epoch": 1.2127844217930468, "grad_norm": 0.1657281070947647, "learning_rate": 5.741295810921865e-06, "loss": 1.9527, "step": 318630 }, { "epoch": 1.2128224842611695, "grad_norm": 0.17226015031337738, "learning_rate": 5.699146942539313e-06, "loss": 1.9229, "step": 318640 }, { "epoch": 1.212860546729292, "grad_norm": 0.16876555979251862, "learning_rate": 5.657001667870254e-06, "loss": 1.9412, "step": 318650 }, { "epoch": 1.2128986091974148, "grad_norm": 0.1647004932165146, "learning_rate": 5.614859985995646e-06, "loss": 1.9339, "step": 318660 }, { "epoch": 1.2129366716655374, "grad_norm": 0.15683583915233612, "learning_rate": 5.572721895996779e-06, "loss": 1.9253, "step": 318670 }, { "epoch": 1.21297473413366, "grad_norm": 0.16682861745357513, "learning_rate": 5.5305873969553884e-06, "loss": 1.9218, "step": 318680 }, { "epoch": 1.2130127966017827, "grad_norm": 0.16168420016765594, "learning_rate": 5.488456487953486e-06, "loss": 1.9149, "step": 318690 }, { "epoch": 1.2130508590699056, "grad_norm": 0.1688627451658249, "learning_rate": 5.446329168073638e-06, "loss": 1.931, "step": 318700 }, { "epoch": 1.2130889215380283, "grad_norm": 0.19926753640174866, "learning_rate": 5.404205436398801e-06, "loss": 1.9402, "step": 318710 }, { "epoch": 1.213126984006151, "grad_norm": 0.17776836454868317, "learning_rate": 5.3620852920121535e-06, "loss": 1.9384, "step": 318720 }, { "epoch": 1.2131650464742736, "grad_norm": 0.1572011560201645, "learning_rate": 5.319968733997371e-06, "loss": 1.9441, "step": 318730 }, { "epoch": 1.2132031089423962, "grad_norm": 0.15843211114406586, "learning_rate": 5.2778557614385765e-06, "loss": 1.9369, "step": 318740 }, { "epoch": 1.213241171410519, "grad_norm": 0.15909917652606964, "learning_rate": 5.235746373420114e-06, "loss": 1.935, "step": 318750 }, { "epoch": 1.2132792338786416, "grad_norm": 0.16105514764785767, "learning_rate": 5.193640569026936e-06, "loss": 1.9425, "step": 318760 }, { "epoch": 1.2133172963467642, "grad_norm": 0.1726350486278534, "learning_rate": 5.151538347344165e-06, "loss": 1.9362, "step": 318770 }, { "epoch": 1.213355358814887, "grad_norm": 0.25620037317276, "learning_rate": 5.109439707457475e-06, "loss": 1.9541, "step": 318780 }, { "epoch": 1.2133934212830098, "grad_norm": 0.1771782636642456, "learning_rate": 5.067344648452876e-06, "loss": 1.9432, "step": 318790 }, { "epoch": 1.2134314837511324, "grad_norm": 0.15600189566612244, "learning_rate": 5.02525316941671e-06, "loss": 1.9246, "step": 318800 }, { "epoch": 1.213469546219255, "grad_norm": 0.1723054200410843, "learning_rate": 4.983165269435874e-06, "loss": 1.9399, "step": 318810 }, { "epoch": 1.2135076086873777, "grad_norm": 0.19066469371318817, "learning_rate": 4.941080947597376e-06, "loss": 1.9316, "step": 318820 }, { "epoch": 1.2135456711555004, "grad_norm": 0.15641188621520996, "learning_rate": 4.89900020298889e-06, "loss": 1.9428, "step": 318830 }, { "epoch": 1.213583733623623, "grad_norm": 0.16088195145130157, "learning_rate": 4.856923034698313e-06, "loss": 1.9475, "step": 318840 }, { "epoch": 1.2136217960917457, "grad_norm": 0.2013937532901764, "learning_rate": 4.814849441813984e-06, "loss": 1.9396, "step": 318850 }, { "epoch": 1.2136598585598684, "grad_norm": 0.171700119972229, "learning_rate": 4.7727794234246895e-06, "loss": 1.9231, "step": 318860 }, { "epoch": 1.2136979210279912, "grad_norm": 0.17729665338993073, "learning_rate": 4.730712978619433e-06, "loss": 1.9184, "step": 318870 }, { "epoch": 1.213735983496114, "grad_norm": 0.1747244894504547, "learning_rate": 4.688650106487724e-06, "loss": 1.947, "step": 318880 }, { "epoch": 1.2137740459642365, "grad_norm": 0.16286538541316986, "learning_rate": 4.646590806119511e-06, "loss": 1.9236, "step": 318890 }, { "epoch": 1.2138121084323592, "grad_norm": 0.1795330047607422, "learning_rate": 4.604535076605021e-06, "loss": 1.9268, "step": 318900 }, { "epoch": 1.2138501709004819, "grad_norm": 0.1655510812997818, "learning_rate": 4.562482917034927e-06, "loss": 1.9311, "step": 318910 }, { "epoch": 1.2138882333686045, "grad_norm": 0.1607380360364914, "learning_rate": 4.520434326500178e-06, "loss": 1.9266, "step": 318920 }, { "epoch": 1.2139262958367272, "grad_norm": 0.18507783114910126, "learning_rate": 4.478389304092334e-06, "loss": 1.9361, "step": 318930 }, { "epoch": 1.2139643583048498, "grad_norm": 0.19285060465335846, "learning_rate": 4.436347848903122e-06, "loss": 1.9398, "step": 318940 }, { "epoch": 1.2140024207729727, "grad_norm": 0.17271341383457184, "learning_rate": 4.394309960024767e-06, "loss": 1.9372, "step": 318950 }, { "epoch": 1.2140404832410954, "grad_norm": 0.18780827522277832, "learning_rate": 4.352275636549829e-06, "loss": 1.9356, "step": 318960 }, { "epoch": 1.214078545709218, "grad_norm": 0.16267111897468567, "learning_rate": 4.310244877571312e-06, "loss": 1.9213, "step": 318970 }, { "epoch": 1.2141166081773407, "grad_norm": 0.16083675622940063, "learning_rate": 4.268217682182496e-06, "loss": 1.9338, "step": 318980 }, { "epoch": 1.2141546706454633, "grad_norm": 0.1647607833147049, "learning_rate": 4.226194049477161e-06, "loss": 1.9419, "step": 318990 }, { "epoch": 1.214192733113586, "grad_norm": 0.15294934809207916, "learning_rate": 4.184173978549477e-06, "loss": 1.9551, "step": 319000 }, { "epoch": 1.2142307955817087, "grad_norm": 0.1650754064321518, "learning_rate": 4.142157468493779e-06, "loss": 1.9333, "step": 319010 }, { "epoch": 1.2142688580498313, "grad_norm": 0.156051367521286, "learning_rate": 4.100144518405125e-06, "loss": 1.9246, "step": 319020 }, { "epoch": 1.214306920517954, "grad_norm": 0.17990271747112274, "learning_rate": 4.0581351273787374e-06, "loss": 1.9356, "step": 319030 }, { "epoch": 1.2143449829860766, "grad_norm": 0.1663575917482376, "learning_rate": 4.016129294510229e-06, "loss": 1.9261, "step": 319040 }, { "epoch": 1.2143830454541995, "grad_norm": 0.1609850525856018, "learning_rate": 3.974127018895657e-06, "loss": 1.9233, "step": 319050 }, { "epoch": 1.2144211079223222, "grad_norm": 0.18278783559799194, "learning_rate": 3.932128299631466e-06, "loss": 1.9385, "step": 319060 }, { "epoch": 1.2144591703904448, "grad_norm": 0.170439213514328, "learning_rate": 3.890133135814378e-06, "loss": 1.9417, "step": 319070 }, { "epoch": 1.2144972328585675, "grad_norm": 0.16244801878929138, "learning_rate": 3.8481415265416154e-06, "loss": 1.9365, "step": 319080 }, { "epoch": 1.2145352953266901, "grad_norm": 0.18392355740070343, "learning_rate": 3.806153470910789e-06, "loss": 1.9466, "step": 319090 }, { "epoch": 1.2145733577948128, "grad_norm": 0.16437573730945587, "learning_rate": 3.7641689680197875e-06, "loss": 1.9267, "step": 319100 }, { "epoch": 1.2146114202629354, "grad_norm": 0.20235992968082428, "learning_rate": 3.7221880169669985e-06, "loss": 1.9407, "step": 319110 }, { "epoch": 1.2146494827310583, "grad_norm": 0.16109450161457062, "learning_rate": 3.680210616851087e-06, "loss": 1.9403, "step": 319120 }, { "epoch": 1.214687545199181, "grad_norm": 0.16093021631240845, "learning_rate": 3.6382367667710527e-06, "loss": 1.9376, "step": 319130 }, { "epoch": 1.2147256076673036, "grad_norm": 0.1587112993001938, "learning_rate": 3.596266465826559e-06, "loss": 1.9571, "step": 319140 }, { "epoch": 1.2147636701354263, "grad_norm": 0.1975339949131012, "learning_rate": 3.554299713117326e-06, "loss": 1.9325, "step": 319150 }, { "epoch": 1.214801732603549, "grad_norm": 0.16535383462905884, "learning_rate": 3.51233650774363e-06, "loss": 1.926, "step": 319160 }, { "epoch": 1.2148397950716716, "grad_norm": 0.17008942365646362, "learning_rate": 3.470376848806078e-06, "loss": 1.9341, "step": 319170 }, { "epoch": 1.2148778575397943, "grad_norm": 0.1578245759010315, "learning_rate": 3.4284207354056127e-06, "loss": 1.9351, "step": 319180 }, { "epoch": 1.214915920007917, "grad_norm": 0.15404680371284485, "learning_rate": 3.3864681666436748e-06, "loss": 1.9453, "step": 319190 }, { "epoch": 1.2149539824760396, "grad_norm": 0.15549880266189575, "learning_rate": 3.3445191416219824e-06, "loss": 1.9181, "step": 319200 }, { "epoch": 1.2149920449441622, "grad_norm": 0.1594415158033371, "learning_rate": 3.302573659442698e-06, "loss": 1.9511, "step": 319210 }, { "epoch": 1.2150301074122851, "grad_norm": 0.16243025660514832, "learning_rate": 3.2606317192083177e-06, "loss": 1.9354, "step": 319220 }, { "epoch": 1.2150681698804078, "grad_norm": 0.15204553306102753, "learning_rate": 3.2186933200216706e-06, "loss": 1.9283, "step": 319230 }, { "epoch": 1.2151062323485304, "grad_norm": 0.15984119474887848, "learning_rate": 3.17675846098614e-06, "loss": 1.9428, "step": 319240 }, { "epoch": 1.215144294816653, "grad_norm": 0.17340975999832153, "learning_rate": 3.134827141205221e-06, "loss": 1.9369, "step": 319250 }, { "epoch": 1.2151823572847757, "grad_norm": 0.15931546688079834, "learning_rate": 3.0928993597830747e-06, "loss": 1.9346, "step": 319260 }, { "epoch": 1.2152204197528984, "grad_norm": 0.15926462411880493, "learning_rate": 3.0509751158240285e-06, "loss": 1.9432, "step": 319270 }, { "epoch": 1.215258482221021, "grad_norm": 0.1612613946199417, "learning_rate": 3.0090544084329096e-06, "loss": 1.9393, "step": 319280 }, { "epoch": 1.215296544689144, "grad_norm": 0.16437748074531555, "learning_rate": 2.967137236714823e-06, "loss": 1.9395, "step": 319290 }, { "epoch": 1.2153346071572666, "grad_norm": 0.1627143770456314, "learning_rate": 2.925223599775373e-06, "loss": 1.9374, "step": 319300 }, { "epoch": 1.2153726696253893, "grad_norm": 0.15959599614143372, "learning_rate": 2.8833134967203857e-06, "loss": 1.9318, "step": 319310 }, { "epoch": 1.215410732093512, "grad_norm": 0.15941393375396729, "learning_rate": 2.841406926656187e-06, "loss": 1.9385, "step": 319320 }, { "epoch": 1.2154487945616346, "grad_norm": 0.15475495159626007, "learning_rate": 2.7995038886894363e-06, "loss": 1.9382, "step": 319330 }, { "epoch": 1.2154868570297572, "grad_norm": 0.15965044498443604, "learning_rate": 2.7576043819272366e-06, "loss": 1.9247, "step": 319340 }, { "epoch": 1.2155249194978799, "grad_norm": 0.1535380631685257, "learning_rate": 2.7157084054769134e-06, "loss": 1.9418, "step": 319350 }, { "epoch": 1.2155629819660025, "grad_norm": 0.16000913083553314, "learning_rate": 2.6738159584463463e-06, "loss": 1.9311, "step": 319360 }, { "epoch": 1.2156010444341252, "grad_norm": 0.15471646189689636, "learning_rate": 2.6319270399435825e-06, "loss": 1.9355, "step": 319370 }, { "epoch": 1.2156391069022479, "grad_norm": 0.1763332486152649, "learning_rate": 2.5900416490772794e-06, "loss": 1.9287, "step": 319380 }, { "epoch": 1.2156771693703707, "grad_norm": 0.17447736859321594, "learning_rate": 2.5481597849563165e-06, "loss": 1.9364, "step": 319390 }, { "epoch": 1.2157152318384934, "grad_norm": 0.1559518277645111, "learning_rate": 2.5062814466900175e-06, "loss": 1.9156, "step": 319400 }, { "epoch": 1.215753294306616, "grad_norm": 0.16822876036167145, "learning_rate": 2.464406633388039e-06, "loss": 1.9356, "step": 319410 }, { "epoch": 1.2157913567747387, "grad_norm": 0.1587214171886444, "learning_rate": 2.4225353441603704e-06, "loss": 1.9373, "step": 319420 }, { "epoch": 1.2158294192428614, "grad_norm": 0.1601061224937439, "learning_rate": 2.3806675781175015e-06, "loss": 1.9624, "step": 319430 }, { "epoch": 1.215867481710984, "grad_norm": 0.15818408131599426, "learning_rate": 2.3388033343701987e-06, "loss": 1.9261, "step": 319440 }, { "epoch": 1.2159055441791067, "grad_norm": 0.17259342968463898, "learning_rate": 2.2969426120296177e-06, "loss": 1.9251, "step": 319450 }, { "epoch": 1.2159436066472293, "grad_norm": 0.20744659006595612, "learning_rate": 2.255085410207358e-06, "loss": 1.9259, "step": 319460 }, { "epoch": 1.2159816691153522, "grad_norm": 0.15923643112182617, "learning_rate": 2.2132317280152412e-06, "loss": 1.9455, "step": 319470 }, { "epoch": 1.2160197315834749, "grad_norm": 0.15893425047397614, "learning_rate": 2.1713815645655886e-06, "loss": 1.9264, "step": 319480 }, { "epoch": 1.2160577940515975, "grad_norm": 0.1528540700674057, "learning_rate": 2.12953491897111e-06, "loss": 1.9386, "step": 319490 }, { "epoch": 1.2160958565197202, "grad_norm": 0.16423656046390533, "learning_rate": 2.0876917903447924e-06, "loss": 1.9327, "step": 319500 }, { "epoch": 1.2161339189878428, "grad_norm": 0.15889745950698853, "learning_rate": 2.0458521778000673e-06, "loss": 1.9425, "step": 319510 }, { "epoch": 1.2161719814559655, "grad_norm": 0.18033714592456818, "learning_rate": 2.004016080450699e-06, "loss": 1.9349, "step": 319520 }, { "epoch": 1.2162100439240882, "grad_norm": 0.1714215725660324, "learning_rate": 1.962183497410841e-06, "loss": 1.9437, "step": 319530 }, { "epoch": 1.2162481063922108, "grad_norm": 0.1582074910402298, "learning_rate": 1.9203544277950347e-06, "loss": 1.936, "step": 319540 }, { "epoch": 1.2162861688603335, "grad_norm": 0.16662302613258362, "learning_rate": 1.8785288707180993e-06, "loss": 1.9353, "step": 319550 }, { "epoch": 1.2163242313284564, "grad_norm": 0.15821482241153717, "learning_rate": 1.8367068252954089e-06, "loss": 1.9545, "step": 319560 }, { "epoch": 1.216362293796579, "grad_norm": 0.15704180300235748, "learning_rate": 1.79488829064256e-06, "loss": 1.9378, "step": 319570 }, { "epoch": 1.2164003562647017, "grad_norm": 0.16972213983535767, "learning_rate": 1.7530732658755378e-06, "loss": 1.9328, "step": 319580 }, { "epoch": 1.2164384187328243, "grad_norm": 0.1585708111524582, "learning_rate": 1.7112617501107152e-06, "loss": 1.956, "step": 319590 }, { "epoch": 1.216476481200947, "grad_norm": 0.17058701813220978, "learning_rate": 1.6694537424649103e-06, "loss": 1.9334, "step": 319600 }, { "epoch": 1.2165145436690696, "grad_norm": 0.154144287109375, "learning_rate": 1.6276492420551626e-06, "loss": 1.9254, "step": 319610 }, { "epoch": 1.2165526061371923, "grad_norm": 0.1549491584300995, "learning_rate": 1.5858482479990111e-06, "loss": 1.9385, "step": 319620 }, { "epoch": 1.216590668605315, "grad_norm": 0.1657015085220337, "learning_rate": 1.5440507594143282e-06, "loss": 1.9349, "step": 319630 }, { "epoch": 1.2166287310734378, "grad_norm": 0.15674883127212524, "learning_rate": 1.5022567754193194e-06, "loss": 1.9381, "step": 319640 }, { "epoch": 1.2166667935415605, "grad_norm": 0.15751342475414276, "learning_rate": 1.4604662951325787e-06, "loss": 1.9341, "step": 319650 }, { "epoch": 1.2167048560096831, "grad_norm": 0.17440181970596313, "learning_rate": 1.4186793176730883e-06, "loss": 1.9286, "step": 319660 }, { "epoch": 1.2167429184778058, "grad_norm": 0.15538595616817474, "learning_rate": 1.376895842160164e-06, "loss": 1.9357, "step": 319670 }, { "epoch": 1.2167809809459285, "grad_norm": 0.15798527002334595, "learning_rate": 1.3351158677135655e-06, "loss": 1.933, "step": 319680 }, { "epoch": 1.2168190434140511, "grad_norm": 0.15561030805110931, "learning_rate": 1.29333939345333e-06, "loss": 1.9273, "step": 319690 }, { "epoch": 1.2168571058821738, "grad_norm": 0.15403597056865692, "learning_rate": 1.2515664184999387e-06, "loss": 1.9359, "step": 319700 }, { "epoch": 1.2168951683502964, "grad_norm": 0.16248761117458344, "learning_rate": 1.2097969419741506e-06, "loss": 1.9214, "step": 319710 }, { "epoch": 1.216933230818419, "grad_norm": 0.16012699902057648, "learning_rate": 1.1680309629972241e-06, "loss": 1.9351, "step": 319720 }, { "epoch": 1.216971293286542, "grad_norm": 0.15856362879276276, "learning_rate": 1.1262684806905842e-06, "loss": 1.9329, "step": 319730 }, { "epoch": 1.2170093557546646, "grad_norm": 0.15176869928836823, "learning_rate": 1.0845094941762668e-06, "loss": 1.9431, "step": 319740 }, { "epoch": 1.2170474182227873, "grad_norm": 0.16340629756450653, "learning_rate": 1.0427540025765292e-06, "loss": 1.9335, "step": 319750 }, { "epoch": 1.21708548069091, "grad_norm": 0.15526182949543, "learning_rate": 1.0010020050140178e-06, "loss": 1.9234, "step": 319760 }, { "epoch": 1.2171235431590326, "grad_norm": 0.17227885127067566, "learning_rate": 9.592535006117676e-07, "loss": 1.9371, "step": 319770 }, { "epoch": 1.2171616056271553, "grad_norm": 0.1604343056678772, "learning_rate": 9.175084884931462e-07, "loss": 1.9323, "step": 319780 }, { "epoch": 1.217199668095278, "grad_norm": 0.16079290211200714, "learning_rate": 8.757669677818547e-07, "loss": 1.9255, "step": 319790 }, { "epoch": 1.2172377305634006, "grad_norm": 0.15556688606739044, "learning_rate": 8.340289376020938e-07, "loss": 1.9335, "step": 319800 }, { "epoch": 1.2172757930315234, "grad_norm": 0.1539129763841629, "learning_rate": 7.922943970782859e-07, "loss": 1.9473, "step": 319810 }, { "epoch": 1.217313855499646, "grad_norm": 0.15851177275180817, "learning_rate": 7.505633453353533e-07, "loss": 1.9168, "step": 319820 }, { "epoch": 1.2173519179677688, "grad_norm": 0.16344186663627625, "learning_rate": 7.088357814984958e-07, "loss": 1.9315, "step": 319830 }, { "epoch": 1.2173899804358914, "grad_norm": 0.15521660447120667, "learning_rate": 6.671117046931907e-07, "loss": 1.9425, "step": 319840 }, { "epoch": 1.217428042904014, "grad_norm": 0.15747219324111938, "learning_rate": 6.253911140455259e-07, "loss": 1.9277, "step": 319850 }, { "epoch": 1.2174661053721367, "grad_norm": 0.15791189670562744, "learning_rate": 5.83674008681756e-07, "loss": 1.943, "step": 319860 }, { "epoch": 1.2175041678402594, "grad_norm": 0.16524600982666016, "learning_rate": 5.41960387728524e-07, "loss": 1.9465, "step": 319870 }, { "epoch": 1.217542230308382, "grad_norm": 0.1657755821943283, "learning_rate": 5.002502503129169e-07, "loss": 1.9395, "step": 319880 }, { "epoch": 1.2175802927765047, "grad_norm": 0.1625651866197586, "learning_rate": 4.585435955623551e-07, "loss": 1.9362, "step": 319890 }, { "epoch": 1.2176183552446274, "grad_norm": 0.16468630731105804, "learning_rate": 4.1684042260459186e-07, "loss": 1.9332, "step": 319900 }, { "epoch": 1.2176564177127502, "grad_norm": 0.1581428050994873, "learning_rate": 3.7514073056771347e-07, "loss": 1.9283, "step": 319910 }, { "epoch": 1.217694480180873, "grad_norm": 0.16948306560516357, "learning_rate": 3.334445185803059e-07, "loss": 1.9369, "step": 319920 }, { "epoch": 1.2177325426489956, "grad_norm": 0.15806998312473297, "learning_rate": 2.917517857711771e-07, "loss": 1.9403, "step": 319930 }, { "epoch": 1.2177706051171182, "grad_norm": 0.15995928645133972, "learning_rate": 2.5006253126952374e-07, "loss": 1.9287, "step": 319940 }, { "epoch": 1.2178086675852409, "grad_norm": 0.15800589323043823, "learning_rate": 2.0837675420504187e-07, "loss": 1.9312, "step": 319950 }, { "epoch": 1.2178467300533635, "grad_norm": 0.14926236867904663, "learning_rate": 1.6669445370759428e-07, "loss": 1.9412, "step": 319960 }, { "epoch": 1.2178847925214862, "grad_norm": 0.1593756526708603, "learning_rate": 1.2501562890743223e-07, "loss": 1.9365, "step": 319970 }, { "epoch": 1.217922854989609, "grad_norm": 0.16351835429668427, "learning_rate": 8.334027893541763e-08, "loss": 1.9345, "step": 319980 }, { "epoch": 1.2179609174577317, "grad_norm": 0.1576191782951355, "learning_rate": 4.166840292246788e-08, "loss": 1.9253, "step": 319990 }, { "epoch": 1.2179989799258544, "grad_norm": 0.15571853518486023, "learning_rate": 0.0, "loss": 1.9405, "step": 320000 } ], "logging_steps": 10, "max_steps": 320000, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 10000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.0539318127253395e+21, "train_batch_size": 8, "trial_name": null, "trial_params": null }