{ "best_global_step": 400, "best_metric": 1.0, "best_model_checkpoint": "/projects/bffw/darora1/llm_ipc/final_models/mpi_async_n3/checkpoint-400", "epoch": 1.039580188078651, "eval_steps": 40, "global_step": 2680, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0007760821191892367, "grad_norm": 12.262979507446289, "learning_rate": 2.0000000000000002e-07, "loss": 0.5292, "step": 2 }, { "epoch": 0.0015521642383784734, "grad_norm": 10.550226211547852, "learning_rate": 6.000000000000001e-07, "loss": 0.5158, "step": 4 }, { "epoch": 0.00232824635756771, "grad_norm": 11.25029182434082, "learning_rate": 1.0000000000000002e-06, "loss": 0.5035, "step": 6 }, { "epoch": 0.003104328476756947, "grad_norm": 12.211933135986328, "learning_rate": 1.4000000000000001e-06, "loss": 0.4699, "step": 8 }, { "epoch": 0.0038804105959461834, "grad_norm": 13.521236419677734, "learning_rate": 1.8000000000000001e-06, "loss": 0.4353, "step": 10 }, { "epoch": 0.00465649271513542, "grad_norm": 10.723718643188477, "learning_rate": 2.2e-06, "loss": 0.3387, "step": 12 }, { "epoch": 0.005432574834324657, "grad_norm": 4.546169757843018, "learning_rate": 2.6e-06, "loss": 0.2936, "step": 14 }, { "epoch": 0.006208656953513894, "grad_norm": 2.195192813873291, "learning_rate": 3e-06, "loss": 0.1848, "step": 16 }, { "epoch": 0.00698473907270313, "grad_norm": 2.521470785140991, "learning_rate": 3.4000000000000005e-06, "loss": 0.1964, "step": 18 }, { "epoch": 0.007760821191892367, "grad_norm": 1.8902873992919922, "learning_rate": 3.8000000000000005e-06, "loss": 0.1254, "step": 20 }, { "epoch": 0.008536903311081603, "grad_norm": 1.6655786037445068, "learning_rate": 4.2000000000000004e-06, "loss": 0.1055, "step": 22 }, { "epoch": 0.00931298543027084, "grad_norm": 1.4653961658477783, "learning_rate": 4.600000000000001e-06, "loss": 0.091, "step": 24 }, { "epoch": 0.010089067549460077, "grad_norm": 1.1770055294036865, "learning_rate": 5e-06, "loss": 0.0762, "step": 26 }, { "epoch": 0.010865149668649314, "grad_norm": 1.4054973125457764, "learning_rate": 5.400000000000001e-06, "loss": 0.0433, "step": 28 }, { "epoch": 0.011641231787838551, "grad_norm": 1.2623802423477173, "learning_rate": 5.8e-06, "loss": 0.0367, "step": 30 }, { "epoch": 0.012417313907027787, "grad_norm": 0.6245309114456177, "learning_rate": 6.200000000000001e-06, "loss": 0.0269, "step": 32 }, { "epoch": 0.013193396026217025, "grad_norm": 0.5869189500808716, "learning_rate": 6.600000000000001e-06, "loss": 0.0224, "step": 34 }, { "epoch": 0.01396947814540626, "grad_norm": 0.828778862953186, "learning_rate": 7e-06, "loss": 0.0243, "step": 36 }, { "epoch": 0.014745560264595498, "grad_norm": 0.5178276896476746, "learning_rate": 7.4e-06, "loss": 0.0143, "step": 38 }, { "epoch": 0.015521642383784734, "grad_norm": 1.2490451335906982, "learning_rate": 7.800000000000002e-06, "loss": 0.0145, "step": 40 }, { "epoch": 0.015521642383784734, "eval_accuracy": 0.9950695396598972, "eval_loss": 0.01397051103413105, "eval_runtime": 131.2714, "eval_samples_per_second": 38.089, "eval_steps_per_second": 9.522, "step": 40 }, { "epoch": 0.01629772450297397, "grad_norm": 0.6127598881721497, "learning_rate": 8.2e-06, "loss": 0.0147, "step": 42 }, { "epoch": 0.017073806622163207, "grad_norm": 0.7742691040039062, "learning_rate": 8.6e-06, "loss": 0.0092, "step": 44 }, { "epoch": 0.017849888741352446, "grad_norm": 0.43619677424430847, "learning_rate": 9e-06, "loss": 0.0139, "step": 46 }, { "epoch": 0.01862597086054168, "grad_norm": 0.6179471015930176, "learning_rate": 9.4e-06, "loss": 0.0137, "step": 48 }, { "epoch": 0.019402052979730917, "grad_norm": 0.6856386065483093, "learning_rate": 9.800000000000001e-06, "loss": 0.0114, "step": 50 }, { "epoch": 0.020178135098920153, "grad_norm": 0.4444126486778259, "learning_rate": 9.999998993000299e-06, "loss": 0.0072, "step": 52 }, { "epoch": 0.020954217218109392, "grad_norm": 0.44204798340797424, "learning_rate": 9.999990937005126e-06, "loss": 0.0081, "step": 54 }, { "epoch": 0.021730299337298628, "grad_norm": 0.6200250387191772, "learning_rate": 9.999974825027756e-06, "loss": 0.0068, "step": 56 }, { "epoch": 0.022506381456487864, "grad_norm": 0.3666571378707886, "learning_rate": 9.999950657094151e-06, "loss": 0.0056, "step": 58 }, { "epoch": 0.023282463575677103, "grad_norm": 0.37394317984580994, "learning_rate": 9.999918433243253e-06, "loss": 0.0057, "step": 60 }, { "epoch": 0.02405854569486634, "grad_norm": 0.3526070713996887, "learning_rate": 9.999878153526974e-06, "loss": 0.0046, "step": 62 }, { "epoch": 0.024834627814055574, "grad_norm": 0.37286990880966187, "learning_rate": 9.99982981801022e-06, "loss": 0.0055, "step": 64 }, { "epoch": 0.02561070993324481, "grad_norm": 0.2880455255508423, "learning_rate": 9.999773426770864e-06, "loss": 0.0055, "step": 66 }, { "epoch": 0.02638679205243405, "grad_norm": 0.1844996213912964, "learning_rate": 9.999708979899769e-06, "loss": 0.0053, "step": 68 }, { "epoch": 0.027162874171623285, "grad_norm": 0.3575407564640045, "learning_rate": 9.999636477500765e-06, "loss": 0.0046, "step": 70 }, { "epoch": 0.02793895629081252, "grad_norm": 0.4409068524837494, "learning_rate": 9.999555919690673e-06, "loss": 0.0047, "step": 72 }, { "epoch": 0.02871503841000176, "grad_norm": 0.28502458333969116, "learning_rate": 9.999467306599285e-06, "loss": 0.0039, "step": 74 }, { "epoch": 0.029491120529190996, "grad_norm": 0.3887697458267212, "learning_rate": 9.999370638369377e-06, "loss": 0.0032, "step": 76 }, { "epoch": 0.03026720264838023, "grad_norm": 0.3041154742240906, "learning_rate": 9.999265915156697e-06, "loss": 0.0028, "step": 78 }, { "epoch": 0.031043284767569467, "grad_norm": 0.3210655748844147, "learning_rate": 9.999153137129978e-06, "loss": 0.0034, "step": 80 }, { "epoch": 0.031043284767569467, "eval_accuracy": 0.9989959166226455, "eval_loss": 0.0025868695229291916, "eval_runtime": 127.9011, "eval_samples_per_second": 39.093, "eval_steps_per_second": 9.773, "step": 80 }, { "epoch": 0.0318193668867587, "grad_norm": 0.6757090091705322, "learning_rate": 9.999032304470926e-06, "loss": 0.003, "step": 82 }, { "epoch": 0.03259544900594794, "grad_norm": 0.16272921860218048, "learning_rate": 9.998903417374228e-06, "loss": 0.0017, "step": 84 }, { "epoch": 0.03337153112513718, "grad_norm": 0.6212184429168701, "learning_rate": 9.998766476047546e-06, "loss": 0.0033, "step": 86 }, { "epoch": 0.034147613244326414, "grad_norm": 0.19103288650512695, "learning_rate": 9.998621480711522e-06, "loss": 0.0016, "step": 88 }, { "epoch": 0.03492369536351565, "grad_norm": 0.4599171280860901, "learning_rate": 9.998468431599768e-06, "loss": 0.0035, "step": 90 }, { "epoch": 0.03569977748270489, "grad_norm": 0.22474364936351776, "learning_rate": 9.99830732895888e-06, "loss": 0.0016, "step": 92 }, { "epoch": 0.036475859601894124, "grad_norm": 0.19210362434387207, "learning_rate": 9.998138173048424e-06, "loss": 0.0015, "step": 94 }, { "epoch": 0.03725194172108336, "grad_norm": 0.22696685791015625, "learning_rate": 9.997960964140946e-06, "loss": 0.0012, "step": 96 }, { "epoch": 0.038028023840272596, "grad_norm": 0.3195860981941223, "learning_rate": 9.997775702521965e-06, "loss": 0.0012, "step": 98 }, { "epoch": 0.038804105959461835, "grad_norm": 0.3686668574810028, "learning_rate": 9.997582388489975e-06, "loss": 0.0014, "step": 100 }, { "epoch": 0.039580188078651074, "grad_norm": 0.28173354268074036, "learning_rate": 9.99738102235644e-06, "loss": 0.0014, "step": 102 }, { "epoch": 0.040356270197840306, "grad_norm": 0.1499175727367401, "learning_rate": 9.997171604445803e-06, "loss": 0.0015, "step": 104 }, { "epoch": 0.041132352317029545, "grad_norm": 0.4097079038619995, "learning_rate": 9.99695413509548e-06, "loss": 0.0019, "step": 106 }, { "epoch": 0.041908434436218785, "grad_norm": 0.1767456978559494, "learning_rate": 9.996728614655854e-06, "loss": 0.0009, "step": 108 }, { "epoch": 0.04268451655540802, "grad_norm": 0.2755231559276581, "learning_rate": 9.996495043490285e-06, "loss": 0.0012, "step": 110 }, { "epoch": 0.043460598674597256, "grad_norm": 0.21294231712818146, "learning_rate": 9.996253421975103e-06, "loss": 0.001, "step": 112 }, { "epoch": 0.044236680793786495, "grad_norm": 0.5105843544006348, "learning_rate": 9.996003750499608e-06, "loss": 0.0017, "step": 114 }, { "epoch": 0.04501276291297573, "grad_norm": 0.20320548117160797, "learning_rate": 9.995746029466071e-06, "loss": 0.0011, "step": 116 }, { "epoch": 0.04578884503216497, "grad_norm": 0.1121864914894104, "learning_rate": 9.995480259289731e-06, "loss": 0.0012, "step": 118 }, { "epoch": 0.046564927151354206, "grad_norm": 0.13900773227214813, "learning_rate": 9.995206440398798e-06, "loss": 0.0005, "step": 120 }, { "epoch": 0.046564927151354206, "eval_accuracy": 0.9996412933698419, "eval_loss": 0.0013112464221194386, "eval_runtime": 129.9264, "eval_samples_per_second": 38.483, "eval_steps_per_second": 9.621, "step": 120 }, { "epoch": 0.04734100927054344, "grad_norm": 0.17495128512382507, "learning_rate": 9.994924573234448e-06, "loss": 0.0011, "step": 122 }, { "epoch": 0.04811709138973268, "grad_norm": 0.44951504468917847, "learning_rate": 9.994634658250825e-06, "loss": 0.0015, "step": 124 }, { "epoch": 0.04889317350892191, "grad_norm": 0.28001341223716736, "learning_rate": 9.994336695915041e-06, "loss": 0.0009, "step": 126 }, { "epoch": 0.04966925562811115, "grad_norm": 0.15198007225990295, "learning_rate": 9.994030686707171e-06, "loss": 0.0008, "step": 128 }, { "epoch": 0.05044533774730039, "grad_norm": 0.2052275389432907, "learning_rate": 9.993716631120259e-06, "loss": 0.0008, "step": 130 }, { "epoch": 0.05122141986648962, "grad_norm": 0.20360974967479706, "learning_rate": 9.993394529660307e-06, "loss": 0.0008, "step": 132 }, { "epoch": 0.05199750198567886, "grad_norm": 0.20459742844104767, "learning_rate": 9.99306438284629e-06, "loss": 0.0007, "step": 134 }, { "epoch": 0.0527735841048681, "grad_norm": 0.12038147449493408, "learning_rate": 9.992726191210139e-06, "loss": 0.0008, "step": 136 }, { "epoch": 0.05354966622405733, "grad_norm": 0.2902871072292328, "learning_rate": 9.992379955296745e-06, "loss": 0.0005, "step": 138 }, { "epoch": 0.05432574834324657, "grad_norm": 0.11465182155370712, "learning_rate": 9.992025675663966e-06, "loss": 0.0006, "step": 140 }, { "epoch": 0.05510183046243581, "grad_norm": 0.10924035310745239, "learning_rate": 9.991663352882615e-06, "loss": 0.0005, "step": 142 }, { "epoch": 0.05587791258162504, "grad_norm": 0.10540606826543808, "learning_rate": 9.991292987536469e-06, "loss": 0.0003, "step": 144 }, { "epoch": 0.05665399470081428, "grad_norm": 0.10914743691682816, "learning_rate": 9.990914580222258e-06, "loss": 0.0004, "step": 146 }, { "epoch": 0.05743007682000352, "grad_norm": 0.06488844007253647, "learning_rate": 9.990528131549674e-06, "loss": 0.0004, "step": 148 }, { "epoch": 0.05820615893919275, "grad_norm": 0.11523474752902985, "learning_rate": 9.990133642141359e-06, "loss": 0.0003, "step": 150 }, { "epoch": 0.05898224105838199, "grad_norm": 0.17658241093158722, "learning_rate": 9.989731112632917e-06, "loss": 0.0004, "step": 152 }, { "epoch": 0.059758323177571224, "grad_norm": 0.1516527682542801, "learning_rate": 9.989320543672904e-06, "loss": 0.0002, "step": 154 }, { "epoch": 0.06053440529676046, "grad_norm": 0.14159496128559113, "learning_rate": 9.988901935922826e-06, "loss": 0.0001, "step": 156 }, { "epoch": 0.0613104874159497, "grad_norm": 0.19340620934963226, "learning_rate": 9.988475290057145e-06, "loss": 0.0006, "step": 158 }, { "epoch": 0.062086569535138934, "grad_norm": 0.17848193645477295, "learning_rate": 9.988040606763272e-06, "loss": 0.0003, "step": 160 }, { "epoch": 0.062086569535138934, "eval_accuracy": 0.9999064094433845, "eval_loss": 0.00032003907836042345, "eval_runtime": 127.2112, "eval_samples_per_second": 39.305, "eval_steps_per_second": 9.826, "step": 160 }, { "epoch": 0.06286265165432818, "grad_norm": 0.15477371215820312, "learning_rate": 9.98759788674157e-06, "loss": 0.0003, "step": 162 }, { "epoch": 0.0636387337735174, "grad_norm": 0.1277933269739151, "learning_rate": 9.987147130705347e-06, "loss": 0.0004, "step": 164 }, { "epoch": 0.06441481589270664, "grad_norm": 0.14449910819530487, "learning_rate": 9.986688339380863e-06, "loss": 0.0002, "step": 166 }, { "epoch": 0.06519089801189588, "grad_norm": 0.6293010115623474, "learning_rate": 9.98622151350732e-06, "loss": 0.0006, "step": 168 }, { "epoch": 0.06596698013108512, "grad_norm": 0.2988656163215637, "learning_rate": 9.985746653836867e-06, "loss": 0.0005, "step": 170 }, { "epoch": 0.06674306225027436, "grad_norm": 0.0764790028333664, "learning_rate": 9.985263761134602e-06, "loss": 0.0005, "step": 172 }, { "epoch": 0.06751914436946359, "grad_norm": 0.3135935366153717, "learning_rate": 9.984772836178559e-06, "loss": 0.0006, "step": 174 }, { "epoch": 0.06829522648865283, "grad_norm": 0.4241097569465637, "learning_rate": 9.984273879759713e-06, "loss": 0.0008, "step": 176 }, { "epoch": 0.06907130860784207, "grad_norm": 0.07492109388113022, "learning_rate": 9.983766892681985e-06, "loss": 0.0003, "step": 178 }, { "epoch": 0.0698473907270313, "grad_norm": 0.15513752400875092, "learning_rate": 9.983251875762234e-06, "loss": 0.0003, "step": 180 }, { "epoch": 0.07062347284622054, "grad_norm": 0.2630753815174103, "learning_rate": 9.982728829830252e-06, "loss": 0.0006, "step": 182 }, { "epoch": 0.07139955496540978, "grad_norm": 0.07824663817882538, "learning_rate": 9.982197755728771e-06, "loss": 0.0003, "step": 184 }, { "epoch": 0.07217563708459901, "grad_norm": 0.03119218535721302, "learning_rate": 9.981658654313458e-06, "loss": 0.0006, "step": 186 }, { "epoch": 0.07295171920378825, "grad_norm": 0.5731412768363953, "learning_rate": 9.981111526452912e-06, "loss": 0.0015, "step": 188 }, { "epoch": 0.07372780132297749, "grad_norm": 0.13840052485466003, "learning_rate": 9.980556373028665e-06, "loss": 0.0002, "step": 190 }, { "epoch": 0.07450388344216673, "grad_norm": 0.04643406346440315, "learning_rate": 9.979993194935182e-06, "loss": 0.0002, "step": 192 }, { "epoch": 0.07527996556135597, "grad_norm": 0.05373441055417061, "learning_rate": 9.979421993079853e-06, "loss": 0.0003, "step": 194 }, { "epoch": 0.07605604768054519, "grad_norm": 0.21675284206867218, "learning_rate": 9.978842768382999e-06, "loss": 0.0004, "step": 196 }, { "epoch": 0.07683212979973443, "grad_norm": 0.18371616303920746, "learning_rate": 9.978255521777865e-06, "loss": 0.0002, "step": 198 }, { "epoch": 0.07760821191892367, "grad_norm": 0.09996998310089111, "learning_rate": 9.977660254210623e-06, "loss": 0.0003, "step": 200 }, { "epoch": 0.07760821191892367, "eval_accuracy": 0.9999704461885914, "eval_loss": 0.00016504956874996424, "eval_runtime": 129.4586, "eval_samples_per_second": 38.622, "eval_steps_per_second": 9.656, "step": 200 }, { "epoch": 0.07838429403811291, "grad_norm": 0.1638646125793457, "learning_rate": 9.977056966640368e-06, "loss": 0.0004, "step": 202 }, { "epoch": 0.07916037615730215, "grad_norm": 0.13783912360668182, "learning_rate": 9.976445660039118e-06, "loss": 0.0001, "step": 204 }, { "epoch": 0.07993645827649139, "grad_norm": 0.14015792310237885, "learning_rate": 9.975826335391808e-06, "loss": 0.0001, "step": 206 }, { "epoch": 0.08071254039568061, "grad_norm": 0.2642574906349182, "learning_rate": 9.975198993696294e-06, "loss": 0.0002, "step": 208 }, { "epoch": 0.08148862251486985, "grad_norm": 0.15489515662193298, "learning_rate": 9.974563635963348e-06, "loss": 0.0006, "step": 210 }, { "epoch": 0.08226470463405909, "grad_norm": 0.35902225971221924, "learning_rate": 9.973920263216658e-06, "loss": 0.0004, "step": 212 }, { "epoch": 0.08304078675324833, "grad_norm": 0.4768538773059845, "learning_rate": 9.973268876492827e-06, "loss": 0.0005, "step": 214 }, { "epoch": 0.08381686887243757, "grad_norm": 0.13987833261489868, "learning_rate": 9.972609476841368e-06, "loss": 0.0002, "step": 216 }, { "epoch": 0.0845929509916268, "grad_norm": 0.1310640126466751, "learning_rate": 9.971942065324704e-06, "loss": 0.0003, "step": 218 }, { "epoch": 0.08536903311081603, "grad_norm": 0.2835996747016907, "learning_rate": 9.971266643018171e-06, "loss": 0.0004, "step": 220 }, { "epoch": 0.08614511523000527, "grad_norm": 0.14516514539718628, "learning_rate": 9.970583211010008e-06, "loss": 0.0005, "step": 222 }, { "epoch": 0.08692119734919451, "grad_norm": 0.1896241158246994, "learning_rate": 9.969891770401358e-06, "loss": 0.0011, "step": 224 }, { "epoch": 0.08769727946838375, "grad_norm": 0.13900001347064972, "learning_rate": 9.969192322306271e-06, "loss": 0.0006, "step": 226 }, { "epoch": 0.08847336158757299, "grad_norm": 0.12469799816608429, "learning_rate": 9.968484867851698e-06, "loss": 0.0003, "step": 228 }, { "epoch": 0.08924944370676222, "grad_norm": 0.2005859613418579, "learning_rate": 9.96776940817749e-06, "loss": 0.0008, "step": 230 }, { "epoch": 0.09002552582595146, "grad_norm": 0.13151948153972626, "learning_rate": 9.967045944436392e-06, "loss": 0.0002, "step": 232 }, { "epoch": 0.0908016079451407, "grad_norm": 0.10286468267440796, "learning_rate": 9.966314477794052e-06, "loss": 0.0004, "step": 234 }, { "epoch": 0.09157769006432993, "grad_norm": 0.08904605358839035, "learning_rate": 9.965575009429006e-06, "loss": 0.0003, "step": 236 }, { "epoch": 0.09235377218351917, "grad_norm": 0.110069639980793, "learning_rate": 9.964827540532685e-06, "loss": 0.0004, "step": 238 }, { "epoch": 0.09312985430270841, "grad_norm": 0.044081419706344604, "learning_rate": 9.964072072309412e-06, "loss": 0.0002, "step": 240 }, { "epoch": 0.09312985430270841, "eval_accuracy": 0.9998835115818656, "eval_loss": 0.00031863132608123124, "eval_runtime": 127.6209, "eval_samples_per_second": 39.179, "eval_steps_per_second": 9.795, "step": 240 }, { "epoch": 0.09390593642189764, "grad_norm": 0.11008896678686142, "learning_rate": 9.963308605976397e-06, "loss": 0.0002, "step": 242 }, { "epoch": 0.09468201854108688, "grad_norm": 0.07347576320171356, "learning_rate": 9.962537142763733e-06, "loss": 0.0003, "step": 244 }, { "epoch": 0.09545810066027612, "grad_norm": 0.061514757573604584, "learning_rate": 9.961757683914406e-06, "loss": 0.0001, "step": 246 }, { "epoch": 0.09623418277946535, "grad_norm": 0.030034126713871956, "learning_rate": 9.960970230684276e-06, "loss": 0.0001, "step": 248 }, { "epoch": 0.0970102648986546, "grad_norm": 0.06853067874908447, "learning_rate": 9.96017478434209e-06, "loss": 0.0001, "step": 250 }, { "epoch": 0.09778634701784382, "grad_norm": 0.08918727934360504, "learning_rate": 9.959371346169466e-06, "loss": 0.0001, "step": 252 }, { "epoch": 0.09856242913703306, "grad_norm": 0.014135139063000679, "learning_rate": 9.958559917460909e-06, "loss": 0.0, "step": 254 }, { "epoch": 0.0993385112562223, "grad_norm": 0.03587706759572029, "learning_rate": 9.957740499523787e-06, "loss": 0.0, "step": 256 }, { "epoch": 0.10011459337541154, "grad_norm": 0.028472531586885452, "learning_rate": 9.95691309367835e-06, "loss": 0.0, "step": 258 }, { "epoch": 0.10089067549460078, "grad_norm": 0.10992776602506638, "learning_rate": 9.95607770125771e-06, "loss": 0.0, "step": 260 }, { "epoch": 0.10166675761379002, "grad_norm": 0.015062687918543816, "learning_rate": 9.955234323607854e-06, "loss": 0.0, "step": 262 }, { "epoch": 0.10244283973297924, "grad_norm": 0.09784650802612305, "learning_rate": 9.954382962087628e-06, "loss": 0.0001, "step": 264 }, { "epoch": 0.10321892185216848, "grad_norm": 0.004005913157016039, "learning_rate": 9.95352361806875e-06, "loss": 0.0001, "step": 266 }, { "epoch": 0.10399500397135772, "grad_norm": 0.0028742440044879913, "learning_rate": 9.95265629293579e-06, "loss": 0.0, "step": 268 }, { "epoch": 0.10477108609054696, "grad_norm": 0.01080241333693266, "learning_rate": 9.951780988086183e-06, "loss": 0.0, "step": 270 }, { "epoch": 0.1055471682097362, "grad_norm": 0.006698825862258673, "learning_rate": 9.950897704930223e-06, "loss": 0.0, "step": 272 }, { "epoch": 0.10632325032892542, "grad_norm": 0.0032098847441375256, "learning_rate": 9.95000644489105e-06, "loss": 0.0, "step": 274 }, { "epoch": 0.10709933244811466, "grad_norm": 0.014737925492227077, "learning_rate": 9.949107209404664e-06, "loss": 0.0, "step": 276 }, { "epoch": 0.1078754145673039, "grad_norm": 0.002784354379400611, "learning_rate": 9.948199999919914e-06, "loss": 0.0, "step": 278 }, { "epoch": 0.10865149668649314, "grad_norm": 0.001066903700120747, "learning_rate": 9.947284817898493e-06, "loss": 0.0, "step": 280 }, { "epoch": 0.10865149668649314, "eval_accuracy": 0.9999963963963965, "eval_loss": 2.1505837139557116e-05, "eval_runtime": 127.9196, "eval_samples_per_second": 39.087, "eval_steps_per_second": 9.772, "step": 280 }, { "epoch": 0.10942757880568238, "grad_norm": 0.0006136983865872025, "learning_rate": 9.946361664814942e-06, "loss": 0.0, "step": 282 }, { "epoch": 0.11020366092487162, "grad_norm": 0.021285895258188248, "learning_rate": 9.945430542156647e-06, "loss": 0.0, "step": 284 }, { "epoch": 0.11097974304406084, "grad_norm": 0.0006559508037753403, "learning_rate": 9.944491451423829e-06, "loss": 0.0, "step": 286 }, { "epoch": 0.11175582516325008, "grad_norm": 0.0006491419044323266, "learning_rate": 9.943544394129552e-06, "loss": 0.0, "step": 288 }, { "epoch": 0.11253190728243932, "grad_norm": 0.0015669898129999638, "learning_rate": 9.942589371799715e-06, "loss": 0.0, "step": 290 }, { "epoch": 0.11330798940162856, "grad_norm": 0.003007555613294244, "learning_rate": 9.941626385973047e-06, "loss": 0.0, "step": 292 }, { "epoch": 0.1140840715208178, "grad_norm": 0.004424386657774448, "learning_rate": 9.940655438201113e-06, "loss": 0.0, "step": 294 }, { "epoch": 0.11486015364000704, "grad_norm": 0.0031703764107078314, "learning_rate": 9.9396765300483e-06, "loss": 0.0, "step": 296 }, { "epoch": 0.11563623575919627, "grad_norm": 0.00221498915925622, "learning_rate": 9.938689663091828e-06, "loss": 0.0, "step": 298 }, { "epoch": 0.1164123178783855, "grad_norm": 0.005704451352357864, "learning_rate": 9.937694838921734e-06, "loss": 0.0, "step": 300 }, { "epoch": 0.11718839999757474, "grad_norm": 0.0013350360095500946, "learning_rate": 9.93669205914088e-06, "loss": 0.0, "step": 302 }, { "epoch": 0.11796448211676398, "grad_norm": 0.0008370972354896367, "learning_rate": 9.93568132536494e-06, "loss": 0.0, "step": 304 }, { "epoch": 0.11874056423595322, "grad_norm": 0.01809551753103733, "learning_rate": 9.934662639222412e-06, "loss": 0.0, "step": 306 }, { "epoch": 0.11951664635514245, "grad_norm": 0.0005159855354577303, "learning_rate": 9.9336360023546e-06, "loss": 0.0, "step": 308 }, { "epoch": 0.12029272847433169, "grad_norm": 0.0009325972059741616, "learning_rate": 9.932601416415622e-06, "loss": 0.0, "step": 310 }, { "epoch": 0.12106881059352093, "grad_norm": 0.003970442805439234, "learning_rate": 9.931558883072403e-06, "loss": 0.0, "step": 312 }, { "epoch": 0.12184489271271016, "grad_norm": 0.000802877766545862, "learning_rate": 9.930508404004668e-06, "loss": 0.0, "step": 314 }, { "epoch": 0.1226209748318994, "grad_norm": 0.000747400859836489, "learning_rate": 9.929449980904952e-06, "loss": 0.0, "step": 316 }, { "epoch": 0.12339705695108864, "grad_norm": 0.05219698324799538, "learning_rate": 9.928383615478586e-06, "loss": 0.0, "step": 318 }, { "epoch": 0.12417313907027787, "grad_norm": 0.0008670546812936664, "learning_rate": 9.927309309443696e-06, "loss": 0.0, "step": 320 }, { "epoch": 0.12417313907027787, "eval_accuracy": 0.9999981981981982, "eval_loss": 1.2200940545881167e-05, "eval_runtime": 126.4816, "eval_samples_per_second": 39.531, "eval_steps_per_second": 9.883, "step": 320 }, { "epoch": 0.12494922118946711, "grad_norm": 0.0031448816880583763, "learning_rate": 9.9262270645312e-06, "loss": 0.0, "step": 322 }, { "epoch": 0.12572530330865636, "grad_norm": 0.0009269348229281604, "learning_rate": 9.925136882484816e-06, "loss": 0.0, "step": 324 }, { "epoch": 0.1265013854278456, "grad_norm": 0.00048692882410250604, "learning_rate": 9.924038765061042e-06, "loss": 0.0, "step": 326 }, { "epoch": 0.1272774675470348, "grad_norm": 0.0416707918047905, "learning_rate": 9.922932714029163e-06, "loss": 0.0, "step": 328 }, { "epoch": 0.12805354966622406, "grad_norm": 0.0007638961542397738, "learning_rate": 9.921818731171249e-06, "loss": 0.0, "step": 330 }, { "epoch": 0.1288296317854133, "grad_norm": 0.0007810278912074864, "learning_rate": 9.920696818282147e-06, "loss": 0.0, "step": 332 }, { "epoch": 0.12960571390460254, "grad_norm": 0.0021285091061145067, "learning_rate": 9.919566977169486e-06, "loss": 0.0, "step": 334 }, { "epoch": 0.13038179602379177, "grad_norm": 0.0033166895154863596, "learning_rate": 9.918429209653662e-06, "loss": 0.0, "step": 336 }, { "epoch": 0.131157878142981, "grad_norm": 0.0007379205781035125, "learning_rate": 9.917283517567845e-06, "loss": 0.0, "step": 338 }, { "epoch": 0.13193396026217025, "grad_norm": 0.0007624090649187565, "learning_rate": 9.916129902757977e-06, "loss": 0.0, "step": 340 }, { "epoch": 0.13271004238135947, "grad_norm": 0.00043780903797596693, "learning_rate": 9.914968367082756e-06, "loss": 0.0, "step": 342 }, { "epoch": 0.13348612450054873, "grad_norm": 0.0003401880676392466, "learning_rate": 9.913798912413653e-06, "loss": 0.0, "step": 344 }, { "epoch": 0.13426220661973795, "grad_norm": 0.3694112002849579, "learning_rate": 9.912621540634889e-06, "loss": 0.0001, "step": 346 }, { "epoch": 0.13503828873892718, "grad_norm": 0.0005322833894751966, "learning_rate": 9.911436253643445e-06, "loss": 0.0, "step": 348 }, { "epoch": 0.13581437085811643, "grad_norm": 0.023259738460183144, "learning_rate": 9.910243053349055e-06, "loss": 0.0, "step": 350 }, { "epoch": 0.13659045297730565, "grad_norm": 0.00202095415443182, "learning_rate": 9.909041941674205e-06, "loss": 0.0002, "step": 352 }, { "epoch": 0.1373665350964949, "grad_norm": 0.001592564396560192, "learning_rate": 9.90783292055412e-06, "loss": 0.0001, "step": 354 }, { "epoch": 0.13814261721568413, "grad_norm": 0.2993152141571045, "learning_rate": 9.906615991936781e-06, "loss": 0.0005, "step": 356 }, { "epoch": 0.13891869933487336, "grad_norm": 0.016617566347122192, "learning_rate": 9.905391157782897e-06, "loss": 0.0001, "step": 358 }, { "epoch": 0.1396947814540626, "grad_norm": 0.10565357655286789, "learning_rate": 9.904158420065923e-06, "loss": 0.0001, "step": 360 }, { "epoch": 0.1396947814540626, "eval_accuracy": 0.9999483411112575, "eval_loss": 0.0001308279752265662, "eval_runtime": 125.426, "eval_samples_per_second": 39.864, "eval_steps_per_second": 9.966, "step": 360 }, { "epoch": 0.14047086357325184, "grad_norm": 0.19931581616401672, "learning_rate": 9.902917780772043e-06, "loss": 0.0002, "step": 362 }, { "epoch": 0.1412469456924411, "grad_norm": 0.016386395320296288, "learning_rate": 9.901669241900178e-06, "loss": 0.0, "step": 364 }, { "epoch": 0.14202302781163031, "grad_norm": 0.03343227878212929, "learning_rate": 9.900412805461968e-06, "loss": 0.0, "step": 366 }, { "epoch": 0.14279910993081957, "grad_norm": 0.007820719853043556, "learning_rate": 9.899148473481786e-06, "loss": 0.0, "step": 368 }, { "epoch": 0.1435751920500088, "grad_norm": 0.006656871177256107, "learning_rate": 9.89787624799672e-06, "loss": 0.0, "step": 370 }, { "epoch": 0.14435127416919802, "grad_norm": 0.008290220983326435, "learning_rate": 9.896596131056583e-06, "loss": 0.0, "step": 372 }, { "epoch": 0.14512735628838727, "grad_norm": 0.006194319576025009, "learning_rate": 9.895308124723897e-06, "loss": 0.0, "step": 374 }, { "epoch": 0.1459034384075765, "grad_norm": 0.008603195659816265, "learning_rate": 9.894012231073895e-06, "loss": 0.0, "step": 376 }, { "epoch": 0.14667952052676575, "grad_norm": 0.023244811221957207, "learning_rate": 9.892708452194522e-06, "loss": 0.0001, "step": 378 }, { "epoch": 0.14745560264595498, "grad_norm": 0.005203426815569401, "learning_rate": 9.891396790186424e-06, "loss": 0.0, "step": 380 }, { "epoch": 0.1482316847651442, "grad_norm": 0.0015924626495689154, "learning_rate": 9.890077247162951e-06, "loss": 0.0, "step": 382 }, { "epoch": 0.14900776688433345, "grad_norm": 0.002123458543792367, "learning_rate": 9.888749825250151e-06, "loss": 0.0, "step": 384 }, { "epoch": 0.14978384900352268, "grad_norm": 0.0028824363835155964, "learning_rate": 9.887414526586764e-06, "loss": 0.0, "step": 386 }, { "epoch": 0.15055993112271193, "grad_norm": 0.002307540737092495, "learning_rate": 9.886071353324223e-06, "loss": 0.0, "step": 388 }, { "epoch": 0.15133601324190116, "grad_norm": 0.0008634248515591025, "learning_rate": 9.884720307626647e-06, "loss": 0.0, "step": 390 }, { "epoch": 0.15211209536109038, "grad_norm": 0.005232020281255245, "learning_rate": 9.883361391670841e-06, "loss": 0.0, "step": 392 }, { "epoch": 0.15288817748027964, "grad_norm": 0.0016856775619089603, "learning_rate": 9.881994607646288e-06, "loss": 0.0, "step": 394 }, { "epoch": 0.15366425959946886, "grad_norm": 0.07297682762145996, "learning_rate": 9.880619957755151e-06, "loss": 0.0002, "step": 396 }, { "epoch": 0.15444034171865811, "grad_norm": 0.017098629847168922, "learning_rate": 9.879237444212265e-06, "loss": 0.0, "step": 398 }, { "epoch": 0.15521642383784734, "grad_norm": 0.0019075347809121013, "learning_rate": 9.877847069245134e-06, "loss": 0.0, "step": 400 }, { "epoch": 0.15521642383784734, "eval_accuracy": 1.0, "eval_loss": 6.299953383859247e-06, "eval_runtime": 127.1041, "eval_samples_per_second": 39.338, "eval_steps_per_second": 9.834, "step": 400 }, { "epoch": 0.1559925059570366, "grad_norm": 0.0027607521042227745, "learning_rate": 9.87644883509393e-06, "loss": 0.0, "step": 402 }, { "epoch": 0.15676858807622582, "grad_norm": 0.0009071730892173946, "learning_rate": 9.875042744011487e-06, "loss": 0.0, "step": 404 }, { "epoch": 0.15754467019541504, "grad_norm": 0.0006593936122953892, "learning_rate": 9.873628798263297e-06, "loss": 0.0, "step": 406 }, { "epoch": 0.1583207523146043, "grad_norm": 0.0007138429209589958, "learning_rate": 9.87220700012751e-06, "loss": 0.0, "step": 408 }, { "epoch": 0.15909683443379352, "grad_norm": 0.05871044471859932, "learning_rate": 9.870777351894926e-06, "loss": 0.0, "step": 410 }, { "epoch": 0.15987291655298277, "grad_norm": 0.004336285404860973, "learning_rate": 9.869339855868992e-06, "loss": 0.0, "step": 412 }, { "epoch": 0.160648998672172, "grad_norm": 0.0009174658334814012, "learning_rate": 9.867894514365802e-06, "loss": 0.0, "step": 414 }, { "epoch": 0.16142508079136123, "grad_norm": 0.001874931389465928, "learning_rate": 9.86644132971409e-06, "loss": 0.0, "step": 416 }, { "epoch": 0.16220116291055048, "grad_norm": 0.0012493067188188434, "learning_rate": 9.864980304255222e-06, "loss": 0.0, "step": 418 }, { "epoch": 0.1629772450297397, "grad_norm": 0.002323364606127143, "learning_rate": 9.863511440343206e-06, "loss": 0.0, "step": 420 }, { "epoch": 0.16375332714892896, "grad_norm": 0.0008437803480774164, "learning_rate": 9.862034740344673e-06, "loss": 0.0, "step": 422 }, { "epoch": 0.16452940926811818, "grad_norm": 0.0008547037723474205, "learning_rate": 9.860550206638881e-06, "loss": 0.0, "step": 424 }, { "epoch": 0.1653054913873074, "grad_norm": 0.00046750501496717334, "learning_rate": 9.859057841617709e-06, "loss": 0.0, "step": 426 }, { "epoch": 0.16608157350649666, "grad_norm": 0.008160830475389957, "learning_rate": 9.857557647685657e-06, "loss": 0.0, "step": 428 }, { "epoch": 0.16685765562568589, "grad_norm": 0.0017940751276910305, "learning_rate": 9.856049627259833e-06, "loss": 0.0, "step": 430 }, { "epoch": 0.16763373774487514, "grad_norm": 0.0009521475876681507, "learning_rate": 9.85453378276996e-06, "loss": 0.0, "step": 432 }, { "epoch": 0.16840981986406436, "grad_norm": 0.000398926466004923, "learning_rate": 9.853010116658368e-06, "loss": 0.0, "step": 434 }, { "epoch": 0.1691859019832536, "grad_norm": 0.0009814132936298847, "learning_rate": 9.851478631379982e-06, "loss": 0.0, "step": 436 }, { "epoch": 0.16996198410244284, "grad_norm": 0.0003290042805019766, "learning_rate": 9.849939329402337e-06, "loss": 0.0, "step": 438 }, { "epoch": 0.17073806622163207, "grad_norm": 0.00044680741848424077, "learning_rate": 9.848392213205549e-06, "loss": 0.0, "step": 440 }, { "epoch": 0.17073806622163207, "eval_accuracy": 1.0, "eval_loss": 2.245959649371798e-06, "eval_runtime": 126.2754, "eval_samples_per_second": 39.596, "eval_steps_per_second": 9.899, "step": 440 }, { "epoch": 0.17151414834082132, "grad_norm": 0.0005498761311173439, "learning_rate": 9.846837285282331e-06, "loss": 0.0, "step": 442 }, { "epoch": 0.17229023046001055, "grad_norm": 0.0013184353010728955, "learning_rate": 9.845274548137986e-06, "loss": 0.0, "step": 444 }, { "epoch": 0.1730663125791998, "grad_norm": 0.0017092734342440963, "learning_rate": 9.843704004290393e-06, "loss": 0.0, "step": 446 }, { "epoch": 0.17384239469838902, "grad_norm": 0.07550952583551407, "learning_rate": 9.842125656270011e-06, "loss": 0.0, "step": 448 }, { "epoch": 0.17461847681757825, "grad_norm": 0.011813322082161903, "learning_rate": 9.840539506619874e-06, "loss": 0.0, "step": 450 }, { "epoch": 0.1753945589367675, "grad_norm": 0.005851654335856438, "learning_rate": 9.838945557895586e-06, "loss": 0.0002, "step": 452 }, { "epoch": 0.17617064105595673, "grad_norm": 0.00033667022944428027, "learning_rate": 9.837343812665311e-06, "loss": 0.0, "step": 454 }, { "epoch": 0.17694672317514598, "grad_norm": 0.007402330171316862, "learning_rate": 9.835734273509787e-06, "loss": 0.0, "step": 456 }, { "epoch": 0.1777228052943352, "grad_norm": 0.14475902915000916, "learning_rate": 9.834116943022299e-06, "loss": 0.0, "step": 458 }, { "epoch": 0.17849888741352443, "grad_norm": 0.0023257508873939514, "learning_rate": 9.832491823808688e-06, "loss": 0.0, "step": 460 }, { "epoch": 0.17927496953271368, "grad_norm": 0.3273751437664032, "learning_rate": 9.830858918487347e-06, "loss": 0.001, "step": 462 }, { "epoch": 0.1800510516519029, "grad_norm": 0.03867664933204651, "learning_rate": 9.829218229689211e-06, "loss": 0.0004, "step": 464 }, { "epoch": 0.18082713377109216, "grad_norm": 0.11620517820119858, "learning_rate": 9.827569760057755e-06, "loss": 0.0005, "step": 466 }, { "epoch": 0.1816032158902814, "grad_norm": 0.06383311003446579, "learning_rate": 9.825913512248996e-06, "loss": 0.0004, "step": 468 }, { "epoch": 0.18237929800947061, "grad_norm": 0.1005072072148323, "learning_rate": 9.824249488931477e-06, "loss": 0.0004, "step": 470 }, { "epoch": 0.18315538012865987, "grad_norm": 0.10733664035797119, "learning_rate": 9.822577692786272e-06, "loss": 0.0004, "step": 472 }, { "epoch": 0.1839314622478491, "grad_norm": 0.03747246786952019, "learning_rate": 9.820898126506978e-06, "loss": 0.0002, "step": 474 }, { "epoch": 0.18470754436703835, "grad_norm": 0.011840682476758957, "learning_rate": 9.819210792799711e-06, "loss": 0.0002, "step": 476 }, { "epoch": 0.18548362648622757, "grad_norm": 0.012368162162601948, "learning_rate": 9.817515694383102e-06, "loss": 0.0001, "step": 478 }, { "epoch": 0.18625970860541682, "grad_norm": 0.01901157945394516, "learning_rate": 9.815812833988292e-06, "loss": 0.0001, "step": 480 }, { "epoch": 0.18625970860541682, "eval_accuracy": 0.9999533210264934, "eval_loss": 0.00015479570720344782, "eval_runtime": 126.5166, "eval_samples_per_second": 39.521, "eval_steps_per_second": 9.88, "step": 480 }, { "epoch": 0.18703579072460605, "grad_norm": 0.037855107337236404, "learning_rate": 9.814102214358928e-06, "loss": 0.0001, "step": 482 }, { "epoch": 0.18781187284379527, "grad_norm": 0.03597020357847214, "learning_rate": 9.81238383825116e-06, "loss": 0.0001, "step": 484 }, { "epoch": 0.18858795496298453, "grad_norm": 0.08369135111570358, "learning_rate": 9.810657708433637e-06, "loss": 0.0002, "step": 486 }, { "epoch": 0.18936403708217375, "grad_norm": 0.08246992528438568, "learning_rate": 9.808923827687494e-06, "loss": 0.0001, "step": 488 }, { "epoch": 0.190140119201363, "grad_norm": 0.05389998108148575, "learning_rate": 9.807182198806362e-06, "loss": 0.0001, "step": 490 }, { "epoch": 0.19091620132055223, "grad_norm": 0.026415130123496056, "learning_rate": 9.805432824596347e-06, "loss": 0.0, "step": 492 }, { "epoch": 0.19169228343974146, "grad_norm": 0.01571790501475334, "learning_rate": 9.803675707876048e-06, "loss": 0.0, "step": 494 }, { "epoch": 0.1924683655589307, "grad_norm": 0.12665115296840668, "learning_rate": 9.801910851476524e-06, "loss": 0.0, "step": 496 }, { "epoch": 0.19324444767811993, "grad_norm": 0.020718397572636604, "learning_rate": 9.800138258241311e-06, "loss": 0.0, "step": 498 }, { "epoch": 0.1940205297973092, "grad_norm": 0.006764342077076435, "learning_rate": 9.798357931026411e-06, "loss": 0.0001, "step": 500 }, { "epoch": 0.1947966119164984, "grad_norm": 0.018557220697402954, "learning_rate": 9.796569872700287e-06, "loss": 0.0, "step": 502 }, { "epoch": 0.19557269403568764, "grad_norm": 0.04763049632310867, "learning_rate": 9.79477408614386e-06, "loss": 0.0, "step": 504 }, { "epoch": 0.1963487761548769, "grad_norm": 0.07618030905723572, "learning_rate": 9.792970574250493e-06, "loss": 0.0, "step": 506 }, { "epoch": 0.19712485827406612, "grad_norm": 0.0342671163380146, "learning_rate": 9.791159339926009e-06, "loss": 0.0, "step": 508 }, { "epoch": 0.19790094039325537, "grad_norm": 0.01189445611089468, "learning_rate": 9.789340386088663e-06, "loss": 0.0001, "step": 510 }, { "epoch": 0.1986770225124446, "grad_norm": 0.02746419608592987, "learning_rate": 9.787513715669158e-06, "loss": 0.0, "step": 512 }, { "epoch": 0.19945310463163385, "grad_norm": 0.09005803614854813, "learning_rate": 9.78567933161062e-06, "loss": 0.0, "step": 514 }, { "epoch": 0.20022918675082307, "grad_norm": 0.1561058908700943, "learning_rate": 9.78383723686861e-06, "loss": 0.0001, "step": 516 }, { "epoch": 0.2010052688700123, "grad_norm": 0.04290369898080826, "learning_rate": 9.781987434411106e-06, "loss": 0.0003, "step": 518 }, { "epoch": 0.20178135098920155, "grad_norm": 0.3850896656513214, "learning_rate": 9.780129927218513e-06, "loss": 0.0005, "step": 520 }, { "epoch": 0.20178135098920155, "eval_accuracy": 0.9998925712285803, "eval_loss": 0.0007603790145367384, "eval_runtime": 125.9709, "eval_samples_per_second": 39.692, "eval_steps_per_second": 9.923, "step": 520 }, { "epoch": 0.20255743310839078, "grad_norm": 0.07285759598016739, "learning_rate": 9.778264718283644e-06, "loss": 0.0006, "step": 522 }, { "epoch": 0.20333351522758003, "grad_norm": 0.20383314788341522, "learning_rate": 9.776391810611719e-06, "loss": 0.0014, "step": 524 }, { "epoch": 0.20410959734676926, "grad_norm": 0.27576491236686707, "learning_rate": 9.774511207220369e-06, "loss": 0.0004, "step": 526 }, { "epoch": 0.20488567946595848, "grad_norm": 0.19563119113445282, "learning_rate": 9.772622911139622e-06, "loss": 0.0005, "step": 528 }, { "epoch": 0.20566176158514773, "grad_norm": 0.15061968564987183, "learning_rate": 9.770726925411898e-06, "loss": 0.0005, "step": 530 }, { "epoch": 0.20643784370433696, "grad_norm": 0.08727551251649857, "learning_rate": 9.768823253092008e-06, "loss": 0.0004, "step": 532 }, { "epoch": 0.2072139258235262, "grad_norm": 0.17683257162570953, "learning_rate": 9.766911897247147e-06, "loss": 0.0005, "step": 534 }, { "epoch": 0.20799000794271544, "grad_norm": 0.17190676927566528, "learning_rate": 9.76499286095689e-06, "loss": 0.0003, "step": 536 }, { "epoch": 0.20876609006190466, "grad_norm": 0.2041502594947815, "learning_rate": 9.763066147313189e-06, "loss": 0.0005, "step": 538 }, { "epoch": 0.20954217218109392, "grad_norm": 0.09993339329957962, "learning_rate": 9.76113175942036e-06, "loss": 0.0001, "step": 540 }, { "epoch": 0.21031825430028314, "grad_norm": 0.022698964923620224, "learning_rate": 9.759189700395096e-06, "loss": 0.0003, "step": 542 }, { "epoch": 0.2110943364194724, "grad_norm": 0.0878206416964531, "learning_rate": 9.75723997336643e-06, "loss": 0.0001, "step": 544 }, { "epoch": 0.21187041853866162, "grad_norm": 0.006571301259100437, "learning_rate": 9.755282581475769e-06, "loss": 0.0005, "step": 546 }, { "epoch": 0.21264650065785085, "grad_norm": 1.6722694635391235, "learning_rate": 9.753317527876857e-06, "loss": 0.0005, "step": 548 }, { "epoch": 0.2134225827770401, "grad_norm": 0.5739099383354187, "learning_rate": 9.751344815735791e-06, "loss": 0.0015, "step": 550 }, { "epoch": 0.21419866489622932, "grad_norm": 0.19300027191638947, "learning_rate": 9.749364448231001e-06, "loss": 0.0012, "step": 552 }, { "epoch": 0.21497474701541858, "grad_norm": 0.050146687775850296, "learning_rate": 9.747376428553255e-06, "loss": 0.0009, "step": 554 }, { "epoch": 0.2157508291346078, "grad_norm": 0.13551265001296997, "learning_rate": 9.745380759905648e-06, "loss": 0.0005, "step": 556 }, { "epoch": 0.21652691125379706, "grad_norm": 0.09519948810338974, "learning_rate": 9.743377445503598e-06, "loss": 0.0003, "step": 558 }, { "epoch": 0.21730299337298628, "grad_norm": 0.062203116714954376, "learning_rate": 9.74136648857485e-06, "loss": 0.0002, "step": 560 }, { "epoch": 0.21730299337298628, "eval_accuracy": 0.9998422048167375, "eval_loss": 0.00038139638490974903, "eval_runtime": 125.77, "eval_samples_per_second": 39.755, "eval_steps_per_second": 9.939, "step": 560 }, { "epoch": 0.2180790754921755, "grad_norm": 0.15157951414585114, "learning_rate": 9.739347892359453e-06, "loss": 0.0003, "step": 562 }, { "epoch": 0.21885515761136476, "grad_norm": 0.053871285170316696, "learning_rate": 9.737321660109767e-06, "loss": 0.0001, "step": 564 }, { "epoch": 0.21963123973055398, "grad_norm": 0.11267916858196259, "learning_rate": 9.735287795090455e-06, "loss": 0.0002, "step": 566 }, { "epoch": 0.22040732184974324, "grad_norm": 0.044487178325653076, "learning_rate": 9.733246300578482e-06, "loss": 0.0001, "step": 568 }, { "epoch": 0.22118340396893246, "grad_norm": 0.06593261659145355, "learning_rate": 9.731197179863104e-06, "loss": 0.0001, "step": 570 }, { "epoch": 0.2219594860881217, "grad_norm": 0.15028122067451477, "learning_rate": 9.729140436245857e-06, "loss": 0.0001, "step": 572 }, { "epoch": 0.22273556820731094, "grad_norm": 0.004901881329715252, "learning_rate": 9.72707607304057e-06, "loss": 0.0, "step": 574 }, { "epoch": 0.22351165032650017, "grad_norm": 0.005214722361415625, "learning_rate": 9.725004093573343e-06, "loss": 0.0, "step": 576 }, { "epoch": 0.22428773244568942, "grad_norm": 0.007923359051346779, "learning_rate": 9.722924501182546e-06, "loss": 0.0001, "step": 578 }, { "epoch": 0.22506381456487864, "grad_norm": 0.05480680987238884, "learning_rate": 9.72083729921882e-06, "loss": 0.0001, "step": 580 }, { "epoch": 0.22583989668406787, "grad_norm": 0.06114561855792999, "learning_rate": 9.718742491045061e-06, "loss": 0.0, "step": 582 }, { "epoch": 0.22661597880325712, "grad_norm": 0.007530731149017811, "learning_rate": 9.716640080036423e-06, "loss": 0.0002, "step": 584 }, { "epoch": 0.22739206092244635, "grad_norm": 0.3882319927215576, "learning_rate": 9.71453006958031e-06, "loss": 0.0002, "step": 586 }, { "epoch": 0.2281681430416356, "grad_norm": 0.09104151278734207, "learning_rate": 9.712412463076368e-06, "loss": 0.0, "step": 588 }, { "epoch": 0.22894422516082483, "grad_norm": 0.5799682140350342, "learning_rate": 9.710287263936485e-06, "loss": 0.0008, "step": 590 }, { "epoch": 0.22972030728001408, "grad_norm": 0.373335063457489, "learning_rate": 9.708154475584779e-06, "loss": 0.0015, "step": 592 }, { "epoch": 0.2304963893992033, "grad_norm": 0.07736678421497345, "learning_rate": 9.7060141014576e-06, "loss": 0.0007, "step": 594 }, { "epoch": 0.23127247151839253, "grad_norm": 0.1351231336593628, "learning_rate": 9.703866145003512e-06, "loss": 0.0004, "step": 596 }, { "epoch": 0.23204855363758178, "grad_norm": 0.09312327951192856, "learning_rate": 9.701710609683305e-06, "loss": 0.0005, "step": 598 }, { "epoch": 0.232824635756771, "grad_norm": 0.1107032522559166, "learning_rate": 9.699547498969978e-06, "loss": 0.0006, "step": 600 }, { "epoch": 0.232824635756771, "eval_accuracy": 0.9998158034996139, "eval_loss": 0.0004652898933272809, "eval_runtime": 125.606, "eval_samples_per_second": 39.807, "eval_steps_per_second": 9.952, "step": 600 }, { "epoch": 0.23360071787596026, "grad_norm": 0.1096329391002655, "learning_rate": 9.697376816348732e-06, "loss": 0.0004, "step": 602 }, { "epoch": 0.2343767999951495, "grad_norm": 0.040570348501205444, "learning_rate": 9.695198565316966e-06, "loss": 0.0002, "step": 604 }, { "epoch": 0.2351528821143387, "grad_norm": 0.07446596026420593, "learning_rate": 9.69301274938428e-06, "loss": 0.0003, "step": 606 }, { "epoch": 0.23592896423352797, "grad_norm": 0.02620445005595684, "learning_rate": 9.690819372072457e-06, "loss": 0.0001, "step": 608 }, { "epoch": 0.2367050463527172, "grad_norm": 0.05135127529501915, "learning_rate": 9.68861843691547e-06, "loss": 0.0001, "step": 610 }, { "epoch": 0.23748112847190644, "grad_norm": 0.054929956793785095, "learning_rate": 9.68640994745946e-06, "loss": 0.0001, "step": 612 }, { "epoch": 0.23825721059109567, "grad_norm": 0.03947428613901138, "learning_rate": 9.684193907262742e-06, "loss": 0.0, "step": 614 }, { "epoch": 0.2390332927102849, "grad_norm": 0.053851328790187836, "learning_rate": 9.681970319895804e-06, "loss": 0.0001, "step": 616 }, { "epoch": 0.23980937482947415, "grad_norm": 0.013983331620693207, "learning_rate": 9.679739188941283e-06, "loss": 0.0, "step": 618 }, { "epoch": 0.24058545694866337, "grad_norm": 0.06546950340270996, "learning_rate": 9.677500517993983e-06, "loss": 0.0002, "step": 620 }, { "epoch": 0.24136153906785263, "grad_norm": 0.014229993335902691, "learning_rate": 9.675254310660842e-06, "loss": 0.0, "step": 622 }, { "epoch": 0.24213762118704185, "grad_norm": 0.019442947581410408, "learning_rate": 9.673000570560952e-06, "loss": 0.0, "step": 624 }, { "epoch": 0.24291370330623108, "grad_norm": 0.045110318809747696, "learning_rate": 9.670739301325534e-06, "loss": 0.0002, "step": 626 }, { "epoch": 0.24368978542542033, "grad_norm": 0.04606299474835396, "learning_rate": 9.668470506597946e-06, "loss": 0.0, "step": 628 }, { "epoch": 0.24446586754460956, "grad_norm": 0.004683693405240774, "learning_rate": 9.66619419003367e-06, "loss": 0.0003, "step": 630 }, { "epoch": 0.2452419496637988, "grad_norm": 0.005102402530610561, "learning_rate": 9.663910355300306e-06, "loss": 0.0, "step": 632 }, { "epoch": 0.24601803178298803, "grad_norm": 0.015854936093091965, "learning_rate": 9.661619006077562e-06, "loss": 0.0, "step": 634 }, { "epoch": 0.2467941139021773, "grad_norm": 0.01310874056071043, "learning_rate": 9.659320146057263e-06, "loss": 0.0001, "step": 636 }, { "epoch": 0.2475701960213665, "grad_norm": 0.07131452858448029, "learning_rate": 9.657013778943328e-06, "loss": 0.0006, "step": 638 }, { "epoch": 0.24834627814055574, "grad_norm": 0.19167400896549225, "learning_rate": 9.654699908451777e-06, "loss": 0.0014, "step": 640 }, { "epoch": 0.24834627814055574, "eval_accuracy": 0.9998640253286262, "eval_loss": 0.0005603338358923793, "eval_runtime": 126.0066, "eval_samples_per_second": 39.68, "eval_steps_per_second": 9.92, "step": 640 }, { "epoch": 0.249122360259745, "grad_norm": 0.3487143814563751, "learning_rate": 9.652378538310715e-06, "loss": 0.0008, "step": 642 }, { "epoch": 0.24989844237893422, "grad_norm": 0.0879506841301918, "learning_rate": 9.650049672260333e-06, "loss": 0.0002, "step": 644 }, { "epoch": 0.25067452449812344, "grad_norm": 0.3337320387363434, "learning_rate": 9.647713314052896e-06, "loss": 0.0006, "step": 646 }, { "epoch": 0.2514506066173127, "grad_norm": 0.10642711073160172, "learning_rate": 9.645369467452746e-06, "loss": 0.0003, "step": 648 }, { "epoch": 0.25222668873650195, "grad_norm": 0.11507799476385117, "learning_rate": 9.643018136236286e-06, "loss": 0.0003, "step": 650 }, { "epoch": 0.2530027708556912, "grad_norm": 0.042679496109485626, "learning_rate": 9.64065932419198e-06, "loss": 0.0002, "step": 652 }, { "epoch": 0.2537788529748804, "grad_norm": 0.1168542206287384, "learning_rate": 9.638293035120342e-06, "loss": 0.0003, "step": 654 }, { "epoch": 0.2545549350940696, "grad_norm": 0.01957826130092144, "learning_rate": 9.635919272833938e-06, "loss": 0.0001, "step": 656 }, { "epoch": 0.2553310172132589, "grad_norm": 0.061160337179899216, "learning_rate": 9.63353804115737e-06, "loss": 0.0001, "step": 658 }, { "epoch": 0.25610709933244813, "grad_norm": 0.16292768716812134, "learning_rate": 9.63114934392728e-06, "loss": 0.0005, "step": 660 }, { "epoch": 0.25688318145163735, "grad_norm": 0.009718884713947773, "learning_rate": 9.628753184992334e-06, "loss": 0.0001, "step": 662 }, { "epoch": 0.2576592635708266, "grad_norm": 0.008971277624368668, "learning_rate": 9.62634956821322e-06, "loss": 0.0001, "step": 664 }, { "epoch": 0.2584353456900158, "grad_norm": 0.009109067730605602, "learning_rate": 9.623938497462647e-06, "loss": 0.0, "step": 666 }, { "epoch": 0.2592114278092051, "grad_norm": 0.06948648393154144, "learning_rate": 9.621519976625327e-06, "loss": 0.0001, "step": 668 }, { "epoch": 0.2599875099283943, "grad_norm": 0.04568452760577202, "learning_rate": 9.619094009597982e-06, "loss": 0.0001, "step": 670 }, { "epoch": 0.26076359204758354, "grad_norm": 0.004767918027937412, "learning_rate": 9.616660600289329e-06, "loss": 0.0001, "step": 672 }, { "epoch": 0.26153967416677276, "grad_norm": 0.024964256212115288, "learning_rate": 9.614219752620074e-06, "loss": 0.0, "step": 674 }, { "epoch": 0.262315756285962, "grad_norm": 0.08372616022825241, "learning_rate": 9.611771470522908e-06, "loss": 0.0002, "step": 676 }, { "epoch": 0.26309183840515127, "grad_norm": 0.05544775351881981, "learning_rate": 9.609315757942504e-06, "loss": 0.0001, "step": 678 }, { "epoch": 0.2638679205243405, "grad_norm": 0.00295156124047935, "learning_rate": 9.606852618835503e-06, "loss": 0.0, "step": 680 }, { "epoch": 0.2638679205243405, "eval_accuracy": 0.9999964455901882, "eval_loss": 1.7429731087759137e-05, "eval_runtime": 126.4677, "eval_samples_per_second": 39.536, "eval_steps_per_second": 9.884, "step": 680 }, { "epoch": 0.2646440026435297, "grad_norm": 0.0017297189915552735, "learning_rate": 9.604382057170514e-06, "loss": 0.0, "step": 682 }, { "epoch": 0.26542008476271894, "grad_norm": 0.006471114233136177, "learning_rate": 9.601904076928103e-06, "loss": 0.0, "step": 684 }, { "epoch": 0.26619616688190817, "grad_norm": 0.00347676663659513, "learning_rate": 9.599418682100793e-06, "loss": 0.0, "step": 686 }, { "epoch": 0.26697224900109745, "grad_norm": 0.0037247599102556705, "learning_rate": 9.596925876693047e-06, "loss": 0.0, "step": 688 }, { "epoch": 0.2677483311202867, "grad_norm": 0.0023733838461339474, "learning_rate": 9.594425664721275e-06, "loss": 0.0, "step": 690 }, { "epoch": 0.2685244132394759, "grad_norm": 0.015363924205303192, "learning_rate": 9.591918050213814e-06, "loss": 0.0, "step": 692 }, { "epoch": 0.2693004953586651, "grad_norm": 0.0032706656493246555, "learning_rate": 9.589403037210933e-06, "loss": 0.0, "step": 694 }, { "epoch": 0.27007657747785435, "grad_norm": 0.44799983501434326, "learning_rate": 9.586880629764817e-06, "loss": 0.0001, "step": 696 }, { "epoch": 0.27085265959704363, "grad_norm": 0.12151243537664413, "learning_rate": 9.584350831939571e-06, "loss": 0.0, "step": 698 }, { "epoch": 0.27162874171623286, "grad_norm": 0.30089908838272095, "learning_rate": 9.581813647811199e-06, "loss": 0.0002, "step": 700 }, { "epoch": 0.2724048238354221, "grad_norm": 0.04375402256846428, "learning_rate": 9.579269081467614e-06, "loss": 0.0003, "step": 702 }, { "epoch": 0.2731809059546113, "grad_norm": 0.15066662430763245, "learning_rate": 9.576717137008617e-06, "loss": 0.0004, "step": 704 }, { "epoch": 0.27395698807380053, "grad_norm": 0.10531582683324814, "learning_rate": 9.574157818545902e-06, "loss": 0.0001, "step": 706 }, { "epoch": 0.2747330701929898, "grad_norm": 0.2945062220096588, "learning_rate": 9.57159113020304e-06, "loss": 0.0011, "step": 708 }, { "epoch": 0.27550915231217904, "grad_norm": 0.09745926409959793, "learning_rate": 9.569017076115476e-06, "loss": 0.0003, "step": 710 }, { "epoch": 0.27628523443136827, "grad_norm": 0.05198003724217415, "learning_rate": 9.566435660430528e-06, "loss": 0.0002, "step": 712 }, { "epoch": 0.2770613165505575, "grad_norm": 0.0568278431892395, "learning_rate": 9.563846887307369e-06, "loss": 0.0003, "step": 714 }, { "epoch": 0.2778373986697467, "grad_norm": 0.039371099323034286, "learning_rate": 9.561250760917026e-06, "loss": 0.0002, "step": 716 }, { "epoch": 0.278613480788936, "grad_norm": 0.08864190429449081, "learning_rate": 9.558647285442382e-06, "loss": 0.0002, "step": 718 }, { "epoch": 0.2793895629081252, "grad_norm": 0.09049225598573685, "learning_rate": 9.55603646507815e-06, "loss": 0.0003, "step": 720 }, { "epoch": 0.2793895629081252, "eval_accuracy": 0.9999203060143169, "eval_loss": 0.00019011966651305556, "eval_runtime": 126.3204, "eval_samples_per_second": 39.582, "eval_steps_per_second": 9.895, "step": 720 }, { "epoch": 0.28016564502731445, "grad_norm": 0.030860867351293564, "learning_rate": 9.553418304030886e-06, "loss": 0.0001, "step": 722 }, { "epoch": 0.2809417271465037, "grad_norm": 0.09574900567531586, "learning_rate": 9.550792806518967e-06, "loss": 0.0002, "step": 724 }, { "epoch": 0.28171780926569295, "grad_norm": 0.0391797199845314, "learning_rate": 9.548159976772593e-06, "loss": 0.0001, "step": 726 }, { "epoch": 0.2824938913848822, "grad_norm": 0.010204690508544445, "learning_rate": 9.545519819033777e-06, "loss": 0.0, "step": 728 }, { "epoch": 0.2832699735040714, "grad_norm": 0.03360763192176819, "learning_rate": 9.542872337556341e-06, "loss": 0.0002, "step": 730 }, { "epoch": 0.28404605562326063, "grad_norm": 0.10943489521741867, "learning_rate": 9.540217536605906e-06, "loss": 0.0002, "step": 732 }, { "epoch": 0.28482213774244985, "grad_norm": 0.018541136756539345, "learning_rate": 9.537555420459883e-06, "loss": 0.0001, "step": 734 }, { "epoch": 0.28559821986163914, "grad_norm": 0.01958926022052765, "learning_rate": 9.534885993407474e-06, "loss": 0.0, "step": 736 }, { "epoch": 0.28637430198082836, "grad_norm": 0.09343879669904709, "learning_rate": 9.532209259749658e-06, "loss": 0.0, "step": 738 }, { "epoch": 0.2871503841000176, "grad_norm": 0.007415697444230318, "learning_rate": 9.529525223799185e-06, "loss": 0.0, "step": 740 }, { "epoch": 0.2879264662192068, "grad_norm": 0.04692302644252777, "learning_rate": 9.526833889880573e-06, "loss": 0.0001, "step": 742 }, { "epoch": 0.28870254833839604, "grad_norm": 0.004772865679115057, "learning_rate": 9.524135262330098e-06, "loss": 0.0, "step": 744 }, { "epoch": 0.2894786304575853, "grad_norm": 0.006624268833547831, "learning_rate": 9.521429345495787e-06, "loss": 0.0, "step": 746 }, { "epoch": 0.29025471257677454, "grad_norm": 0.00328302220441401, "learning_rate": 9.51871614373741e-06, "loss": 0.0, "step": 748 }, { "epoch": 0.29103079469596377, "grad_norm": 0.018977543339133263, "learning_rate": 9.515995661426478e-06, "loss": 0.0001, "step": 750 }, { "epoch": 0.291806876815153, "grad_norm": 0.0034067758824676275, "learning_rate": 9.513267902946228e-06, "loss": 0.0, "step": 752 }, { "epoch": 0.2925829589343422, "grad_norm": 0.006681219674646854, "learning_rate": 9.510532872691624e-06, "loss": 0.0, "step": 754 }, { "epoch": 0.2933590410535315, "grad_norm": 0.006615277845412493, "learning_rate": 9.507790575069347e-06, "loss": 0.0, "step": 756 }, { "epoch": 0.2941351231727207, "grad_norm": 0.0020538324024528265, "learning_rate": 9.50504101449778e-06, "loss": 0.0, "step": 758 }, { "epoch": 0.29491120529190995, "grad_norm": 0.004129903856664896, "learning_rate": 9.50228419540702e-06, "loss": 0.0, "step": 760 }, { "epoch": 0.29491120529190995, "eval_accuracy": 0.9999894967205549, "eval_loss": 2.910307011916302e-05, "eval_runtime": 127.256, "eval_samples_per_second": 39.291, "eval_steps_per_second": 9.823, "step": 760 }, { "epoch": 0.2956872874110992, "grad_norm": 0.000902244180906564, "learning_rate": 9.499520122238846e-06, "loss": 0.0, "step": 762 }, { "epoch": 0.2964633695302884, "grad_norm": 0.005757022183388472, "learning_rate": 9.496748799446733e-06, "loss": 0.0, "step": 764 }, { "epoch": 0.2972394516494777, "grad_norm": 0.055734045803546906, "learning_rate": 9.493970231495836e-06, "loss": 0.0, "step": 766 }, { "epoch": 0.2980155337686669, "grad_norm": 0.001675387378782034, "learning_rate": 9.49118442286298e-06, "loss": 0.0, "step": 768 }, { "epoch": 0.29879161588785613, "grad_norm": 0.0013664959697052836, "learning_rate": 9.488391378036662e-06, "loss": 0.0, "step": 770 }, { "epoch": 0.29956769800704536, "grad_norm": 0.08549904078245163, "learning_rate": 9.485591101517027e-06, "loss": 0.0, "step": 772 }, { "epoch": 0.3003437801262346, "grad_norm": 0.004790373612195253, "learning_rate": 9.482783597815883e-06, "loss": 0.0, "step": 774 }, { "epoch": 0.30111986224542386, "grad_norm": 0.0086152832955122, "learning_rate": 9.47996887145668e-06, "loss": 0.0, "step": 776 }, { "epoch": 0.3018959443646131, "grad_norm": 0.0011121364077553153, "learning_rate": 9.477146926974501e-06, "loss": 0.0, "step": 778 }, { "epoch": 0.3026720264838023, "grad_norm": 0.0020719694439321756, "learning_rate": 9.47431776891606e-06, "loss": 0.0, "step": 780 }, { "epoch": 0.30344810860299154, "grad_norm": 0.057527460157871246, "learning_rate": 9.471481401839696e-06, "loss": 0.0, "step": 782 }, { "epoch": 0.30422419072218077, "grad_norm": 0.00046402812586165965, "learning_rate": 9.468637830315364e-06, "loss": 0.0, "step": 784 }, { "epoch": 0.30500027284137005, "grad_norm": 0.0010086536640301347, "learning_rate": 9.46578705892462e-06, "loss": 0.0, "step": 786 }, { "epoch": 0.30577635496055927, "grad_norm": 0.00028398106223903596, "learning_rate": 9.46292909226063e-06, "loss": 0.0, "step": 788 }, { "epoch": 0.3065524370797485, "grad_norm": 0.021826280280947685, "learning_rate": 9.460063934928142e-06, "loss": 0.0, "step": 790 }, { "epoch": 0.3073285191989377, "grad_norm": 0.0005104991141706705, "learning_rate": 9.4571915915435e-06, "loss": 0.0, "step": 792 }, { "epoch": 0.30810460131812695, "grad_norm": 0.0023655067197978497, "learning_rate": 9.454312066734624e-06, "loss": 0.0, "step": 794 }, { "epoch": 0.30888068343731623, "grad_norm": 0.0004329844960011542, "learning_rate": 9.451425365140997e-06, "loss": 0.0, "step": 796 }, { "epoch": 0.30965676555650545, "grad_norm": 0.0008137148688547313, "learning_rate": 9.448531491413673e-06, "loss": 0.0, "step": 798 }, { "epoch": 0.3104328476756947, "grad_norm": 0.0005432083271443844, "learning_rate": 9.445630450215259e-06, "loss": 0.0, "step": 800 }, { "epoch": 0.3104328476756947, "eval_accuracy": 0.9999969636302969, "eval_loss": 1.007641367323231e-05, "eval_runtime": 125.944, "eval_samples_per_second": 39.7, "eval_steps_per_second": 9.925, "step": 800 }, { "epoch": 0.3112089297948839, "grad_norm": 0.0004606054862961173, "learning_rate": 9.442722246219915e-06, "loss": 0.0, "step": 802 }, { "epoch": 0.3119850119140732, "grad_norm": 0.0005307365208864212, "learning_rate": 9.439806884113331e-06, "loss": 0.0, "step": 804 }, { "epoch": 0.3127610940332624, "grad_norm": 0.0022837109863758087, "learning_rate": 9.43688436859274e-06, "loss": 0.0, "step": 806 }, { "epoch": 0.31353717615245164, "grad_norm": 0.0017890158342197537, "learning_rate": 9.433954704366897e-06, "loss": 0.0, "step": 808 }, { "epoch": 0.31431325827164086, "grad_norm": 0.002820837078616023, "learning_rate": 9.431017896156074e-06, "loss": 0.0, "step": 810 }, { "epoch": 0.3150893403908301, "grad_norm": 0.0007451911806128919, "learning_rate": 9.428073948692056e-06, "loss": 0.0, "step": 812 }, { "epoch": 0.31586542251001937, "grad_norm": 0.00033517737756483257, "learning_rate": 9.425122866718128e-06, "loss": 0.0, "step": 814 }, { "epoch": 0.3166415046292086, "grad_norm": 0.000584141758736223, "learning_rate": 9.422164654989073e-06, "loss": 0.0, "step": 816 }, { "epoch": 0.3174175867483978, "grad_norm": 0.000700132513884455, "learning_rate": 9.419199318271158e-06, "loss": 0.0, "step": 818 }, { "epoch": 0.31819366886758704, "grad_norm": 0.00040585125680081546, "learning_rate": 9.416226861342132e-06, "loss": 0.0, "step": 820 }, { "epoch": 0.31896975098677627, "grad_norm": 0.00018835687660612166, "learning_rate": 9.413247288991216e-06, "loss": 0.0, "step": 822 }, { "epoch": 0.31974583310596555, "grad_norm": 0.0010284122545272112, "learning_rate": 9.410260606019095e-06, "loss": 0.0, "step": 824 }, { "epoch": 0.3205219152251548, "grad_norm": 0.001625030068680644, "learning_rate": 9.40726681723791e-06, "loss": 0.0, "step": 826 }, { "epoch": 0.321297997344344, "grad_norm": 0.00041874812450259924, "learning_rate": 9.404265927471255e-06, "loss": 0.0, "step": 828 }, { "epoch": 0.3220740794635332, "grad_norm": 0.0008408282301388681, "learning_rate": 9.401257941554157e-06, "loss": 0.0, "step": 830 }, { "epoch": 0.32285016158272245, "grad_norm": 0.00029697222635149956, "learning_rate": 9.398242864333084e-06, "loss": 0.0, "step": 832 }, { "epoch": 0.32362624370191173, "grad_norm": 0.0003149902040604502, "learning_rate": 9.395220700665924e-06, "loss": 0.0, "step": 834 }, { "epoch": 0.32440232582110096, "grad_norm": 0.003555809846147895, "learning_rate": 9.392191455421989e-06, "loss": 0.0, "step": 836 }, { "epoch": 0.3251784079402902, "grad_norm": 0.0002259164466522634, "learning_rate": 9.389155133481993e-06, "loss": 0.0, "step": 838 }, { "epoch": 0.3259544900594794, "grad_norm": 0.0006663685198873281, "learning_rate": 9.386111739738057e-06, "loss": 0.0, "step": 840 }, { "epoch": 0.3259544900594794, "eval_accuracy": 0.9999981981981982, "eval_loss": 3.87445015803678e-06, "eval_runtime": 125.6695, "eval_samples_per_second": 39.787, "eval_steps_per_second": 9.947, "step": 840 }, { "epoch": 0.32673057217866863, "grad_norm": 0.00026177996187470853, "learning_rate": 9.383061279093697e-06, "loss": 0.0, "step": 842 }, { "epoch": 0.3275066542978579, "grad_norm": 0.00030967817292548716, "learning_rate": 9.380003756463812e-06, "loss": 0.0, "step": 844 }, { "epoch": 0.32828273641704714, "grad_norm": 0.010552030056715012, "learning_rate": 9.376939176774678e-06, "loss": 0.0, "step": 846 }, { "epoch": 0.32905881853623636, "grad_norm": 0.00029037470812909305, "learning_rate": 9.373867544963949e-06, "loss": 0.0, "step": 848 }, { "epoch": 0.3298349006554256, "grad_norm": 0.0001495286705903709, "learning_rate": 9.370788865980633e-06, "loss": 0.0, "step": 850 }, { "epoch": 0.3306109827746148, "grad_norm": 0.0002662305487319827, "learning_rate": 9.367703144785097e-06, "loss": 0.0, "step": 852 }, { "epoch": 0.3313870648938041, "grad_norm": 0.0021101173479110003, "learning_rate": 9.364610386349048e-06, "loss": 0.0, "step": 854 }, { "epoch": 0.3321631470129933, "grad_norm": 0.00026662024902179837, "learning_rate": 9.361510595655545e-06, "loss": 0.0, "step": 856 }, { "epoch": 0.33293922913218255, "grad_norm": 0.0002303088695043698, "learning_rate": 9.358403777698962e-06, "loss": 0.0, "step": 858 }, { "epoch": 0.33371531125137177, "grad_norm": 0.00013503020454663783, "learning_rate": 9.355289937485005e-06, "loss": 0.0, "step": 860 }, { "epoch": 0.334491393370561, "grad_norm": 0.00030105997575446963, "learning_rate": 9.35216908003069e-06, "loss": 0.0, "step": 862 }, { "epoch": 0.3352674754897503, "grad_norm": 0.0008567289914935827, "learning_rate": 9.349041210364343e-06, "loss": 0.0, "step": 864 }, { "epoch": 0.3360435576089395, "grad_norm": 0.00020159636915195733, "learning_rate": 9.345906333525582e-06, "loss": 0.0, "step": 866 }, { "epoch": 0.33681963972812873, "grad_norm": 0.023337041959166527, "learning_rate": 9.342764454565321e-06, "loss": 0.0, "step": 868 }, { "epoch": 0.33759572184731795, "grad_norm": 0.00021924391330685467, "learning_rate": 9.339615578545753e-06, "loss": 0.0, "step": 870 }, { "epoch": 0.3383718039665072, "grad_norm": 0.0024127087090164423, "learning_rate": 9.336459710540344e-06, "loss": 0.0, "step": 872 }, { "epoch": 0.33914788608569646, "grad_norm": 0.00021501471928786486, "learning_rate": 9.333296855633828e-06, "loss": 0.0, "step": 874 }, { "epoch": 0.3399239682048857, "grad_norm": 0.00021591952827293426, "learning_rate": 9.330127018922195e-06, "loss": 0.0, "step": 876 }, { "epoch": 0.3407000503240749, "grad_norm": 0.0002424675622023642, "learning_rate": 9.326950205512682e-06, "loss": 0.0, "step": 878 }, { "epoch": 0.34147613244326414, "grad_norm": 0.000660261488519609, "learning_rate": 9.323766420523768e-06, "loss": 0.0, "step": 880 }, { "epoch": 0.34147613244326414, "eval_accuracy": 0.9999992031872509, "eval_loss": 2.521178885217523e-06, "eval_runtime": 125.7035, "eval_samples_per_second": 39.776, "eval_steps_per_second": 9.944, "step": 880 }, { "epoch": 0.3422522145624534, "grad_norm": 0.00019684905419126153, "learning_rate": 9.32057566908517e-06, "loss": 0.0, "step": 882 }, { "epoch": 0.34302829668164264, "grad_norm": 0.0002331451396457851, "learning_rate": 9.31737795633782e-06, "loss": 0.0, "step": 884 }, { "epoch": 0.34380437880083187, "grad_norm": 0.0003608768747653812, "learning_rate": 9.314173287433874e-06, "loss": 0.0, "step": 886 }, { "epoch": 0.3445804609200211, "grad_norm": 0.00022846901265438646, "learning_rate": 9.310961667536689e-06, "loss": 0.0, "step": 888 }, { "epoch": 0.3453565430392103, "grad_norm": 0.0003173276490997523, "learning_rate": 9.307743101820828e-06, "loss": 0.0, "step": 890 }, { "epoch": 0.3461326251583996, "grad_norm": 0.00014914020721334964, "learning_rate": 9.30451759547204e-06, "loss": 0.0, "step": 892 }, { "epoch": 0.3469087072775888, "grad_norm": 0.00020171761570964009, "learning_rate": 9.301285153687261e-06, "loss": 0.0, "step": 894 }, { "epoch": 0.34768478939677805, "grad_norm": 0.0009668665588833392, "learning_rate": 9.298045781674595e-06, "loss": 0.0, "step": 896 }, { "epoch": 0.3484608715159673, "grad_norm": 0.00016892921121325344, "learning_rate": 9.294799484653323e-06, "loss": 0.0, "step": 898 }, { "epoch": 0.3492369536351565, "grad_norm": 0.005890785250812769, "learning_rate": 9.291546267853871e-06, "loss": 0.0, "step": 900 }, { "epoch": 0.3500130357543458, "grad_norm": 0.00015733564214315265, "learning_rate": 9.28828613651782e-06, "loss": 0.0, "step": 902 }, { "epoch": 0.350789117873535, "grad_norm": 0.0003056585555896163, "learning_rate": 9.285019095897894e-06, "loss": 0.0, "step": 904 }, { "epoch": 0.35156519999272423, "grad_norm": 9.215499449055642e-05, "learning_rate": 9.281745151257946e-06, "loss": 0.0, "step": 906 }, { "epoch": 0.35234128211191346, "grad_norm": 0.004926912486553192, "learning_rate": 9.278464307872952e-06, "loss": 0.0, "step": 908 }, { "epoch": 0.3531173642311027, "grad_norm": 0.00023767896345816553, "learning_rate": 9.275176571029008e-06, "loss": 0.0, "step": 910 }, { "epoch": 0.35389344635029196, "grad_norm": 0.0001497797347838059, "learning_rate": 9.271881946023309e-06, "loss": 0.0, "step": 912 }, { "epoch": 0.3546695284694812, "grad_norm": 0.0002432416658848524, "learning_rate": 9.268580438164157e-06, "loss": 0.0, "step": 914 }, { "epoch": 0.3554456105886704, "grad_norm": 0.0001381765614496544, "learning_rate": 9.265272052770936e-06, "loss": 0.0, "step": 916 }, { "epoch": 0.35622169270785964, "grad_norm": 0.00015635219460818917, "learning_rate": 9.261956795174116e-06, "loss": 0.0, "step": 918 }, { "epoch": 0.35699777482704886, "grad_norm": 0.00013150287850294262, "learning_rate": 9.25863467071524e-06, "loss": 0.0, "step": 920 }, { "epoch": 0.35699777482704886, "eval_accuracy": 1.0, "eval_loss": 1.3487173191606416e-06, "eval_runtime": 125.9279, "eval_samples_per_second": 39.705, "eval_steps_per_second": 9.926, "step": 920 }, { "epoch": 0.35777385694623814, "grad_norm": 0.00013299559941515326, "learning_rate": 9.255305684746908e-06, "loss": 0.0, "step": 922 }, { "epoch": 0.35854993906542737, "grad_norm": 0.00014121051935944706, "learning_rate": 9.251969842632785e-06, "loss": 0.0, "step": 924 }, { "epoch": 0.3593260211846166, "grad_norm": 0.00018795536016114056, "learning_rate": 9.248627149747573e-06, "loss": 0.0, "step": 926 }, { "epoch": 0.3601021033038058, "grad_norm": 0.0002016944927163422, "learning_rate": 9.24527761147702e-06, "loss": 0.0, "step": 928 }, { "epoch": 0.36087818542299505, "grad_norm": 0.00017276617290917784, "learning_rate": 9.241921233217899e-06, "loss": 0.0, "step": 930 }, { "epoch": 0.3616542675421843, "grad_norm": 0.0003142351924907416, "learning_rate": 9.238558020378003e-06, "loss": 0.0, "step": 932 }, { "epoch": 0.36243034966137355, "grad_norm": 7.167387957451865e-05, "learning_rate": 9.235187978376141e-06, "loss": 0.0, "step": 934 }, { "epoch": 0.3632064317805628, "grad_norm": 0.004592986311763525, "learning_rate": 9.231811112642121e-06, "loss": 0.0, "step": 936 }, { "epoch": 0.363982513899752, "grad_norm": 0.00039325596299022436, "learning_rate": 9.228427428616749e-06, "loss": 0.0, "step": 938 }, { "epoch": 0.36475859601894123, "grad_norm": 0.000126360246213153, "learning_rate": 9.225036931751811e-06, "loss": 0.0, "step": 940 }, { "epoch": 0.3655346781381305, "grad_norm": 0.00033908282057382166, "learning_rate": 9.221639627510076e-06, "loss": 0.0, "step": 942 }, { "epoch": 0.36631076025731973, "grad_norm": 0.00011746160453185439, "learning_rate": 9.218235521365278e-06, "loss": 0.0, "step": 944 }, { "epoch": 0.36708684237650896, "grad_norm": 0.00014918751548975706, "learning_rate": 9.214824618802108e-06, "loss": 0.0, "step": 946 }, { "epoch": 0.3678629244956982, "grad_norm": 0.003275355789810419, "learning_rate": 9.211406925316214e-06, "loss": 0.0, "step": 948 }, { "epoch": 0.3686390066148874, "grad_norm": 0.00016756876721046865, "learning_rate": 9.20798244641418e-06, "loss": 0.0, "step": 950 }, { "epoch": 0.3694150887340767, "grad_norm": 0.0002650785609148443, "learning_rate": 9.204551187613521e-06, "loss": 0.0, "step": 952 }, { "epoch": 0.3701911708532659, "grad_norm": 0.00020422847592271864, "learning_rate": 9.201113154442685e-06, "loss": 0.0, "step": 954 }, { "epoch": 0.37096725297245514, "grad_norm": 0.00018903792079072446, "learning_rate": 9.197668352441025e-06, "loss": 0.0, "step": 956 }, { "epoch": 0.37174333509164437, "grad_norm": 0.00014484893472399563, "learning_rate": 9.194216787158805e-06, "loss": 0.0, "step": 958 }, { "epoch": 0.37251941721083365, "grad_norm": 0.00031614949693903327, "learning_rate": 9.190758464157184e-06, "loss": 0.0, "step": 960 }, { "epoch": 0.37251941721083365, "eval_accuracy": 1.0, "eval_loss": 1.3196950021665543e-06, "eval_runtime": 125.6395, "eval_samples_per_second": 39.796, "eval_steps_per_second": 9.949, "step": 960 }, { "epoch": 0.3732954993300229, "grad_norm": 0.001263777376152575, "learning_rate": 9.18729338900821e-06, "loss": 0.0, "step": 962 }, { "epoch": 0.3740715814492121, "grad_norm": 0.002196323359385133, "learning_rate": 9.18382156729481e-06, "loss": 0.0, "step": 964 }, { "epoch": 0.3748476635684013, "grad_norm": 0.0002094211959047243, "learning_rate": 9.18034300461078e-06, "loss": 0.0, "step": 966 }, { "epoch": 0.37562374568759055, "grad_norm": 0.00017572658543940634, "learning_rate": 9.17685770656078e-06, "loss": 0.0, "step": 968 }, { "epoch": 0.37639982780677983, "grad_norm": 0.0005696384469047189, "learning_rate": 9.173365678760318e-06, "loss": 0.0, "step": 970 }, { "epoch": 0.37717590992596906, "grad_norm": 0.00016265253361780196, "learning_rate": 9.169866926835749e-06, "loss": 0.0, "step": 972 }, { "epoch": 0.3779519920451583, "grad_norm": 0.0001599711977178231, "learning_rate": 9.166361456424257e-06, "loss": 0.0, "step": 974 }, { "epoch": 0.3787280741643475, "grad_norm": 0.000261208217125386, "learning_rate": 9.162849273173857e-06, "loss": 0.0, "step": 976 }, { "epoch": 0.37950415628353673, "grad_norm": 0.0002480531402397901, "learning_rate": 9.159330382743375e-06, "loss": 0.0, "step": 978 }, { "epoch": 0.380280238402726, "grad_norm": 0.00017667381325736642, "learning_rate": 9.155804790802444e-06, "loss": 0.0, "step": 980 }, { "epoch": 0.38105632052191524, "grad_norm": 9.964186028810218e-05, "learning_rate": 9.152272503031496e-06, "loss": 0.0, "step": 982 }, { "epoch": 0.38183240264110446, "grad_norm": 0.0002093881630571559, "learning_rate": 9.148733525121751e-06, "loss": 0.0, "step": 984 }, { "epoch": 0.3826084847602937, "grad_norm": 0.00011706231452990323, "learning_rate": 9.145187862775208e-06, "loss": 0.0, "step": 986 }, { "epoch": 0.3833845668794829, "grad_norm": 0.0003462635213509202, "learning_rate": 9.141635521704638e-06, "loss": 0.0, "step": 988 }, { "epoch": 0.3841606489986722, "grad_norm": 0.0002677836164366454, "learning_rate": 9.138076507633566e-06, "loss": 0.0, "step": 990 }, { "epoch": 0.3849367311178614, "grad_norm": 0.00010408299567643553, "learning_rate": 9.134510826296277e-06, "loss": 0.0, "step": 992 }, { "epoch": 0.38571281323705064, "grad_norm": 0.00013964559184387326, "learning_rate": 9.130938483437792e-06, "loss": 0.0, "step": 994 }, { "epoch": 0.38648889535623987, "grad_norm": 8.362986409338191e-05, "learning_rate": 9.12735948481387e-06, "loss": 0.0, "step": 996 }, { "epoch": 0.3872649774754291, "grad_norm": 9.481846063863486e-05, "learning_rate": 9.12377383619099e-06, "loss": 0.0, "step": 998 }, { "epoch": 0.3880410595946184, "grad_norm": 6.607809336856008e-05, "learning_rate": 9.120181543346348e-06, "loss": 0.0, "step": 1000 }, { "epoch": 0.3880410595946184, "eval_accuracy": 1.0, "eval_loss": 9.819256092669093e-07, "eval_runtime": 127.8507, "eval_samples_per_second": 39.108, "eval_steps_per_second": 9.777, "step": 1000 }, { "epoch": 0.3888171417138076, "grad_norm": 8.773151057539508e-05, "learning_rate": 9.11658261206784e-06, "loss": 0.0, "step": 1002 }, { "epoch": 0.3895932238329968, "grad_norm": 0.00019934470765292645, "learning_rate": 9.112977048154066e-06, "loss": 0.0, "step": 1004 }, { "epoch": 0.39036930595218605, "grad_norm": 0.0001199338657897897, "learning_rate": 9.109364857414306e-06, "loss": 0.0, "step": 1006 }, { "epoch": 0.3911453880713753, "grad_norm": 0.00014477952208835632, "learning_rate": 9.10574604566852e-06, "loss": 0.0, "step": 1008 }, { "epoch": 0.39192147019056456, "grad_norm": 0.0002476123918313533, "learning_rate": 9.102120618747336e-06, "loss": 0.0, "step": 1010 }, { "epoch": 0.3926975523097538, "grad_norm": 0.00014017504872754216, "learning_rate": 9.09848858249204e-06, "loss": 0.0, "step": 1012 }, { "epoch": 0.393473634428943, "grad_norm": 0.00012746201537083834, "learning_rate": 9.094849942754564e-06, "loss": 0.0, "step": 1014 }, { "epoch": 0.39424971654813223, "grad_norm": 0.00011980177805526182, "learning_rate": 9.091204705397485e-06, "loss": 0.0, "step": 1016 }, { "epoch": 0.39502579866732146, "grad_norm": 0.00014856885536573827, "learning_rate": 9.087552876294003e-06, "loss": 0.0, "step": 1018 }, { "epoch": 0.39580188078651074, "grad_norm": 0.0001720783329801634, "learning_rate": 9.083894461327946e-06, "loss": 0.0, "step": 1020 }, { "epoch": 0.39657796290569997, "grad_norm": 8.398832142120227e-05, "learning_rate": 9.08022946639375e-06, "loss": 0.0, "step": 1022 }, { "epoch": 0.3973540450248892, "grad_norm": 0.0002526867901906371, "learning_rate": 9.076557897396452e-06, "loss": 0.0, "step": 1024 }, { "epoch": 0.3981301271440784, "grad_norm": 0.0001529043511254713, "learning_rate": 9.07287976025168e-06, "loss": 0.0, "step": 1026 }, { "epoch": 0.3989062092632677, "grad_norm": 0.0001069461286533624, "learning_rate": 9.069195060885647e-06, "loss": 0.0, "step": 1028 }, { "epoch": 0.3996822913824569, "grad_norm": 6.42699669697322e-05, "learning_rate": 9.065503805235139e-06, "loss": 0.0, "step": 1030 }, { "epoch": 0.40045837350164615, "grad_norm": 5.6433171266689897e-05, "learning_rate": 9.061805999247504e-06, "loss": 0.0, "step": 1032 }, { "epoch": 0.4012344556208354, "grad_norm": 0.000280446169199422, "learning_rate": 9.058101648880646e-06, "loss": 0.0, "step": 1034 }, { "epoch": 0.4020105377400246, "grad_norm": 0.00010639019455993548, "learning_rate": 9.05439076010301e-06, "loss": 0.0, "step": 1036 }, { "epoch": 0.4027866198592139, "grad_norm": 9.839262929745018e-05, "learning_rate": 9.050673338893578e-06, "loss": 0.0, "step": 1038 }, { "epoch": 0.4035627019784031, "grad_norm": 8.282740600407124e-05, "learning_rate": 9.046949391241859e-06, "loss": 0.0, "step": 1040 }, { "epoch": 0.4035627019784031, "eval_accuracy": 1.0, "eval_loss": 8.070776971180749e-07, "eval_runtime": 129.7286, "eval_samples_per_second": 38.542, "eval_steps_per_second": 9.635, "step": 1040 }, { "epoch": 0.40433878409759233, "grad_norm": 0.00010247069440083578, "learning_rate": 9.043218923147874e-06, "loss": 0.0, "step": 1042 }, { "epoch": 0.40511486621678156, "grad_norm": 0.00012395612429827452, "learning_rate": 9.039481940622148e-06, "loss": 0.0, "step": 1044 }, { "epoch": 0.4058909483359708, "grad_norm": 0.00047949692816473544, "learning_rate": 9.035738449685707e-06, "loss": 0.0, "step": 1046 }, { "epoch": 0.40666703045516006, "grad_norm": 0.0001393361308146268, "learning_rate": 9.031988456370062e-06, "loss": 0.0, "step": 1048 }, { "epoch": 0.4074431125743493, "grad_norm": 0.00010181173274759203, "learning_rate": 9.0282319667172e-06, "loss": 0.0, "step": 1050 }, { "epoch": 0.4082191946935385, "grad_norm": 5.809147478430532e-05, "learning_rate": 9.02446898677957e-06, "loss": 0.0, "step": 1052 }, { "epoch": 0.40899527681272774, "grad_norm": 9.658202179707587e-05, "learning_rate": 9.020699522620091e-06, "loss": 0.0, "step": 1054 }, { "epoch": 0.40977135893191696, "grad_norm": 0.00011895164789166301, "learning_rate": 9.016923580312114e-06, "loss": 0.0, "step": 1056 }, { "epoch": 0.41054744105110624, "grad_norm": 0.00011817643098765984, "learning_rate": 9.013141165939439e-06, "loss": 0.0, "step": 1058 }, { "epoch": 0.41132352317029547, "grad_norm": 0.00010169815504923463, "learning_rate": 9.009352285596287e-06, "loss": 0.0, "step": 1060 }, { "epoch": 0.4120996052894847, "grad_norm": 0.00016690471966285259, "learning_rate": 9.005556945387301e-06, "loss": 0.0, "step": 1062 }, { "epoch": 0.4128756874086739, "grad_norm": 9.181562199955806e-05, "learning_rate": 9.001755151427532e-06, "loss": 0.0, "step": 1064 }, { "epoch": 0.41365176952786314, "grad_norm": 0.0002814804029185325, "learning_rate": 8.997946909842426e-06, "loss": 0.0, "step": 1066 }, { "epoch": 0.4144278516470524, "grad_norm": 9.53360868152231e-05, "learning_rate": 8.99413222676782e-06, "loss": 0.0, "step": 1068 }, { "epoch": 0.41520393376624165, "grad_norm": 8.568393241148442e-05, "learning_rate": 8.990311108349926e-06, "loss": 0.0, "step": 1070 }, { "epoch": 0.4159800158854309, "grad_norm": 0.0065495409071445465, "learning_rate": 8.986483560745335e-06, "loss": 0.0, "step": 1072 }, { "epoch": 0.4167560980046201, "grad_norm": 8.002893446246162e-05, "learning_rate": 8.982649590120982e-06, "loss": 0.0, "step": 1074 }, { "epoch": 0.4175321801238093, "grad_norm": 0.00010333453246857971, "learning_rate": 8.978809202654161e-06, "loss": 0.0, "step": 1076 }, { "epoch": 0.4183082622429986, "grad_norm": 0.00011106064630439505, "learning_rate": 8.974962404532503e-06, "loss": 0.0, "step": 1078 }, { "epoch": 0.41908434436218783, "grad_norm": 0.08603014796972275, "learning_rate": 8.971109201953962e-06, "loss": 0.0, "step": 1080 }, { "epoch": 0.41908434436218783, "eval_accuracy": 1.0, "eval_loss": 6.540701065205212e-07, "eval_runtime": 127.7494, "eval_samples_per_second": 39.139, "eval_steps_per_second": 9.785, "step": 1080 }, { "epoch": 0.41986042648137706, "grad_norm": 8.471347973681986e-05, "learning_rate": 8.967249601126821e-06, "loss": 0.0, "step": 1082 }, { "epoch": 0.4206365086005663, "grad_norm": 8.519980474375188e-05, "learning_rate": 8.963383608269665e-06, "loss": 0.0, "step": 1084 }, { "epoch": 0.4214125907197555, "grad_norm": 0.00011271241237409413, "learning_rate": 8.959511229611377e-06, "loss": 0.0, "step": 1086 }, { "epoch": 0.4221886728389448, "grad_norm": 5.517565296031535e-05, "learning_rate": 8.955632471391132e-06, "loss": 0.0, "step": 1088 }, { "epoch": 0.422964754958134, "grad_norm": 0.009469215758144855, "learning_rate": 8.951747339858383e-06, "loss": 0.0, "step": 1090 }, { "epoch": 0.42374083707732324, "grad_norm": 8.665325003676116e-05, "learning_rate": 8.947855841272852e-06, "loss": 0.0, "step": 1092 }, { "epoch": 0.42451691919651247, "grad_norm": 0.0001427274983143434, "learning_rate": 8.943957981904518e-06, "loss": 0.0, "step": 1094 }, { "epoch": 0.4252930013157017, "grad_norm": 0.00017721591575536877, "learning_rate": 8.94005376803361e-06, "loss": 0.0, "step": 1096 }, { "epoch": 0.42606908343489097, "grad_norm": 0.0001241111458512023, "learning_rate": 8.936143205950596e-06, "loss": 0.0, "step": 1098 }, { "epoch": 0.4268451655540802, "grad_norm": 0.00016166915884241462, "learning_rate": 8.93222630195617e-06, "loss": 0.0, "step": 1100 }, { "epoch": 0.4276212476732694, "grad_norm": 8.763946243561804e-05, "learning_rate": 8.928303062361244e-06, "loss": 0.0, "step": 1102 }, { "epoch": 0.42839732979245865, "grad_norm": 0.000265681796008721, "learning_rate": 8.924373493486941e-06, "loss": 0.0, "step": 1104 }, { "epoch": 0.42917341191164793, "grad_norm": 0.00013727635086979717, "learning_rate": 8.92043760166458e-06, "loss": 0.0, "step": 1106 }, { "epoch": 0.42994949403083715, "grad_norm": 0.0001845496881287545, "learning_rate": 8.916495393235666e-06, "loss": 0.0004, "step": 1108 }, { "epoch": 0.4307255761500264, "grad_norm": 0.00015601796621922404, "learning_rate": 8.912546874551883e-06, "loss": 0.0, "step": 1110 }, { "epoch": 0.4315016582692156, "grad_norm": 8.881723624654114e-05, "learning_rate": 8.908592051975083e-06, "loss": 0.0, "step": 1112 }, { "epoch": 0.43227774038840483, "grad_norm": 0.00032336293952539563, "learning_rate": 8.904630931877271e-06, "loss": 0.0, "step": 1114 }, { "epoch": 0.4330538225075941, "grad_norm": 0.0038151578046381474, "learning_rate": 8.900663520640605e-06, "loss": 0.0, "step": 1116 }, { "epoch": 0.43382990462678334, "grad_norm": 0.0023975528310984373, "learning_rate": 8.896689824657371e-06, "loss": 0.0, "step": 1118 }, { "epoch": 0.43460598674597256, "grad_norm": 0.003024220932275057, "learning_rate": 8.892709850329991e-06, "loss": 0.0, "step": 1120 }, { "epoch": 0.43460598674597256, "eval_accuracy": 1.0, "eval_loss": 4.078156052855775e-06, "eval_runtime": 127.1177, "eval_samples_per_second": 39.334, "eval_steps_per_second": 9.833, "step": 1120 }, { "epoch": 0.4353820688651618, "grad_norm": 0.0007882779464125633, "learning_rate": 8.88872360407099e-06, "loss": 0.0, "step": 1122 }, { "epoch": 0.436158150984351, "grad_norm": 0.0011043330887332559, "learning_rate": 8.884731092303011e-06, "loss": 0.0, "step": 1124 }, { "epoch": 0.4369342331035403, "grad_norm": 0.0014391590375453234, "learning_rate": 8.880732321458785e-06, "loss": 0.0, "step": 1126 }, { "epoch": 0.4377103152227295, "grad_norm": 0.0013540824875235558, "learning_rate": 8.876727297981129e-06, "loss": 0.0, "step": 1128 }, { "epoch": 0.43848639734191874, "grad_norm": 0.00019897986203432083, "learning_rate": 8.872716028322931e-06, "loss": 0.0, "step": 1130 }, { "epoch": 0.43926247946110797, "grad_norm": 0.0002147004270227626, "learning_rate": 8.868698518947152e-06, "loss": 0.0, "step": 1132 }, { "epoch": 0.4400385615802972, "grad_norm": 0.0001553621987113729, "learning_rate": 8.864674776326798e-06, "loss": 0.0, "step": 1134 }, { "epoch": 0.4408146436994865, "grad_norm": 0.00013062762445770204, "learning_rate": 8.860644806944917e-06, "loss": 0.0, "step": 1136 }, { "epoch": 0.4415907258186757, "grad_norm": 0.005284741520881653, "learning_rate": 8.8566086172946e-06, "loss": 0.0, "step": 1138 }, { "epoch": 0.4423668079378649, "grad_norm": 9.267154382541776e-05, "learning_rate": 8.852566213878947e-06, "loss": 0.0, "step": 1140 }, { "epoch": 0.44314289005705415, "grad_norm": 0.0020106425508856773, "learning_rate": 8.84851760321108e-06, "loss": 0.0, "step": 1142 }, { "epoch": 0.4439189721762434, "grad_norm": 0.00399937154725194, "learning_rate": 8.844462791814113e-06, "loss": 0.0, "step": 1144 }, { "epoch": 0.44469505429543266, "grad_norm": 0.00013771952944807708, "learning_rate": 8.84040178622116e-06, "loss": 0.0, "step": 1146 }, { "epoch": 0.4454711364146219, "grad_norm": 0.00011455425556050614, "learning_rate": 8.83633459297531e-06, "loss": 0.0, "step": 1148 }, { "epoch": 0.4462472185338111, "grad_norm": 0.0001155481077148579, "learning_rate": 8.83226121862962e-06, "loss": 0.0, "step": 1150 }, { "epoch": 0.44702330065300033, "grad_norm": 0.00010677455429686233, "learning_rate": 8.828181669747111e-06, "loss": 0.0, "step": 1152 }, { "epoch": 0.44779938277218956, "grad_norm": 0.0013834693236276507, "learning_rate": 8.824095952900746e-06, "loss": 0.0, "step": 1154 }, { "epoch": 0.44857546489137884, "grad_norm": 0.0002081162529066205, "learning_rate": 8.820004074673433e-06, "loss": 0.0, "step": 1156 }, { "epoch": 0.44935154701056806, "grad_norm": 0.0017033626791089773, "learning_rate": 8.815906041658001e-06, "loss": 0.0, "step": 1158 }, { "epoch": 0.4501276291297573, "grad_norm": 0.00014090523472987115, "learning_rate": 8.8118018604572e-06, "loss": 0.0, "step": 1160 }, { "epoch": 0.4501276291297573, "eval_accuracy": 1.0, "eval_loss": 7.843883054192702e-07, "eval_runtime": 129.241, "eval_samples_per_second": 38.687, "eval_steps_per_second": 9.672, "step": 1160 }, { "epoch": 0.4509037112489465, "grad_norm": 8.750114648137242e-05, "learning_rate": 8.807691537683685e-06, "loss": 0.0, "step": 1162 }, { "epoch": 0.45167979336813574, "grad_norm": 0.0003974711580667645, "learning_rate": 8.80357507996e-06, "loss": 0.0, "step": 1164 }, { "epoch": 0.452455875487325, "grad_norm": 0.00014241860480979085, "learning_rate": 8.799452493918586e-06, "loss": 0.0, "step": 1166 }, { "epoch": 0.45323195760651425, "grad_norm": 0.0001636273373151198, "learning_rate": 8.795323786201746e-06, "loss": 0.0, "step": 1168 }, { "epoch": 0.45400803972570347, "grad_norm": 0.00033104620524682105, "learning_rate": 8.791188963461653e-06, "loss": 0.0, "step": 1170 }, { "epoch": 0.4547841218448927, "grad_norm": 0.0008865257259458303, "learning_rate": 8.787048032360332e-06, "loss": 0.0, "step": 1172 }, { "epoch": 0.4555602039640819, "grad_norm": 0.0002572090597823262, "learning_rate": 8.782900999569646e-06, "loss": 0.0, "step": 1174 }, { "epoch": 0.4563362860832712, "grad_norm": 9.637617040425539e-05, "learning_rate": 8.778747871771293e-06, "loss": 0.0, "step": 1176 }, { "epoch": 0.45711236820246043, "grad_norm": 9.231559670297429e-05, "learning_rate": 8.774588655656787e-06, "loss": 0.0, "step": 1178 }, { "epoch": 0.45788845032164965, "grad_norm": 0.0002455124049447477, "learning_rate": 8.770423357927463e-06, "loss": 0.0, "step": 1180 }, { "epoch": 0.4586645324408389, "grad_norm": 9.630520071368665e-05, "learning_rate": 8.766251985294435e-06, "loss": 0.0, "step": 1182 }, { "epoch": 0.45944061456002816, "grad_norm": 0.00015750904276501387, "learning_rate": 8.762074544478622e-06, "loss": 0.0, "step": 1184 }, { "epoch": 0.4602166966792174, "grad_norm": 9.142507042270154e-05, "learning_rate": 8.757891042210713e-06, "loss": 0.0, "step": 1186 }, { "epoch": 0.4609927787984066, "grad_norm": 0.00017592050426173955, "learning_rate": 8.753701485231165e-06, "loss": 0.0, "step": 1188 }, { "epoch": 0.46176886091759584, "grad_norm": 9.888388012768701e-05, "learning_rate": 8.749505880290188e-06, "loss": 0.0, "step": 1190 }, { "epoch": 0.46254494303678506, "grad_norm": 0.00018603552598506212, "learning_rate": 8.74530423414774e-06, "loss": 0.0, "step": 1192 }, { "epoch": 0.46332102515597434, "grad_norm": 0.00026137952227145433, "learning_rate": 8.741096553573506e-06, "loss": 0.0, "step": 1194 }, { "epoch": 0.46409710727516357, "grad_norm": 0.00011923851707251742, "learning_rate": 8.736882845346906e-06, "loss": 0.0, "step": 1196 }, { "epoch": 0.4648731893943528, "grad_norm": 8.40307111502625e-05, "learning_rate": 8.732663116257057e-06, "loss": 0.0, "step": 1198 }, { "epoch": 0.465649271513542, "grad_norm": 0.00015016018005553633, "learning_rate": 8.728437373102784e-06, "loss": 0.0, "step": 1200 }, { "epoch": 0.465649271513542, "eval_accuracy": 1.0, "eval_loss": 6.378712100740813e-07, "eval_runtime": 127.4275, "eval_samples_per_second": 39.238, "eval_steps_per_second": 9.81, "step": 1200 }, { "epoch": 0.46642535363273124, "grad_norm": 0.0001696134713711217, "learning_rate": 8.724205622692608e-06, "loss": 0.0, "step": 1202 }, { "epoch": 0.4672014357519205, "grad_norm": 0.0004091104492545128, "learning_rate": 8.719967871844715e-06, "loss": 0.0, "step": 1204 }, { "epoch": 0.46797751787110975, "grad_norm": 6.0830552683910355e-05, "learning_rate": 8.715724127386971e-06, "loss": 0.0, "step": 1206 }, { "epoch": 0.468753599990299, "grad_norm": 5.9658585087163374e-05, "learning_rate": 8.711474396156894e-06, "loss": 0.0, "step": 1208 }, { "epoch": 0.4695296821094882, "grad_norm": 5.93626537011005e-05, "learning_rate": 8.707218685001648e-06, "loss": 0.0, "step": 1210 }, { "epoch": 0.4703057642286774, "grad_norm": 0.00014541247219312936, "learning_rate": 8.702957000778029e-06, "loss": 0.0, "step": 1212 }, { "epoch": 0.4710818463478667, "grad_norm": 9.278554352931678e-05, "learning_rate": 8.698689350352465e-06, "loss": 0.0, "step": 1214 }, { "epoch": 0.47185792846705593, "grad_norm": 0.00017925351858139038, "learning_rate": 8.69441574060099e-06, "loss": 0.0, "step": 1216 }, { "epoch": 0.47263401058624516, "grad_norm": 0.00010362664033891633, "learning_rate": 8.690136178409237e-06, "loss": 0.0, "step": 1218 }, { "epoch": 0.4734100927054344, "grad_norm": 8.074884681263939e-05, "learning_rate": 8.685850670672438e-06, "loss": 0.0, "step": 1220 }, { "epoch": 0.4741861748246236, "grad_norm": 0.00013894755102228373, "learning_rate": 8.681559224295401e-06, "loss": 0.0, "step": 1222 }, { "epoch": 0.4749622569438129, "grad_norm": 7.650947372894734e-05, "learning_rate": 8.6772618461925e-06, "loss": 0.0, "step": 1224 }, { "epoch": 0.4757383390630021, "grad_norm": 0.0006909709773026407, "learning_rate": 8.672958543287666e-06, "loss": 0.0, "step": 1226 }, { "epoch": 0.47651442118219134, "grad_norm": 0.00010987075802404433, "learning_rate": 8.668649322514382e-06, "loss": 0.0, "step": 1228 }, { "epoch": 0.47729050330138056, "grad_norm": 5.872250039828941e-05, "learning_rate": 8.66433419081566e-06, "loss": 0.0, "step": 1230 }, { "epoch": 0.4780665854205698, "grad_norm": 9.159026376437396e-05, "learning_rate": 8.660013155144036e-06, "loss": 0.0, "step": 1232 }, { "epoch": 0.47884266753975907, "grad_norm": 0.0002560248249210417, "learning_rate": 8.655686222461561e-06, "loss": 0.0, "step": 1234 }, { "epoch": 0.4796187496589483, "grad_norm": 6.712140020681545e-05, "learning_rate": 8.651353399739787e-06, "loss": 0.0, "step": 1236 }, { "epoch": 0.4803948317781375, "grad_norm": 8.750952838454396e-05, "learning_rate": 8.647014693959754e-06, "loss": 0.0, "step": 1238 }, { "epoch": 0.48117091389732675, "grad_norm": 0.0039330353029072285, "learning_rate": 8.642670112111982e-06, "loss": 0.0, "step": 1240 }, { "epoch": 0.48117091389732675, "eval_accuracy": 1.0, "eval_loss": 5.634702802126412e-07, "eval_runtime": 129.1229, "eval_samples_per_second": 38.723, "eval_steps_per_second": 9.681, "step": 1240 }, { "epoch": 0.48194699601651597, "grad_norm": 0.0001646587043069303, "learning_rate": 8.63831966119646e-06, "loss": 0.0, "step": 1242 }, { "epoch": 0.48272307813570525, "grad_norm": 7.407696102745831e-05, "learning_rate": 8.633963348222628e-06, "loss": 0.0, "step": 1244 }, { "epoch": 0.4834991602548945, "grad_norm": 9.739304368849844e-05, "learning_rate": 8.629601180209382e-06, "loss": 0.0, "step": 1246 }, { "epoch": 0.4842752423740837, "grad_norm": 7.505776011385024e-05, "learning_rate": 8.625233164185035e-06, "loss": 0.0, "step": 1248 }, { "epoch": 0.48505132449327293, "grad_norm": 0.00010909570846706629, "learning_rate": 8.620859307187339e-06, "loss": 0.0, "step": 1250 }, { "epoch": 0.48582740661246215, "grad_norm": 5.2743791457032785e-05, "learning_rate": 8.616479616263444e-06, "loss": 0.0, "step": 1252 }, { "epoch": 0.48660348873165143, "grad_norm": 5.285683801048435e-05, "learning_rate": 8.61209409846991e-06, "loss": 0.0, "step": 1254 }, { "epoch": 0.48737957085084066, "grad_norm": 0.00016743461310397834, "learning_rate": 8.607702760872679e-06, "loss": 0.0, "step": 1256 }, { "epoch": 0.4881556529700299, "grad_norm": 0.0001348243240499869, "learning_rate": 8.60330561054707e-06, "loss": 0.0, "step": 1258 }, { "epoch": 0.4889317350892191, "grad_norm": 0.00023121941194403917, "learning_rate": 8.598902654577768e-06, "loss": 0.0, "step": 1260 }, { "epoch": 0.4897078172084084, "grad_norm": 8.464252459816635e-05, "learning_rate": 8.594493900058817e-06, "loss": 0.0, "step": 1262 }, { "epoch": 0.4904838993275976, "grad_norm": 6.712898903060704e-05, "learning_rate": 8.590079354093594e-06, "loss": 0.0, "step": 1264 }, { "epoch": 0.49125998144678684, "grad_norm": 8.066197915468365e-05, "learning_rate": 8.585659023794818e-06, "loss": 0.0, "step": 1266 }, { "epoch": 0.49203606356597607, "grad_norm": 9.7354241006542e-05, "learning_rate": 8.581232916284519e-06, "loss": 0.0, "step": 1268 }, { "epoch": 0.4928121456851653, "grad_norm": 9.442515874980018e-05, "learning_rate": 8.57680103869404e-06, "loss": 0.0, "step": 1270 }, { "epoch": 0.4935882278043546, "grad_norm": 6.256580672925338e-05, "learning_rate": 8.572363398164017e-06, "loss": 0.0, "step": 1272 }, { "epoch": 0.4943643099235438, "grad_norm": 8.78170394571498e-05, "learning_rate": 8.567920001844376e-06, "loss": 0.0, "step": 1274 }, { "epoch": 0.495140392042733, "grad_norm": 6.0675814893329516e-05, "learning_rate": 8.563470856894316e-06, "loss": 0.0, "step": 1276 }, { "epoch": 0.49591647416192225, "grad_norm": 3.854846363537945e-05, "learning_rate": 8.559015970482292e-06, "loss": 0.0, "step": 1278 }, { "epoch": 0.4966925562811115, "grad_norm": 0.00010671203199308366, "learning_rate": 8.554555349786016e-06, "loss": 0.0, "step": 1280 }, { "epoch": 0.4966925562811115, "eval_accuracy": 1.0, "eval_loss": 5.17692058110697e-07, "eval_runtime": 127.9157, "eval_samples_per_second": 39.088, "eval_steps_per_second": 9.772, "step": 1280 }, { "epoch": 0.49746863840030076, "grad_norm": 5.758116458309814e-05, "learning_rate": 8.550089001992438e-06, "loss": 0.0, "step": 1282 }, { "epoch": 0.49824472051949, "grad_norm": 5.027606675866991e-05, "learning_rate": 8.545616934297733e-06, "loss": 0.0, "step": 1284 }, { "epoch": 0.4990208026386792, "grad_norm": 7.525689579779282e-05, "learning_rate": 8.541139153907296e-06, "loss": 0.0, "step": 1286 }, { "epoch": 0.49979688475786843, "grad_norm": 6.39339632471092e-05, "learning_rate": 8.536655668035723e-06, "loss": 0.0, "step": 1288 }, { "epoch": 0.5005729668770577, "grad_norm": 8.428450382780284e-05, "learning_rate": 8.532166483906804e-06, "loss": 0.0, "step": 1290 }, { "epoch": 0.5013490489962469, "grad_norm": 7.738485874142498e-05, "learning_rate": 8.527671608753508e-06, "loss": 0.0, "step": 1292 }, { "epoch": 0.5021251311154361, "grad_norm": 0.0001262014266103506, "learning_rate": 8.523171049817974e-06, "loss": 0.0, "step": 1294 }, { "epoch": 0.5029012132346254, "grad_norm": 6.336189107969403e-05, "learning_rate": 8.518664814351502e-06, "loss": 0.0, "step": 1296 }, { "epoch": 0.5036772953538147, "grad_norm": 6.285082054091617e-05, "learning_rate": 8.514152909614538e-06, "loss": 0.0, "step": 1298 }, { "epoch": 0.5044533774730039, "grad_norm": 7.193644705694169e-05, "learning_rate": 8.509635342876655e-06, "loss": 0.0, "step": 1300 }, { "epoch": 0.5052294595921931, "grad_norm": 0.00040224468102678657, "learning_rate": 8.505112121416554e-06, "loss": 0.0, "step": 1302 }, { "epoch": 0.5060055417113823, "grad_norm": 9.218790364684537e-05, "learning_rate": 8.500583252522053e-06, "loss": 0.0, "step": 1304 }, { "epoch": 0.5067816238305716, "grad_norm": 7.570124580524862e-05, "learning_rate": 8.496048743490053e-06, "loss": 0.0, "step": 1306 }, { "epoch": 0.5075577059497608, "grad_norm": 7.448979158652946e-05, "learning_rate": 8.49150860162656e-06, "loss": 0.0, "step": 1308 }, { "epoch": 0.50833378806895, "grad_norm": 0.00010793129331432283, "learning_rate": 8.486962834246646e-06, "loss": 0.0, "step": 1310 }, { "epoch": 0.5091098701881392, "grad_norm": 5.203771434025839e-05, "learning_rate": 8.482411448674445e-06, "loss": 0.0, "step": 1312 }, { "epoch": 0.5098859523073285, "grad_norm": 0.0076132542453706264, "learning_rate": 8.477854452243149e-06, "loss": 0.0, "step": 1314 }, { "epoch": 0.5106620344265178, "grad_norm": 9.313248301623389e-05, "learning_rate": 8.473291852294986e-06, "loss": 0.0, "step": 1316 }, { "epoch": 0.511438116545707, "grad_norm": 8.953544602263719e-05, "learning_rate": 8.468723656181219e-06, "loss": 0.0, "step": 1318 }, { "epoch": 0.5122141986648963, "grad_norm": 5.689664249075577e-05, "learning_rate": 8.464149871262118e-06, "loss": 0.0, "step": 1320 }, { "epoch": 0.5122141986648963, "eval_accuracy": 1.0, "eval_loss": 4.7084884613468603e-07, "eval_runtime": 126.7586, "eval_samples_per_second": 39.445, "eval_steps_per_second": 9.861, "step": 1320 }, { "epoch": 0.5129902807840855, "grad_norm": 7.276899850694463e-05, "learning_rate": 8.459570504906962e-06, "loss": 0.0, "step": 1322 }, { "epoch": 0.5137663629032747, "grad_norm": 8.557028922950849e-05, "learning_rate": 8.454985564494025e-06, "loss": 0.0, "step": 1324 }, { "epoch": 0.5145424450224639, "grad_norm": 0.00010284512973157689, "learning_rate": 8.450395057410561e-06, "loss": 0.0, "step": 1326 }, { "epoch": 0.5153185271416532, "grad_norm": 9.363534627482295e-05, "learning_rate": 8.445798991052791e-06, "loss": 0.0, "step": 1328 }, { "epoch": 0.5160946092608424, "grad_norm": 6.128710083430633e-05, "learning_rate": 8.441197372825892e-06, "loss": 0.0, "step": 1330 }, { "epoch": 0.5168706913800316, "grad_norm": 6.353965000016615e-05, "learning_rate": 8.436590210143991e-06, "loss": 0.0, "step": 1332 }, { "epoch": 0.5176467734992208, "grad_norm": 5.024932397645898e-05, "learning_rate": 8.431977510430145e-06, "loss": 0.0, "step": 1334 }, { "epoch": 0.5184228556184102, "grad_norm": 7.873632421251386e-05, "learning_rate": 8.427359281116335e-06, "loss": 0.0, "step": 1336 }, { "epoch": 0.5191989377375994, "grad_norm": 8.525773591827601e-05, "learning_rate": 8.422735529643445e-06, "loss": 0.0, "step": 1338 }, { "epoch": 0.5199750198567886, "grad_norm": 0.00016745791072025895, "learning_rate": 8.418106263461261e-06, "loss": 0.0, "step": 1340 }, { "epoch": 0.5207511019759778, "grad_norm": 8.936987433116883e-05, "learning_rate": 8.413471490028456e-06, "loss": 0.0, "step": 1342 }, { "epoch": 0.5215271840951671, "grad_norm": 6.260307418415323e-05, "learning_rate": 8.408831216812574e-06, "loss": 0.0, "step": 1344 }, { "epoch": 0.5223032662143563, "grad_norm": 0.00048531507491134107, "learning_rate": 8.404185451290017e-06, "loss": 0.0, "step": 1346 }, { "epoch": 0.5230793483335455, "grad_norm": 5.9685316955437884e-05, "learning_rate": 8.399534200946044e-06, "loss": 0.0, "step": 1348 }, { "epoch": 0.5238554304527347, "grad_norm": 0.028279516845941544, "learning_rate": 8.394877473274743e-06, "loss": 0.0, "step": 1350 }, { "epoch": 0.524631512571924, "grad_norm": 6.0437945649027824e-05, "learning_rate": 8.39021527577903e-06, "loss": 0.0, "step": 1352 }, { "epoch": 0.5254075946911132, "grad_norm": 6.762636621715501e-05, "learning_rate": 8.38554761597064e-06, "loss": 0.0, "step": 1354 }, { "epoch": 0.5261836768103025, "grad_norm": 0.00010587056021904573, "learning_rate": 8.380874501370098e-06, "loss": 0.0, "step": 1356 }, { "epoch": 0.5269597589294918, "grad_norm": 0.00014892751642037183, "learning_rate": 8.376195939506727e-06, "loss": 0.0, "step": 1358 }, { "epoch": 0.527735841048681, "grad_norm": 0.00022276054369285703, "learning_rate": 8.371511937918616e-06, "loss": 0.0, "step": 1360 }, { "epoch": 0.527735841048681, "eval_accuracy": 1.0, "eval_loss": 4.4349556560518977e-07, "eval_runtime": 126.9749, "eval_samples_per_second": 39.378, "eval_steps_per_second": 9.844, "step": 1360 }, { "epoch": 0.5285119231678702, "grad_norm": 4.504739627009258e-05, "learning_rate": 8.366822504152636e-06, "loss": 0.0, "step": 1362 }, { "epoch": 0.5292880052870594, "grad_norm": 5.4630098020425066e-05, "learning_rate": 8.362127645764392e-06, "loss": 0.0, "step": 1364 }, { "epoch": 0.5300640874062487, "grad_norm": 0.00011746805830625817, "learning_rate": 8.357427370318239e-06, "loss": 0.0, "step": 1366 }, { "epoch": 0.5308401695254379, "grad_norm": 5.199177758186124e-05, "learning_rate": 8.352721685387258e-06, "loss": 0.0, "step": 1368 }, { "epoch": 0.5316162516446271, "grad_norm": 5.120503919897601e-05, "learning_rate": 8.348010598553245e-06, "loss": 0.0, "step": 1370 }, { "epoch": 0.5323923337638163, "grad_norm": 7.405896030832082e-05, "learning_rate": 8.3432941174067e-06, "loss": 0.0, "step": 1372 }, { "epoch": 0.5331684158830057, "grad_norm": 3.713270416483283e-05, "learning_rate": 8.338572249546813e-06, "loss": 0.0, "step": 1374 }, { "epoch": 0.5339444980021949, "grad_norm": 5.5398730182787403e-05, "learning_rate": 8.33384500258146e-06, "loss": 0.0, "step": 1376 }, { "epoch": 0.5347205801213841, "grad_norm": 6.655969627900049e-05, "learning_rate": 8.329112384127172e-06, "loss": 0.0, "step": 1378 }, { "epoch": 0.5354966622405734, "grad_norm": 5.5164146033348516e-05, "learning_rate": 8.324374401809144e-06, "loss": 0.0, "step": 1380 }, { "epoch": 0.5362727443597626, "grad_norm": 7.302646554308012e-05, "learning_rate": 8.319631063261209e-06, "loss": 0.0, "step": 1382 }, { "epoch": 0.5370488264789518, "grad_norm": 5.169885844225064e-05, "learning_rate": 8.314882376125832e-06, "loss": 0.0, "step": 1384 }, { "epoch": 0.537824908598141, "grad_norm": 5.935852459515445e-05, "learning_rate": 8.310128348054093e-06, "loss": 0.0, "step": 1386 }, { "epoch": 0.5386009907173303, "grad_norm": 0.00012214272283017635, "learning_rate": 8.305368986705683e-06, "loss": 0.0, "step": 1388 }, { "epoch": 0.5393770728365195, "grad_norm": 9.045819024322554e-05, "learning_rate": 8.300604299748876e-06, "loss": 0.0, "step": 1390 }, { "epoch": 0.5401531549557087, "grad_norm": 8.053346391534433e-05, "learning_rate": 8.295834294860535e-06, "loss": 0.0, "step": 1392 }, { "epoch": 0.540929237074898, "grad_norm": 7.596631621709093e-05, "learning_rate": 8.291058979726092e-06, "loss": 0.0, "step": 1394 }, { "epoch": 0.5417053191940873, "grad_norm": 4.1703515307744965e-05, "learning_rate": 8.286278362039527e-06, "loss": 0.0, "step": 1396 }, { "epoch": 0.5424814013132765, "grad_norm": 4.8398211220046505e-05, "learning_rate": 8.281492449503372e-06, "loss": 0.0, "step": 1398 }, { "epoch": 0.5432574834324657, "grad_norm": 6.856134859845042e-05, "learning_rate": 8.276701249828684e-06, "loss": 0.0, "step": 1400 }, { "epoch": 0.5432574834324657, "eval_accuracy": 1.0, "eval_loss": 4.365287225027714e-07, "eval_runtime": 128.5657, "eval_samples_per_second": 38.891, "eval_steps_per_second": 9.723, "step": 1400 }, { "epoch": 0.5440335655516549, "grad_norm": 8.691730909049511e-05, "learning_rate": 8.271904770735042e-06, "loss": 0.0, "step": 1402 }, { "epoch": 0.5448096476708442, "grad_norm": 5.0105154514312744e-05, "learning_rate": 8.267103019950529e-06, "loss": 0.0, "step": 1404 }, { "epoch": 0.5455857297900334, "grad_norm": 8.251678809756413e-05, "learning_rate": 8.262296005211722e-06, "loss": 0.0, "step": 1406 }, { "epoch": 0.5463618119092226, "grad_norm": 9.603463695384562e-05, "learning_rate": 8.257483734263682e-06, "loss": 0.0, "step": 1408 }, { "epoch": 0.5471378940284118, "grad_norm": 0.0007246703607961535, "learning_rate": 8.252666214859936e-06, "loss": 0.0, "step": 1410 }, { "epoch": 0.5479139761476011, "grad_norm": 5.550576315727085e-05, "learning_rate": 8.247843454762467e-06, "loss": 0.0, "step": 1412 }, { "epoch": 0.5486900582667904, "grad_norm": 9.284519182983786e-05, "learning_rate": 8.243015461741707e-06, "loss": 0.0, "step": 1414 }, { "epoch": 0.5494661403859796, "grad_norm": 4.02440236939583e-05, "learning_rate": 8.238182243576512e-06, "loss": 0.0, "step": 1416 }, { "epoch": 0.5502422225051689, "grad_norm": 2.5214430934283882e-05, "learning_rate": 8.233343808054159e-06, "loss": 0.0, "step": 1418 }, { "epoch": 0.5510183046243581, "grad_norm": 4.4800672185374424e-05, "learning_rate": 8.228500162970333e-06, "loss": 0.0, "step": 1420 }, { "epoch": 0.5517943867435473, "grad_norm": 0.00010170348105020821, "learning_rate": 8.223651316129115e-06, "loss": 0.0, "step": 1422 }, { "epoch": 0.5525704688627365, "grad_norm": 0.0007546765264123678, "learning_rate": 8.21879727534296e-06, "loss": 0.0, "step": 1424 }, { "epoch": 0.5533465509819258, "grad_norm": 0.0006885923794470727, "learning_rate": 8.213938048432697e-06, "loss": 0.0, "step": 1426 }, { "epoch": 0.554122633101115, "grad_norm": 0.005545514170080423, "learning_rate": 8.20907364322751e-06, "loss": 0.0, "step": 1428 }, { "epoch": 0.5548987152203042, "grad_norm": 7.936695328680798e-05, "learning_rate": 8.204204067564924e-06, "loss": 0.0, "step": 1430 }, { "epoch": 0.5556747973394934, "grad_norm": 4.694041854236275e-05, "learning_rate": 8.199329329290798e-06, "loss": 0.0, "step": 1432 }, { "epoch": 0.5564508794586828, "grad_norm": 6.579761975444853e-05, "learning_rate": 8.194449436259305e-06, "loss": 0.0, "step": 1434 }, { "epoch": 0.557226961577872, "grad_norm": 7.020895282039419e-05, "learning_rate": 8.189564396332927e-06, "loss": 0.0, "step": 1436 }, { "epoch": 0.5580030436970612, "grad_norm": 5.4274041758617386e-05, "learning_rate": 8.184674217382438e-06, "loss": 0.0, "step": 1438 }, { "epoch": 0.5587791258162504, "grad_norm": 4.338437065598555e-05, "learning_rate": 8.179778907286889e-06, "loss": 0.0, "step": 1440 }, { "epoch": 0.5587791258162504, "eval_accuracy": 1.0, "eval_loss": 4.0279860513692256e-07, "eval_runtime": 126.2841, "eval_samples_per_second": 39.593, "eval_steps_per_second": 9.898, "step": 1440 }, { "epoch": 0.5595552079354397, "grad_norm": 4.7420468035852537e-05, "learning_rate": 8.174878473933601e-06, "loss": 0.0, "step": 1442 }, { "epoch": 0.5603312900546289, "grad_norm": 0.0001968707947526127, "learning_rate": 8.16997292521815e-06, "loss": 0.0, "step": 1444 }, { "epoch": 0.5611073721738181, "grad_norm": 5.1431783504085615e-05, "learning_rate": 8.165062269044353e-06, "loss": 0.0, "step": 1446 }, { "epoch": 0.5618834542930073, "grad_norm": 8.569697820348665e-05, "learning_rate": 8.160146513324256e-06, "loss": 0.0, "step": 1448 }, { "epoch": 0.5626595364121966, "grad_norm": 3.671423110063188e-05, "learning_rate": 8.15522566597812e-06, "loss": 0.0, "step": 1450 }, { "epoch": 0.5634356185313859, "grad_norm": 4.679307676269673e-05, "learning_rate": 8.150299734934413e-06, "loss": 0.0, "step": 1452 }, { "epoch": 0.5642117006505751, "grad_norm": 5.9181929827900603e-05, "learning_rate": 8.14536872812979e-06, "loss": 0.0, "step": 1454 }, { "epoch": 0.5649877827697644, "grad_norm": 5.765046444139443e-05, "learning_rate": 8.140432653509089e-06, "loss": 0.0, "step": 1456 }, { "epoch": 0.5657638648889536, "grad_norm": 0.00042973991367034614, "learning_rate": 8.135491519025307e-06, "loss": 0.0, "step": 1458 }, { "epoch": 0.5665399470081428, "grad_norm": 6.0458758525783196e-05, "learning_rate": 8.130545332639599e-06, "loss": 0.0, "step": 1460 }, { "epoch": 0.567316029127332, "grad_norm": 0.00014692865079268813, "learning_rate": 8.125594102321256e-06, "loss": 0.0, "step": 1462 }, { "epoch": 0.5680921112465213, "grad_norm": 0.0005595634575001895, "learning_rate": 8.120637836047698e-06, "loss": 0.0, "step": 1464 }, { "epoch": 0.5688681933657105, "grad_norm": 8.883653208613396e-05, "learning_rate": 8.115676541804456e-06, "loss": 0.0, "step": 1466 }, { "epoch": 0.5696442754848997, "grad_norm": 7.879840995883569e-05, "learning_rate": 8.110710227585169e-06, "loss": 0.0, "step": 1468 }, { "epoch": 0.5704203576040889, "grad_norm": 9.372194472234696e-05, "learning_rate": 8.105738901391553e-06, "loss": 0.0, "step": 1470 }, { "epoch": 0.5711964397232783, "grad_norm": 6.043996472726576e-05, "learning_rate": 8.100762571233409e-06, "loss": 0.0, "step": 1472 }, { "epoch": 0.5719725218424675, "grad_norm": 4.433980575413443e-05, "learning_rate": 8.095781245128598e-06, "loss": 0.0, "step": 1474 }, { "epoch": 0.5727486039616567, "grad_norm": 5.414113911683671e-05, "learning_rate": 8.090794931103026e-06, "loss": 0.0, "step": 1476 }, { "epoch": 0.573524686080846, "grad_norm": 4.483163866098039e-05, "learning_rate": 8.085803637190643e-06, "loss": 0.0, "step": 1478 }, { "epoch": 0.5743007682000352, "grad_norm": 2.6743557100417092e-05, "learning_rate": 8.080807371433415e-06, "loss": 0.0, "step": 1480 }, { "epoch": 0.5743007682000352, "eval_accuracy": 1.0, "eval_loss": 3.8571994309677393e-07, "eval_runtime": 126.078, "eval_samples_per_second": 39.658, "eval_steps_per_second": 9.915, "step": 1480 }, { "epoch": 0.5750768503192244, "grad_norm": 6.0011538153048605e-05, "learning_rate": 8.075806141881327e-06, "loss": 0.0, "step": 1482 }, { "epoch": 0.5758529324384136, "grad_norm": 6.077942089177668e-05, "learning_rate": 8.07079995659235e-06, "loss": 0.0, "step": 1484 }, { "epoch": 0.5766290145576028, "grad_norm": 5.7140096032526344e-05, "learning_rate": 8.065788823632451e-06, "loss": 0.0, "step": 1486 }, { "epoch": 0.5774050966767921, "grad_norm": 0.00021332010510377586, "learning_rate": 8.060772751075564e-06, "loss": 0.0, "step": 1488 }, { "epoch": 0.5781811787959813, "grad_norm": 6.341590778902173e-05, "learning_rate": 8.05575174700358e-06, "loss": 0.0, "step": 1490 }, { "epoch": 0.5789572609151706, "grad_norm": 9.011371730593964e-05, "learning_rate": 8.05072581950634e-06, "loss": 0.0, "step": 1492 }, { "epoch": 0.5797333430343599, "grad_norm": 4.301783337723464e-05, "learning_rate": 8.045694976681613e-06, "loss": 0.0, "step": 1494 }, { "epoch": 0.5805094251535491, "grad_norm": 7.515721517847851e-05, "learning_rate": 8.04065922663509e-06, "loss": 0.0, "step": 1496 }, { "epoch": 0.5812855072727383, "grad_norm": 4.030248601338826e-05, "learning_rate": 8.035618577480369e-06, "loss": 0.0, "step": 1498 }, { "epoch": 0.5820615893919275, "grad_norm": 3.1120103813009337e-05, "learning_rate": 8.030573037338942e-06, "loss": 0.0, "step": 1500 }, { "epoch": 0.5828376715111168, "grad_norm": 7.704844756517559e-05, "learning_rate": 8.025522614340177e-06, "loss": 0.0, "step": 1502 }, { "epoch": 0.583613753630306, "grad_norm": 7.151111640268937e-05, "learning_rate": 8.020467316621316e-06, "loss": 0.0, "step": 1504 }, { "epoch": 0.5843898357494952, "grad_norm": 4.2859497625613585e-05, "learning_rate": 8.015407152327448e-06, "loss": 0.0, "step": 1506 }, { "epoch": 0.5851659178686844, "grad_norm": 0.0025747367180883884, "learning_rate": 8.010342129611508e-06, "loss": 0.0, "step": 1508 }, { "epoch": 0.5859419999878737, "grad_norm": 7.34597779228352e-05, "learning_rate": 8.005272256634257e-06, "loss": 0.0, "step": 1510 }, { "epoch": 0.586718082107063, "grad_norm": 5.048272942076437e-05, "learning_rate": 8.000197541564273e-06, "loss": 0.0, "step": 1512 }, { "epoch": 0.5874941642262522, "grad_norm": 0.00027801268151961267, "learning_rate": 7.99511799257793e-06, "loss": 0.0, "step": 1514 }, { "epoch": 0.5882702463454414, "grad_norm": 5.5441934819100425e-05, "learning_rate": 7.990033617859396e-06, "loss": 0.0, "step": 1516 }, { "epoch": 0.5890463284646307, "grad_norm": 7.506380643462762e-05, "learning_rate": 7.984944425600614e-06, "loss": 0.0, "step": 1518 }, { "epoch": 0.5898224105838199, "grad_norm": 5.7581426517572254e-05, "learning_rate": 7.979850424001283e-06, "loss": 0.0, "step": 1520 }, { "epoch": 0.5898224105838199, "eval_accuracy": 1.0, "eval_loss": 3.4706778251347714e-07, "eval_runtime": 126.5118, "eval_samples_per_second": 39.522, "eval_steps_per_second": 9.88, "step": 1520 }, { "epoch": 0.5905984927030091, "grad_norm": 3.9170179661596194e-05, "learning_rate": 7.97475162126886e-06, "loss": 0.0, "step": 1522 }, { "epoch": 0.5913745748221984, "grad_norm": 4.3280124373268336e-05, "learning_rate": 7.96964802561853e-06, "loss": 0.0, "step": 1524 }, { "epoch": 0.5921506569413876, "grad_norm": 0.00010236985690426081, "learning_rate": 7.964539645273204e-06, "loss": 0.0, "step": 1526 }, { "epoch": 0.5929267390605768, "grad_norm": 0.03516064211726189, "learning_rate": 7.9594264884635e-06, "loss": 0.0, "step": 1528 }, { "epoch": 0.5937028211797661, "grad_norm": 0.00020885800768155605, "learning_rate": 7.954308563427732e-06, "loss": 0.0, "step": 1530 }, { "epoch": 0.5944789032989554, "grad_norm": 4.5101263822289184e-05, "learning_rate": 7.9491858784119e-06, "loss": 0.0, "step": 1532 }, { "epoch": 0.5952549854181446, "grad_norm": 0.00016714293451514095, "learning_rate": 7.944058441669671e-06, "loss": 0.0, "step": 1534 }, { "epoch": 0.5960310675373338, "grad_norm": 0.005708463490009308, "learning_rate": 7.938926261462366e-06, "loss": 0.0, "step": 1536 }, { "epoch": 0.596807149656523, "grad_norm": 4.2953841330017895e-05, "learning_rate": 7.933789346058951e-06, "loss": 0.0, "step": 1538 }, { "epoch": 0.5975832317757123, "grad_norm": 6.88051586621441e-05, "learning_rate": 7.928647703736024e-06, "loss": 0.0, "step": 1540 }, { "epoch": 0.5983593138949015, "grad_norm": 0.00022147178242448717, "learning_rate": 7.923501342777788e-06, "loss": 0.0, "step": 1542 }, { "epoch": 0.5991353960140907, "grad_norm": 5.676286673406139e-05, "learning_rate": 7.918350271476064e-06, "loss": 0.0, "step": 1544 }, { "epoch": 0.5999114781332799, "grad_norm": 0.020044377073645592, "learning_rate": 7.913194498130252e-06, "loss": 0.0, "step": 1546 }, { "epoch": 0.6006875602524692, "grad_norm": 7.415375876007602e-05, "learning_rate": 7.90803403104733e-06, "loss": 0.0, "step": 1548 }, { "epoch": 0.6014636423716585, "grad_norm": 3.634074528235942e-05, "learning_rate": 7.90286887854184e-06, "loss": 0.0, "step": 1550 }, { "epoch": 0.6022397244908477, "grad_norm": 9.296066855313256e-05, "learning_rate": 7.897699048935875e-06, "loss": 0.0, "step": 1552 }, { "epoch": 0.603015806610037, "grad_norm": 3.713753540068865e-05, "learning_rate": 7.892524550559056e-06, "loss": 0.0, "step": 1554 }, { "epoch": 0.6037918887292262, "grad_norm": 7.454553997376934e-05, "learning_rate": 7.887345391748533e-06, "loss": 0.0, "step": 1556 }, { "epoch": 0.6045679708484154, "grad_norm": 0.00012221022916492075, "learning_rate": 7.882161580848966e-06, "loss": 0.0, "step": 1558 }, { "epoch": 0.6053440529676046, "grad_norm": 4.862304194830358e-05, "learning_rate": 7.876973126212507e-06, "loss": 0.0, "step": 1560 }, { "epoch": 0.6053440529676046, "eval_accuracy": 0.999998406374502, "eval_loss": 1.1647991414065473e-05, "eval_runtime": 126.6719, "eval_samples_per_second": 39.472, "eval_steps_per_second": 9.868, "step": 1560 }, { "epoch": 0.6061201350867939, "grad_norm": 0.00022768386406823993, "learning_rate": 7.87178003619879e-06, "loss": 0.0, "step": 1562 }, { "epoch": 0.6068962172059831, "grad_norm": 3.143977664876729e-05, "learning_rate": 7.866582319174918e-06, "loss": 0.0, "step": 1564 }, { "epoch": 0.6076722993251723, "grad_norm": 5.5969627283047885e-05, "learning_rate": 7.861379983515449e-06, "loss": 0.0, "step": 1566 }, { "epoch": 0.6084483814443615, "grad_norm": 0.0006040379521436989, "learning_rate": 7.856173037602383e-06, "loss": 0.0, "step": 1568 }, { "epoch": 0.6092244635635509, "grad_norm": 4.633105345419608e-05, "learning_rate": 7.85096148982515e-06, "loss": 0.0, "step": 1570 }, { "epoch": 0.6100005456827401, "grad_norm": 8.987903856905177e-05, "learning_rate": 7.845745348580592e-06, "loss": 0.0, "step": 1572 }, { "epoch": 0.6107766278019293, "grad_norm": 4.60273731732741e-05, "learning_rate": 7.840524622272949e-06, "loss": 0.0, "step": 1574 }, { "epoch": 0.6115527099211185, "grad_norm": 4.6285938879009336e-05, "learning_rate": 7.835299319313854e-06, "loss": 0.0, "step": 1576 }, { "epoch": 0.6123287920403078, "grad_norm": 3.3300537324976176e-05, "learning_rate": 7.830069448122313e-06, "loss": 0.0, "step": 1578 }, { "epoch": 0.613104874159497, "grad_norm": 5.020232856622897e-05, "learning_rate": 7.82483501712469e-06, "loss": 0.0, "step": 1580 }, { "epoch": 0.6138809562786862, "grad_norm": 4.64909608126618e-05, "learning_rate": 7.819596034754696e-06, "loss": 0.0, "step": 1582 }, { "epoch": 0.6146570383978754, "grad_norm": 5.460707689053379e-05, "learning_rate": 7.81435250945338e-06, "loss": 0.0, "step": 1584 }, { "epoch": 0.6154331205170647, "grad_norm": 5.152116500539705e-05, "learning_rate": 7.8091044496691e-06, "loss": 0.0, "step": 1586 }, { "epoch": 0.6162092026362539, "grad_norm": 4.749984509544447e-05, "learning_rate": 7.803851863857533e-06, "loss": 0.0, "step": 1588 }, { "epoch": 0.6169852847554432, "grad_norm": 0.0002174829423893243, "learning_rate": 7.798594760481639e-06, "loss": 0.0, "step": 1590 }, { "epoch": 0.6177613668746325, "grad_norm": 9.113615669775754e-05, "learning_rate": 7.793333148011658e-06, "loss": 0.0, "step": 1592 }, { "epoch": 0.6185374489938217, "grad_norm": 0.00023632499505765736, "learning_rate": 7.7880670349251e-06, "loss": 0.0, "step": 1594 }, { "epoch": 0.6193135311130109, "grad_norm": 3.2929434382822365e-05, "learning_rate": 7.782796429706721e-06, "loss": 0.0, "step": 1596 }, { "epoch": 0.6200896132322001, "grad_norm": 5.9075075114378706e-05, "learning_rate": 7.777521340848515e-06, "loss": 0.0, "step": 1598 }, { "epoch": 0.6208656953513894, "grad_norm": 7.67604069551453e-05, "learning_rate": 7.772241776849705e-06, "loss": 0.0001, "step": 1600 }, { "epoch": 0.6208656953513894, "eval_accuracy": 1.0, "eval_loss": 1.5241286064338055e-06, "eval_runtime": 126.4724, "eval_samples_per_second": 39.534, "eval_steps_per_second": 9.884, "step": 1600 }, { "epoch": 0.6216417774705786, "grad_norm": 4.4303851609583944e-05, "learning_rate": 7.76695774621672e-06, "loss": 0.0, "step": 1602 }, { "epoch": 0.6224178595897678, "grad_norm": 0.0007986262789927423, "learning_rate": 7.761669257463188e-06, "loss": 0.0, "step": 1604 }, { "epoch": 0.623193941708957, "grad_norm": 0.000619521364569664, "learning_rate": 7.756376319109917e-06, "loss": 0.0, "step": 1606 }, { "epoch": 0.6239700238281464, "grad_norm": 0.2434842884540558, "learning_rate": 7.751078939684886e-06, "loss": 0.0002, "step": 1608 }, { "epoch": 0.6247461059473356, "grad_norm": 0.07336213439702988, "learning_rate": 7.74577712772323e-06, "loss": 0.0001, "step": 1610 }, { "epoch": 0.6255221880665248, "grad_norm": 0.0007875768351368606, "learning_rate": 7.740470891767225e-06, "loss": 0.0, "step": 1612 }, { "epoch": 0.626298270185714, "grad_norm": 0.03258689120411873, "learning_rate": 7.735160240366276e-06, "loss": 0.0, "step": 1614 }, { "epoch": 0.6270743523049033, "grad_norm": 0.0002722474164329469, "learning_rate": 7.729845182076896e-06, "loss": 0.0, "step": 1616 }, { "epoch": 0.6278504344240925, "grad_norm": 0.000947869208175689, "learning_rate": 7.72452572546271e-06, "loss": 0.0, "step": 1618 }, { "epoch": 0.6286265165432817, "grad_norm": 0.007816344499588013, "learning_rate": 7.71920187909442e-06, "loss": 0.0, "step": 1620 }, { "epoch": 0.629402598662471, "grad_norm": 0.0013816261198371649, "learning_rate": 7.713873651549805e-06, "loss": 0.0, "step": 1622 }, { "epoch": 0.6301786807816602, "grad_norm": 0.003947379998862743, "learning_rate": 7.7085410514137e-06, "loss": 0.0, "step": 1624 }, { "epoch": 0.6309547629008494, "grad_norm": 0.3224280774593353, "learning_rate": 7.703204087277989e-06, "loss": 0.0004, "step": 1626 }, { "epoch": 0.6317308450200387, "grad_norm": 0.00790658313781023, "learning_rate": 7.697862767741584e-06, "loss": 0.0, "step": 1628 }, { "epoch": 0.632506927139228, "grad_norm": 0.0011794422753155231, "learning_rate": 7.692517101410414e-06, "loss": 0.0, "step": 1630 }, { "epoch": 0.6332830092584172, "grad_norm": 0.17586196959018707, "learning_rate": 7.68716709689742e-06, "loss": 0.0002, "step": 1632 }, { "epoch": 0.6340590913776064, "grad_norm": 0.015726491808891296, "learning_rate": 7.681812762822517e-06, "loss": 0.0, "step": 1634 }, { "epoch": 0.6348351734967956, "grad_norm": 0.01240396685898304, "learning_rate": 7.676454107812608e-06, "loss": 0.0, "step": 1636 }, { "epoch": 0.6356112556159849, "grad_norm": 0.004802124574780464, "learning_rate": 7.671091140501557e-06, "loss": 0.0, "step": 1638 }, { "epoch": 0.6363873377351741, "grad_norm": 0.11473255604505539, "learning_rate": 7.66572386953017e-06, "loss": 0.0003, "step": 1640 }, { "epoch": 0.6363873377351741, "eval_accuracy": 0.999995579606225, "eval_loss": 2.3172529836301692e-05, "eval_runtime": 127.4263, "eval_samples_per_second": 39.238, "eval_steps_per_second": 9.81, "step": 1640 }, { "epoch": 0.6371634198543633, "grad_norm": 0.00966943334788084, "learning_rate": 7.660352303546192e-06, "loss": 0.0, "step": 1642 }, { "epoch": 0.6379395019735525, "grad_norm": 0.002207924146205187, "learning_rate": 7.654976451204288e-06, "loss": 0.0, "step": 1644 }, { "epoch": 0.6387155840927418, "grad_norm": 0.05040845647454262, "learning_rate": 7.649596321166024e-06, "loss": 0.0, "step": 1646 }, { "epoch": 0.6394916662119311, "grad_norm": 0.16841939091682434, "learning_rate": 7.644211922099867e-06, "loss": 0.0002, "step": 1648 }, { "epoch": 0.6402677483311203, "grad_norm": 0.002209783298894763, "learning_rate": 7.638823262681155e-06, "loss": 0.0, "step": 1650 }, { "epoch": 0.6410438304503095, "grad_norm": 0.12204953283071518, "learning_rate": 7.633430351592093e-06, "loss": 0.0001, "step": 1652 }, { "epoch": 0.6418199125694988, "grad_norm": 0.10411413758993149, "learning_rate": 7.6280331975217356e-06, "loss": 0.0001, "step": 1654 }, { "epoch": 0.642595994688688, "grad_norm": 0.09621908515691757, "learning_rate": 7.622631809165972e-06, "loss": 0.0006, "step": 1656 }, { "epoch": 0.6433720768078772, "grad_norm": 0.10735712945461273, "learning_rate": 7.617226195227518e-06, "loss": 0.0001, "step": 1658 }, { "epoch": 0.6441481589270664, "grad_norm": 0.006281985901296139, "learning_rate": 7.611816364415896e-06, "loss": 0.0002, "step": 1660 }, { "epoch": 0.6449242410462557, "grad_norm": 0.2644466459751129, "learning_rate": 7.606402325447421e-06, "loss": 0.0004, "step": 1662 }, { "epoch": 0.6457003231654449, "grad_norm": 0.09610844403505325, "learning_rate": 7.600984087045187e-06, "loss": 0.0001, "step": 1664 }, { "epoch": 0.6464764052846341, "grad_norm": 0.05891406908631325, "learning_rate": 7.595561657939061e-06, "loss": 0.0001, "step": 1666 }, { "epoch": 0.6472524874038235, "grad_norm": 0.019790878519415855, "learning_rate": 7.590135046865652e-06, "loss": 0.0001, "step": 1668 }, { "epoch": 0.6480285695230127, "grad_norm": 0.0463184118270874, "learning_rate": 7.584704262568315e-06, "loss": 0.0002, "step": 1670 }, { "epoch": 0.6488046516422019, "grad_norm": 0.09230075031518936, "learning_rate": 7.579269313797126e-06, "loss": 0.0, "step": 1672 }, { "epoch": 0.6495807337613911, "grad_norm": 0.05759420990943909, "learning_rate": 7.573830209308872e-06, "loss": 0.0006, "step": 1674 }, { "epoch": 0.6503568158805804, "grad_norm": 0.009661559015512466, "learning_rate": 7.568386957867033e-06, "loss": 0.0, "step": 1676 }, { "epoch": 0.6511328979997696, "grad_norm": 0.16799117624759674, "learning_rate": 7.562939568241772e-06, "loss": 0.0008, "step": 1678 }, { "epoch": 0.6519089801189588, "grad_norm": 0.09230285882949829, "learning_rate": 7.557488049209921e-06, "loss": 0.0004, "step": 1680 }, { "epoch": 0.6519089801189588, "eval_accuracy": 0.9998690327564542, "eval_loss": 0.0003383007424417883, "eval_runtime": 126.8809, "eval_samples_per_second": 39.407, "eval_steps_per_second": 9.852, "step": 1680 }, { "epoch": 0.652685062238148, "grad_norm": 0.22043685615062714, "learning_rate": 7.552032409554963e-06, "loss": 0.0003, "step": 1682 }, { "epoch": 0.6534611443573373, "grad_norm": 0.01799617148935795, "learning_rate": 7.546572658067022e-06, "loss": 0.0002, "step": 1684 }, { "epoch": 0.6542372264765266, "grad_norm": 0.02793813869357109, "learning_rate": 7.541108803542846e-06, "loss": 0.0003, "step": 1686 }, { "epoch": 0.6550133085957158, "grad_norm": 0.03266725316643715, "learning_rate": 7.535640854785793e-06, "loss": 0.0001, "step": 1688 }, { "epoch": 0.655789390714905, "grad_norm": 0.04410627484321594, "learning_rate": 7.530168820605819e-06, "loss": 0.0001, "step": 1690 }, { "epoch": 0.6565654728340943, "grad_norm": 0.047289930284023285, "learning_rate": 7.5246927098194636e-06, "loss": 0.0001, "step": 1692 }, { "epoch": 0.6573415549532835, "grad_norm": 0.027208736166357994, "learning_rate": 7.51921253124983e-06, "loss": 0.0001, "step": 1694 }, { "epoch": 0.6581176370724727, "grad_norm": 0.1260233372449875, "learning_rate": 7.5137282937265796e-06, "loss": 0.0003, "step": 1696 }, { "epoch": 0.658893719191662, "grad_norm": 0.02470492385327816, "learning_rate": 7.508240006085914e-06, "loss": 0.0001, "step": 1698 }, { "epoch": 0.6596698013108512, "grad_norm": 0.14346683025360107, "learning_rate": 7.502747677170556e-06, "loss": 0.0001, "step": 1700 }, { "epoch": 0.6604458834300404, "grad_norm": 0.005949016194790602, "learning_rate": 7.497251315829744e-06, "loss": 0.0002, "step": 1702 }, { "epoch": 0.6612219655492296, "grad_norm": 0.019219020381569862, "learning_rate": 7.4917509309192125e-06, "loss": 0.0, "step": 1704 }, { "epoch": 0.661998047668419, "grad_norm": 0.026751527562737465, "learning_rate": 7.486246531301178e-06, "loss": 0.0001, "step": 1706 }, { "epoch": 0.6627741297876082, "grad_norm": 0.0762225091457367, "learning_rate": 7.480738125844322e-06, "loss": 0.0002, "step": 1708 }, { "epoch": 0.6635502119067974, "grad_norm": 0.011667470447719097, "learning_rate": 7.475225723423789e-06, "loss": 0.0006, "step": 1710 }, { "epoch": 0.6643262940259866, "grad_norm": 0.258180171251297, "learning_rate": 7.469709332921155e-06, "loss": 0.0002, "step": 1712 }, { "epoch": 0.6651023761451759, "grad_norm": 0.010778096504509449, "learning_rate": 7.464188963224428e-06, "loss": 0.0003, "step": 1714 }, { "epoch": 0.6658784582643651, "grad_norm": 0.043908413499593735, "learning_rate": 7.45866462322802e-06, "loss": 0.0001, "step": 1716 }, { "epoch": 0.6666545403835543, "grad_norm": 0.11645996570587158, "learning_rate": 7.453136321832746e-06, "loss": 0.0002, "step": 1718 }, { "epoch": 0.6674306225027435, "grad_norm": 0.14430993795394897, "learning_rate": 7.447604067945803e-06, "loss": 0.0002, "step": 1720 }, { "epoch": 0.6674306225027435, "eval_accuracy": 0.9999496007737516, "eval_loss": 0.00019341686856932938, "eval_runtime": 133.0924, "eval_samples_per_second": 37.568, "eval_steps_per_second": 9.392, "step": 1720 }, { "epoch": 0.6682067046219328, "grad_norm": 0.016881542280316353, "learning_rate": 7.442067870480752e-06, "loss": 0.0001, "step": 1722 }, { "epoch": 0.668982786741122, "grad_norm": 0.12716498970985413, "learning_rate": 7.436527738357514e-06, "loss": 0.0002, "step": 1724 }, { "epoch": 0.6697588688603113, "grad_norm": 0.005419870838522911, "learning_rate": 7.430983680502344e-06, "loss": 0.0002, "step": 1726 }, { "epoch": 0.6705349509795006, "grad_norm": 0.021987633779644966, "learning_rate": 7.425435705847825e-06, "loss": 0.0001, "step": 1728 }, { "epoch": 0.6713110330986898, "grad_norm": 0.01175680197775364, "learning_rate": 7.419883823332851e-06, "loss": 0.0001, "step": 1730 }, { "epoch": 0.672087115217879, "grad_norm": 0.03698015958070755, "learning_rate": 7.414328041902611e-06, "loss": 0.0, "step": 1732 }, { "epoch": 0.6728631973370682, "grad_norm": 0.07087601721286774, "learning_rate": 7.408768370508577e-06, "loss": 0.0002, "step": 1734 }, { "epoch": 0.6736392794562575, "grad_norm": 0.005914826411753893, "learning_rate": 7.403204818108487e-06, "loss": 0.0, "step": 1736 }, { "epoch": 0.6744153615754467, "grad_norm": 0.009205642156302929, "learning_rate": 7.397637393666333e-06, "loss": 0.0, "step": 1738 }, { "epoch": 0.6751914436946359, "grad_norm": 0.0026831270661205053, "learning_rate": 7.392066106152347e-06, "loss": 0.0, "step": 1740 }, { "epoch": 0.6759675258138251, "grad_norm": 0.003940435126423836, "learning_rate": 7.386490964542983e-06, "loss": 0.0, "step": 1742 }, { "epoch": 0.6767436079330144, "grad_norm": 0.013651663437485695, "learning_rate": 7.380911977820907e-06, "loss": 0.0, "step": 1744 }, { "epoch": 0.6775196900522037, "grad_norm": 0.0022897187154740095, "learning_rate": 7.3753291549749764e-06, "loss": 0.0, "step": 1746 }, { "epoch": 0.6782957721713929, "grad_norm": 0.006075785495340824, "learning_rate": 7.369742505000232e-06, "loss": 0.0, "step": 1748 }, { "epoch": 0.6790718542905821, "grad_norm": 0.001669196761213243, "learning_rate": 7.364152036897883e-06, "loss": 0.0, "step": 1750 }, { "epoch": 0.6798479364097714, "grad_norm": 0.0020326990634202957, "learning_rate": 7.358557759675284e-06, "loss": 0.0, "step": 1752 }, { "epoch": 0.6806240185289606, "grad_norm": 0.002198468893766403, "learning_rate": 7.352959682345936e-06, "loss": 0.0, "step": 1754 }, { "epoch": 0.6814001006481498, "grad_norm": 0.01640300825238228, "learning_rate": 7.347357813929455e-06, "loss": 0.0, "step": 1756 }, { "epoch": 0.682176182767339, "grad_norm": 0.0019347390625625849, "learning_rate": 7.341752163451568e-06, "loss": 0.0, "step": 1758 }, { "epoch": 0.6829522648865283, "grad_norm": 0.049371421337127686, "learning_rate": 7.3361427399440945e-06, "loss": 0.0, "step": 1760 }, { "epoch": 0.6829522648865283, "eval_accuracy": 0.9999949435028248, "eval_loss": 2.500028676877264e-05, "eval_runtime": 131.8111, "eval_samples_per_second": 37.933, "eval_steps_per_second": 9.483, "step": 1760 }, { "epoch": 0.6837283470057175, "grad_norm": 0.003058637725189328, "learning_rate": 7.330529552444934e-06, "loss": 0.0, "step": 1762 }, { "epoch": 0.6845044291249068, "grad_norm": 0.0005531050264835358, "learning_rate": 7.324912609998054e-06, "loss": 0.0, "step": 1764 }, { "epoch": 0.6852805112440961, "grad_norm": 0.030952438712120056, "learning_rate": 7.319291921653464e-06, "loss": 0.0, "step": 1766 }, { "epoch": 0.6860565933632853, "grad_norm": 0.08425784856081009, "learning_rate": 7.313667496467216e-06, "loss": 0.0001, "step": 1768 }, { "epoch": 0.6868326754824745, "grad_norm": 0.002320807194337249, "learning_rate": 7.308039343501381e-06, "loss": 0.0, "step": 1770 }, { "epoch": 0.6876087576016637, "grad_norm": 0.00026889421860687435, "learning_rate": 7.302407471824034e-06, "loss": 0.0, "step": 1772 }, { "epoch": 0.688384839720853, "grad_norm": 0.0003629926359280944, "learning_rate": 7.296771890509242e-06, "loss": 0.0, "step": 1774 }, { "epoch": 0.6891609218400422, "grad_norm": 0.00028336691320873797, "learning_rate": 7.291132608637053e-06, "loss": 0.0, "step": 1776 }, { "epoch": 0.6899370039592314, "grad_norm": 0.000375633651856333, "learning_rate": 7.285489635293472e-06, "loss": 0.0, "step": 1778 }, { "epoch": 0.6907130860784206, "grad_norm": 0.001007127808406949, "learning_rate": 7.279842979570454e-06, "loss": 0.0, "step": 1780 }, { "epoch": 0.6914891681976099, "grad_norm": 0.00027274570311419666, "learning_rate": 7.27419265056589e-06, "loss": 0.0, "step": 1782 }, { "epoch": 0.6922652503167992, "grad_norm": 0.0005685545038431883, "learning_rate": 7.268538657383581e-06, "loss": 0.0, "step": 1784 }, { "epoch": 0.6930413324359884, "grad_norm": 0.007831143215298653, "learning_rate": 7.262881009133242e-06, "loss": 0.0, "step": 1786 }, { "epoch": 0.6938174145551776, "grad_norm": 0.008233286440372467, "learning_rate": 7.2572197149304715e-06, "loss": 0.0004, "step": 1788 }, { "epoch": 0.6945934966743669, "grad_norm": 0.0013201547553762794, "learning_rate": 7.251554783896741e-06, "loss": 0.0, "step": 1790 }, { "epoch": 0.6953695787935561, "grad_norm": 0.0009992806008085608, "learning_rate": 7.245886225159386e-06, "loss": 0.0, "step": 1792 }, { "epoch": 0.6961456609127453, "grad_norm": 0.00044993869960308075, "learning_rate": 7.240214047851583e-06, "loss": 0.0, "step": 1794 }, { "epoch": 0.6969217430319345, "grad_norm": 0.023531010374426842, "learning_rate": 7.234538261112342e-06, "loss": 0.0, "step": 1796 }, { "epoch": 0.6976978251511238, "grad_norm": 0.0028708255849778652, "learning_rate": 7.2288588740864855e-06, "loss": 0.0, "step": 1798 }, { "epoch": 0.698473907270313, "grad_norm": 0.0005221987375989556, "learning_rate": 7.223175895924638e-06, "loss": 0.0, "step": 1800 }, { "epoch": 0.698473907270313, "eval_accuracy": 0.9999992031872509, "eval_loss": 5.176351805857848e-06, "eval_runtime": 132.3731, "eval_samples_per_second": 37.772, "eval_steps_per_second": 9.443, "step": 1800 }, { "epoch": 0.6992499893895022, "grad_norm": 0.000758216658141464, "learning_rate": 7.217489335783212e-06, "loss": 0.0, "step": 1802 }, { "epoch": 0.7000260715086916, "grad_norm": 0.0005488329334184527, "learning_rate": 7.211799202824389e-06, "loss": 0.0, "step": 1804 }, { "epoch": 0.7008021536278808, "grad_norm": 0.0038576379884034395, "learning_rate": 7.206105506216107e-06, "loss": 0.0, "step": 1806 }, { "epoch": 0.70157823574707, "grad_norm": 0.0004235364613123238, "learning_rate": 7.200408255132046e-06, "loss": 0.0, "step": 1808 }, { "epoch": 0.7023543178662592, "grad_norm": 0.00023026720737107098, "learning_rate": 7.194707458751615e-06, "loss": 0.0, "step": 1810 }, { "epoch": 0.7031303999854485, "grad_norm": 0.0003047767677344382, "learning_rate": 7.189003126259932e-06, "loss": 0.0, "step": 1812 }, { "epoch": 0.7039064821046377, "grad_norm": 0.0005498046521097422, "learning_rate": 7.1832952668478155e-06, "loss": 0.0, "step": 1814 }, { "epoch": 0.7046825642238269, "grad_norm": 0.0020994949154555798, "learning_rate": 7.177583889711763e-06, "loss": 0.0, "step": 1816 }, { "epoch": 0.7054586463430161, "grad_norm": 0.00045528257032856345, "learning_rate": 7.1718690040539404e-06, "loss": 0.0, "step": 1818 }, { "epoch": 0.7062347284622054, "grad_norm": 0.0027946773916482925, "learning_rate": 7.166150619082171e-06, "loss": 0.0, "step": 1820 }, { "epoch": 0.7070108105813946, "grad_norm": 0.0007769826916046441, "learning_rate": 7.160428744009913e-06, "loss": 0.0, "step": 1822 }, { "epoch": 0.7077868927005839, "grad_norm": 0.0002779439673759043, "learning_rate": 7.154703388056246e-06, "loss": 0.0, "step": 1824 }, { "epoch": 0.7085629748197732, "grad_norm": 0.0034867366775870323, "learning_rate": 7.148974560445859e-06, "loss": 0.0, "step": 1826 }, { "epoch": 0.7093390569389624, "grad_norm": 0.0006701332167722285, "learning_rate": 7.143242270409039e-06, "loss": 0.0, "step": 1828 }, { "epoch": 0.7101151390581516, "grad_norm": 0.0007799357990734279, "learning_rate": 7.137506527181643e-06, "loss": 0.0, "step": 1830 }, { "epoch": 0.7108912211773408, "grad_norm": 0.00032961563556455076, "learning_rate": 7.131767340005102e-06, "loss": 0.0, "step": 1832 }, { "epoch": 0.71166730329653, "grad_norm": 0.0006901177694089711, "learning_rate": 7.126024718126388e-06, "loss": 0.0, "step": 1834 }, { "epoch": 0.7124433854157193, "grad_norm": 0.002607217989861965, "learning_rate": 7.12027867079801e-06, "loss": 0.0, "step": 1836 }, { "epoch": 0.7132194675349085, "grad_norm": 0.0003781074483413249, "learning_rate": 7.114529207277996e-06, "loss": 0.0, "step": 1838 }, { "epoch": 0.7139955496540977, "grad_norm": 0.0009922360768541694, "learning_rate": 7.1087763368298764e-06, "loss": 0.0, "step": 1840 }, { "epoch": 0.7139955496540977, "eval_accuracy": 0.9999992031872509, "eval_loss": 2.5917022412613733e-06, "eval_runtime": 131.2473, "eval_samples_per_second": 38.096, "eval_steps_per_second": 9.524, "step": 1840 }, { "epoch": 0.7147716317732871, "grad_norm": 0.0003326364967506379, "learning_rate": 7.103020068722675e-06, "loss": 0.0, "step": 1842 }, { "epoch": 0.7155477138924763, "grad_norm": 0.00019628361042123288, "learning_rate": 7.0972604122308865e-06, "loss": 0.0, "step": 1844 }, { "epoch": 0.7163237960116655, "grad_norm": 0.00039899934199638665, "learning_rate": 7.0914973766344645e-06, "loss": 0.0, "step": 1846 }, { "epoch": 0.7170998781308547, "grad_norm": 0.001567089930176735, "learning_rate": 7.085730971218809e-06, "loss": 0.0, "step": 1848 }, { "epoch": 0.717875960250044, "grad_norm": 0.0020182374864816666, "learning_rate": 7.079961205274749e-06, "loss": 0.0, "step": 1850 }, { "epoch": 0.7186520423692332, "grad_norm": 0.0002532765211071819, "learning_rate": 7.074188088098528e-06, "loss": 0.0, "step": 1852 }, { "epoch": 0.7194281244884224, "grad_norm": 0.00031112690339796245, "learning_rate": 7.0684116289917885e-06, "loss": 0.0, "step": 1854 }, { "epoch": 0.7202042066076116, "grad_norm": 0.0002947974717244506, "learning_rate": 7.062631837261556e-06, "loss": 0.0, "step": 1856 }, { "epoch": 0.7209802887268009, "grad_norm": 0.00020873638277407736, "learning_rate": 7.05684872222023e-06, "loss": 0.0, "step": 1858 }, { "epoch": 0.7217563708459901, "grad_norm": 0.00025417693541385233, "learning_rate": 7.05106229318556e-06, "loss": 0.0, "step": 1860 }, { "epoch": 0.7225324529651794, "grad_norm": 0.004179807845503092, "learning_rate": 7.045272559480636e-06, "loss": 0.0, "step": 1862 }, { "epoch": 0.7233085350843687, "grad_norm": 0.00014824530808255076, "learning_rate": 7.039479530433875e-06, "loss": 0.0, "step": 1864 }, { "epoch": 0.7240846172035579, "grad_norm": 0.00033756025368347764, "learning_rate": 7.033683215379002e-06, "loss": 0.0, "step": 1866 }, { "epoch": 0.7248606993227471, "grad_norm": 0.00023106423032004386, "learning_rate": 7.027883623655035e-06, "loss": 0.0, "step": 1868 }, { "epoch": 0.7256367814419363, "grad_norm": 0.0001783316838555038, "learning_rate": 7.022080764606272e-06, "loss": 0.0, "step": 1870 }, { "epoch": 0.7264128635611256, "grad_norm": 0.00014225227641873062, "learning_rate": 7.016274647582276e-06, "loss": 0.0, "step": 1872 }, { "epoch": 0.7271889456803148, "grad_norm": 0.00034868030343204737, "learning_rate": 7.010465281937859e-06, "loss": 0.0, "step": 1874 }, { "epoch": 0.727965027799504, "grad_norm": 0.0002338520425837487, "learning_rate": 7.004652677033069e-06, "loss": 0.0, "step": 1876 }, { "epoch": 0.7287411099186932, "grad_norm": 0.00016042635252233595, "learning_rate": 6.99883684223317e-06, "loss": 0.0, "step": 1878 }, { "epoch": 0.7295171920378825, "grad_norm": 0.00019118768977932632, "learning_rate": 6.993017786908631e-06, "loss": 0.0, "step": 1880 }, { "epoch": 0.7295171920378825, "eval_accuracy": 1.0, "eval_loss": 1.2011416856694268e-06, "eval_runtime": 129.1183, "eval_samples_per_second": 38.724, "eval_steps_per_second": 9.681, "step": 1880 }, { "epoch": 0.7302932741570718, "grad_norm": 0.0001150187526945956, "learning_rate": 6.9871955204351094e-06, "loss": 0.0, "step": 1882 }, { "epoch": 0.731069356276261, "grad_norm": 0.00026088356389664114, "learning_rate": 6.9813700521934394e-06, "loss": 0.0, "step": 1884 }, { "epoch": 0.7318454383954502, "grad_norm": 0.00018814741633832455, "learning_rate": 6.9755413915696105e-06, "loss": 0.0, "step": 1886 }, { "epoch": 0.7326215205146395, "grad_norm": 0.0003330265171825886, "learning_rate": 6.9697095479547564e-06, "loss": 0.0, "step": 1888 }, { "epoch": 0.7333976026338287, "grad_norm": 0.0002486956655047834, "learning_rate": 6.963874530745141e-06, "loss": 0.0, "step": 1890 }, { "epoch": 0.7341736847530179, "grad_norm": 0.0003181362117175013, "learning_rate": 6.95803634934214e-06, "loss": 0.0, "step": 1892 }, { "epoch": 0.7349497668722071, "grad_norm": 0.00022188774892129004, "learning_rate": 6.952195013152227e-06, "loss": 0.0, "step": 1894 }, { "epoch": 0.7357258489913964, "grad_norm": 0.00041119265370070934, "learning_rate": 6.946350531586959e-06, "loss": 0.0, "step": 1896 }, { "epoch": 0.7365019311105856, "grad_norm": 0.00017194538668263704, "learning_rate": 6.940502914062961e-06, "loss": 0.0, "step": 1898 }, { "epoch": 0.7372780132297748, "grad_norm": 9.44953499129042e-05, "learning_rate": 6.934652170001911e-06, "loss": 0.0, "step": 1900 }, { "epoch": 0.7380540953489642, "grad_norm": 0.00015205293311737478, "learning_rate": 6.928798308830524e-06, "loss": 0.0, "step": 1902 }, { "epoch": 0.7388301774681534, "grad_norm": 0.00014523952268064022, "learning_rate": 6.922941339980538e-06, "loss": 0.0, "step": 1904 }, { "epoch": 0.7396062595873426, "grad_norm": 0.00016248523024842143, "learning_rate": 6.917081272888697e-06, "loss": 0.0, "step": 1906 }, { "epoch": 0.7403823417065318, "grad_norm": 6.581814523087814e-05, "learning_rate": 6.911218116996738e-06, "loss": 0.0, "step": 1908 }, { "epoch": 0.7411584238257211, "grad_norm": 0.0001226611202582717, "learning_rate": 6.905351881751372e-06, "loss": 0.0, "step": 1910 }, { "epoch": 0.7419345059449103, "grad_norm": 0.0001127978612203151, "learning_rate": 6.899482576604275e-06, "loss": 0.0, "step": 1912 }, { "epoch": 0.7427105880640995, "grad_norm": 0.0001167935406556353, "learning_rate": 6.893610211012067e-06, "loss": 0.0, "step": 1914 }, { "epoch": 0.7434866701832887, "grad_norm": 0.011720952577888966, "learning_rate": 6.887734794436301e-06, "loss": 0.0, "step": 1916 }, { "epoch": 0.744262752302478, "grad_norm": 0.00018908412312157452, "learning_rate": 6.881856336343442e-06, "loss": 0.0, "step": 1918 }, { "epoch": 0.7450388344216673, "grad_norm": 0.000153981483890675, "learning_rate": 6.8759748462048595e-06, "loss": 0.0, "step": 1920 }, { "epoch": 0.7450388344216673, "eval_accuracy": 1.0, "eval_loss": 1.1019168368875398e-06, "eval_runtime": 130.6974, "eval_samples_per_second": 38.256, "eval_steps_per_second": 9.564, "step": 1920 }, { "epoch": 0.7458149165408565, "grad_norm": 0.00013338137068785727, "learning_rate": 6.870090333496807e-06, "loss": 0.0, "step": 1922 }, { "epoch": 0.7465909986600457, "grad_norm": 0.00016509337001480162, "learning_rate": 6.864202807700407e-06, "loss": 0.0, "step": 1924 }, { "epoch": 0.747367080779235, "grad_norm": 8.096831879811361e-05, "learning_rate": 6.858312278301638e-06, "loss": 0.0, "step": 1926 }, { "epoch": 0.7481431628984242, "grad_norm": 9.679955837782472e-05, "learning_rate": 6.852418754791317e-06, "loss": 0.0, "step": 1928 }, { "epoch": 0.7489192450176134, "grad_norm": 0.00014151426148600876, "learning_rate": 6.8465222466650835e-06, "loss": 0.0, "step": 1930 }, { "epoch": 0.7496953271368026, "grad_norm": 0.0002099966222885996, "learning_rate": 6.840622763423391e-06, "loss": 0.0, "step": 1932 }, { "epoch": 0.7504714092559919, "grad_norm": 7.767569331917912e-05, "learning_rate": 6.83472031457148e-06, "loss": 0.0, "step": 1934 }, { "epoch": 0.7512474913751811, "grad_norm": 0.000112774723675102, "learning_rate": 6.828814909619374e-06, "loss": 0.0, "step": 1936 }, { "epoch": 0.7520235734943703, "grad_norm": 0.000226987773203291, "learning_rate": 6.822906558081856e-06, "loss": 0.0, "step": 1938 }, { "epoch": 0.7527996556135597, "grad_norm": 0.00021889850904699415, "learning_rate": 6.81699526947846e-06, "loss": 0.0, "step": 1940 }, { "epoch": 0.7535757377327489, "grad_norm": 8.806942059891298e-05, "learning_rate": 6.81108105333345e-06, "loss": 0.0, "step": 1942 }, { "epoch": 0.7543518198519381, "grad_norm": 0.00015354371862486005, "learning_rate": 6.8051639191758065e-06, "loss": 0.0, "step": 1944 }, { "epoch": 0.7551279019711273, "grad_norm": 8.168874774128199e-05, "learning_rate": 6.799243876539213e-06, "loss": 0.0, "step": 1946 }, { "epoch": 0.7559039840903166, "grad_norm": 0.00012035737745463848, "learning_rate": 6.793320934962039e-06, "loss": 0.0, "step": 1948 }, { "epoch": 0.7566800662095058, "grad_norm": 0.00016538999625481665, "learning_rate": 6.787395103987323e-06, "loss": 0.0, "step": 1950 }, { "epoch": 0.757456148328695, "grad_norm": 0.0001929654972627759, "learning_rate": 6.781466393162761e-06, "loss": 0.0, "step": 1952 }, { "epoch": 0.7582322304478842, "grad_norm": 0.00014818822091910988, "learning_rate": 6.775534812040686e-06, "loss": 0.0, "step": 1954 }, { "epoch": 0.7590083125670735, "grad_norm": 0.00010574848420219496, "learning_rate": 6.76960037017806e-06, "loss": 0.0, "step": 1956 }, { "epoch": 0.7597843946862627, "grad_norm": 0.00020344011136330664, "learning_rate": 6.763663077136451e-06, "loss": 0.0, "step": 1958 }, { "epoch": 0.760560476805452, "grad_norm": 0.00011606552288867533, "learning_rate": 6.757722942482022e-06, "loss": 0.0, "step": 1960 }, { "epoch": 0.760560476805452, "eval_accuracy": 1.0, "eval_loss": 1.0638005960572627e-06, "eval_runtime": 129.1205, "eval_samples_per_second": 38.724, "eval_steps_per_second": 9.681, "step": 1960 }, { "epoch": 0.7613365589246412, "grad_norm": 0.00011785321112256497, "learning_rate": 6.751779975785515e-06, "loss": 0.0, "step": 1962 }, { "epoch": 0.7621126410438305, "grad_norm": 0.00010330312215955928, "learning_rate": 6.745834186622232e-06, "loss": 0.0, "step": 1964 }, { "epoch": 0.7628887231630197, "grad_norm": 9.924966434482485e-05, "learning_rate": 6.739885584572026e-06, "loss": 0.0, "step": 1966 }, { "epoch": 0.7636648052822089, "grad_norm": 0.0001395126455463469, "learning_rate": 6.733934179219281e-06, "loss": 0.0, "step": 1968 }, { "epoch": 0.7644408874013982, "grad_norm": 0.00010618566739140078, "learning_rate": 6.727979980152899e-06, "loss": 0.0, "step": 1970 }, { "epoch": 0.7652169695205874, "grad_norm": 0.00011451529280748218, "learning_rate": 6.7220229969662776e-06, "loss": 0.0, "step": 1972 }, { "epoch": 0.7659930516397766, "grad_norm": 0.00010447852400830016, "learning_rate": 6.716063239257307e-06, "loss": 0.0, "step": 1974 }, { "epoch": 0.7667691337589658, "grad_norm": 6.806269084336236e-05, "learning_rate": 6.710100716628345e-06, "loss": 0.0, "step": 1976 }, { "epoch": 0.7675452158781552, "grad_norm": 0.00021106898202560842, "learning_rate": 6.704135438686203e-06, "loss": 0.0, "step": 1978 }, { "epoch": 0.7683212979973444, "grad_norm": 0.0003254037583246827, "learning_rate": 6.698167415042135e-06, "loss": 0.0, "step": 1980 }, { "epoch": 0.7690973801165336, "grad_norm": 8.719584729988128e-05, "learning_rate": 6.692196655311814e-06, "loss": 0.0, "step": 1982 }, { "epoch": 0.7698734622357228, "grad_norm": 7.821103645255789e-05, "learning_rate": 6.686223169115328e-06, "loss": 0.0, "step": 1984 }, { "epoch": 0.7706495443549121, "grad_norm": 9.548355592414737e-05, "learning_rate": 6.680246966077151e-06, "loss": 0.0, "step": 1986 }, { "epoch": 0.7714256264741013, "grad_norm": 8.589580829720944e-05, "learning_rate": 6.674268055826139e-06, "loss": 0.0, "step": 1988 }, { "epoch": 0.7722017085932905, "grad_norm": 8.828957652440295e-05, "learning_rate": 6.6682864479955075e-06, "loss": 0.0, "step": 1990 }, { "epoch": 0.7729777907124797, "grad_norm": 0.00014582987932953984, "learning_rate": 6.66230215222282e-06, "loss": 0.0, "step": 1992 }, { "epoch": 0.773753872831669, "grad_norm": 8.873114711605012e-05, "learning_rate": 6.656315178149971e-06, "loss": 0.0, "step": 1994 }, { "epoch": 0.7745299549508582, "grad_norm": 9.8653239547275e-05, "learning_rate": 6.650325535423166e-06, "loss": 0.0, "step": 1996 }, { "epoch": 0.7753060370700475, "grad_norm": 0.00010092699812958017, "learning_rate": 6.644333233692917e-06, "loss": 0.0, "step": 1998 }, { "epoch": 0.7760821191892368, "grad_norm": 0.00010230349289486185, "learning_rate": 6.638338282614014e-06, "loss": 0.0, "step": 2000 }, { "epoch": 0.7760821191892368, "eval_accuracy": 1.0, "eval_loss": 1.031831970976782e-06, "eval_runtime": 128.6542, "eval_samples_per_second": 38.864, "eval_steps_per_second": 9.716, "step": 2000 }, { "epoch": 0.776858201308426, "grad_norm": 0.0001485104439780116, "learning_rate": 6.6323406918455205e-06, "loss": 0.0, "step": 2002 }, { "epoch": 0.7776342834276152, "grad_norm": 8.486644219374284e-05, "learning_rate": 6.6263404710507495e-06, "loss": 0.0, "step": 2004 }, { "epoch": 0.7784103655468044, "grad_norm": 0.00011163012095494196, "learning_rate": 6.6203376298972535e-06, "loss": 0.0, "step": 2006 }, { "epoch": 0.7791864476659937, "grad_norm": 5.672009865520522e-05, "learning_rate": 6.614332178056806e-06, "loss": 0.0, "step": 2008 }, { "epoch": 0.7799625297851829, "grad_norm": 7.772738172207028e-05, "learning_rate": 6.608324125205389e-06, "loss": 0.0, "step": 2010 }, { "epoch": 0.7807386119043721, "grad_norm": 9.467496420256793e-05, "learning_rate": 6.60231348102317e-06, "loss": 0.0, "step": 2012 }, { "epoch": 0.7815146940235613, "grad_norm": 0.00020423560636118054, "learning_rate": 6.596300255194496e-06, "loss": 0.0, "step": 2014 }, { "epoch": 0.7822907761427506, "grad_norm": 8.248398080468178e-05, "learning_rate": 6.590284457407876e-06, "loss": 0.0, "step": 2016 }, { "epoch": 0.7830668582619399, "grad_norm": 0.0002861861139535904, "learning_rate": 6.5842660973559545e-06, "loss": 0.0, "step": 2018 }, { "epoch": 0.7838429403811291, "grad_norm": 6.330267933662981e-05, "learning_rate": 6.578245184735513e-06, "loss": 0.0, "step": 2020 }, { "epoch": 0.7846190225003183, "grad_norm": 6.448883505072445e-05, "learning_rate": 6.572221729247441e-06, "loss": 0.0, "step": 2022 }, { "epoch": 0.7853951046195076, "grad_norm": 0.0002733370929490775, "learning_rate": 6.5661957405967255e-06, "loss": 0.0, "step": 2024 }, { "epoch": 0.7861711867386968, "grad_norm": 6.26511755399406e-05, "learning_rate": 6.560167228492436e-06, "loss": 0.0, "step": 2026 }, { "epoch": 0.786947268857886, "grad_norm": 9.563386265654117e-05, "learning_rate": 6.554136202647707e-06, "loss": 0.0, "step": 2028 }, { "epoch": 0.7877233509770752, "grad_norm": 7.283008744707331e-05, "learning_rate": 6.548102672779725e-06, "loss": 0.0, "step": 2030 }, { "epoch": 0.7884994330962645, "grad_norm": 0.0007823503110557795, "learning_rate": 6.5420666486097084e-06, "loss": 0.0, "step": 2032 }, { "epoch": 0.7892755152154537, "grad_norm": 0.00011290707334410399, "learning_rate": 6.536028139862895e-06, "loss": 0.0, "step": 2034 }, { "epoch": 0.7900515973346429, "grad_norm": 0.0006826606695540249, "learning_rate": 6.529987156268527e-06, "loss": 0.0, "step": 2036 }, { "epoch": 0.7908276794538323, "grad_norm": 7.063138036755845e-05, "learning_rate": 6.523943707559832e-06, "loss": 0.0, "step": 2038 }, { "epoch": 0.7916037615730215, "grad_norm": 6.516786379506811e-05, "learning_rate": 6.517897803474011e-06, "loss": 0.0, "step": 2040 }, { "epoch": 0.7916037615730215, "eval_accuracy": 1.0, "eval_loss": 8.988279773802788e-07, "eval_runtime": 132.8465, "eval_samples_per_second": 37.637, "eval_steps_per_second": 9.409, "step": 2040 }, { "epoch": 0.7923798436922107, "grad_norm": 7.547021959908307e-05, "learning_rate": 6.5118494537522235e-06, "loss": 0.0, "step": 2042 }, { "epoch": 0.7931559258113999, "grad_norm": 5.888815940124914e-05, "learning_rate": 6.505798668139563e-06, "loss": 0.0, "step": 2044 }, { "epoch": 0.7939320079305892, "grad_norm": 6.389299960574135e-05, "learning_rate": 6.499745456385054e-06, "loss": 0.0, "step": 2046 }, { "epoch": 0.7947080900497784, "grad_norm": 9.587859676685184e-05, "learning_rate": 6.493689828241625e-06, "loss": 0.0, "step": 2048 }, { "epoch": 0.7954841721689676, "grad_norm": 9.449503704672679e-05, "learning_rate": 6.4876317934661036e-06, "loss": 0.0, "step": 2050 }, { "epoch": 0.7962602542881568, "grad_norm": 0.00022104787058196962, "learning_rate": 6.481571361819189e-06, "loss": 0.0, "step": 2052 }, { "epoch": 0.7970363364073461, "grad_norm": 6.70891095069237e-05, "learning_rate": 6.475508543065445e-06, "loss": 0.0, "step": 2054 }, { "epoch": 0.7978124185265354, "grad_norm": 0.00019334866374265403, "learning_rate": 6.469443346973281e-06, "loss": 0.0, "step": 2056 }, { "epoch": 0.7985885006457246, "grad_norm": 6.618146289838478e-05, "learning_rate": 6.463375783314938e-06, "loss": 0.0, "step": 2058 }, { "epoch": 0.7993645827649138, "grad_norm": 8.032534969970584e-05, "learning_rate": 6.457305861866471e-06, "loss": 0.0, "step": 2060 }, { "epoch": 0.8001406648841031, "grad_norm": 6.887259951326996e-05, "learning_rate": 6.451233592407732e-06, "loss": 0.0, "step": 2062 }, { "epoch": 0.8009167470032923, "grad_norm": 0.00010397352889413014, "learning_rate": 6.445158984722358e-06, "loss": 0.0, "step": 2064 }, { "epoch": 0.8016928291224815, "grad_norm": 5.919776958762668e-05, "learning_rate": 6.439082048597755e-06, "loss": 0.0, "step": 2066 }, { "epoch": 0.8024689112416707, "grad_norm": 0.00010084972745971754, "learning_rate": 6.433002793825076e-06, "loss": 0.0, "step": 2068 }, { "epoch": 0.80324499336086, "grad_norm": 5.972270082565956e-05, "learning_rate": 6.426921230199215e-06, "loss": 0.0, "step": 2070 }, { "epoch": 0.8040210754800492, "grad_norm": 6.114527786849067e-05, "learning_rate": 6.420837367518781e-06, "loss": 0.0, "step": 2072 }, { "epoch": 0.8047971575992384, "grad_norm": 0.00010356724669691175, "learning_rate": 6.414751215586091e-06, "loss": 0.0, "step": 2074 }, { "epoch": 0.8055732397184278, "grad_norm": 6.264915282372385e-05, "learning_rate": 6.408662784207149e-06, "loss": 0.0, "step": 2076 }, { "epoch": 0.806349321837617, "grad_norm": 7.387966616079211e-05, "learning_rate": 6.402572083191632e-06, "loss": 0.0, "step": 2078 }, { "epoch": 0.8071254039568062, "grad_norm": 7.030357664916664e-05, "learning_rate": 6.396479122352872e-06, "loss": 0.0, "step": 2080 }, { "epoch": 0.8071254039568062, "eval_accuracy": 1.0, "eval_loss": 8.613870363660681e-07, "eval_runtime": 129.2417, "eval_samples_per_second": 38.687, "eval_steps_per_second": 9.672, "step": 2080 }, { "epoch": 0.8079014860759954, "grad_norm": 6.143885548226535e-05, "learning_rate": 6.390383911507845e-06, "loss": 0.0, "step": 2082 }, { "epoch": 0.8086775681951847, "grad_norm": 5.496588710229844e-05, "learning_rate": 6.384286460477149e-06, "loss": 0.0, "step": 2084 }, { "epoch": 0.8094536503143739, "grad_norm": 5.879181844647974e-05, "learning_rate": 6.378186779084996e-06, "loss": 0.0, "step": 2086 }, { "epoch": 0.8102297324335631, "grad_norm": 7.97929460532032e-05, "learning_rate": 6.3720848771591884e-06, "loss": 0.0, "step": 2088 }, { "epoch": 0.8110058145527523, "grad_norm": 9.012407826958224e-05, "learning_rate": 6.3659807645311056e-06, "loss": 0.0, "step": 2090 }, { "epoch": 0.8117818966719416, "grad_norm": 6.274648330872878e-05, "learning_rate": 6.359874451035688e-06, "loss": 0.0, "step": 2092 }, { "epoch": 0.8125579787911308, "grad_norm": 0.00030235707527026534, "learning_rate": 6.3537659465114275e-06, "loss": 0.0, "step": 2094 }, { "epoch": 0.8133340609103201, "grad_norm": 5.93493168707937e-05, "learning_rate": 6.34765526080034e-06, "loss": 0.0, "step": 2096 }, { "epoch": 0.8141101430295093, "grad_norm": 7.00261298334226e-05, "learning_rate": 6.34154240374796e-06, "loss": 0.0, "step": 2098 }, { "epoch": 0.8148862251486986, "grad_norm": 3.803496292675845e-05, "learning_rate": 6.33542738520332e-06, "loss": 0.0, "step": 2100 }, { "epoch": 0.8156623072678878, "grad_norm": 5.5201875511556864e-05, "learning_rate": 6.329310215018931e-06, "loss": 0.0, "step": 2102 }, { "epoch": 0.816438389387077, "grad_norm": 5.329560735845007e-05, "learning_rate": 6.323190903050776e-06, "loss": 0.0, "step": 2104 }, { "epoch": 0.8172144715062662, "grad_norm": 9.465152106713504e-05, "learning_rate": 6.317069459158284e-06, "loss": 0.0, "step": 2106 }, { "epoch": 0.8179905536254555, "grad_norm": 5.647428042721003e-05, "learning_rate": 6.310945893204324e-06, "loss": 0.0, "step": 2108 }, { "epoch": 0.8187666357446447, "grad_norm": 9.46095897234045e-05, "learning_rate": 6.30482021505518e-06, "loss": 0.0, "step": 2110 }, { "epoch": 0.8195427178638339, "grad_norm": 4.6596753236372024e-05, "learning_rate": 6.298692434580543e-06, "loss": 0.0, "step": 2112 }, { "epoch": 0.8203187999830232, "grad_norm": 5.60645385121461e-05, "learning_rate": 6.292562561653486e-06, "loss": 0.0, "step": 2114 }, { "epoch": 0.8210948821022125, "grad_norm": 0.0001269608037546277, "learning_rate": 6.286430606150458e-06, "loss": 0.0, "step": 2116 }, { "epoch": 0.8218709642214017, "grad_norm": 8.256758883362636e-05, "learning_rate": 6.280296577951262e-06, "loss": 0.0, "step": 2118 }, { "epoch": 0.8226470463405909, "grad_norm": 6.662441592197865e-05, "learning_rate": 6.27416048693904e-06, "loss": 0.0, "step": 2120 }, { "epoch": 0.8226470463405909, "eval_accuracy": 1.0, "eval_loss": 7.355536126851803e-07, "eval_runtime": 135.9782, "eval_samples_per_second": 36.771, "eval_steps_per_second": 9.193, "step": 2120 }, { "epoch": 0.8234231284597802, "grad_norm": 8.367696864297614e-05, "learning_rate": 6.268022343000258e-06, "loss": 0.0, "step": 2122 }, { "epoch": 0.8241992105789694, "grad_norm": 5.656502253259532e-05, "learning_rate": 6.261882156024688e-06, "loss": 0.0, "step": 2124 }, { "epoch": 0.8249752926981586, "grad_norm": 6.76195923006162e-05, "learning_rate": 6.255739935905396e-06, "loss": 0.0, "step": 2126 }, { "epoch": 0.8257513748173478, "grad_norm": 7.083230593707412e-05, "learning_rate": 6.249595692538726e-06, "loss": 0.0, "step": 2128 }, { "epoch": 0.8265274569365371, "grad_norm": 0.00015189322584774345, "learning_rate": 6.243449435824276e-06, "loss": 0.0, "step": 2130 }, { "epoch": 0.8273035390557263, "grad_norm": 7.22018739907071e-05, "learning_rate": 6.2373011756648905e-06, "loss": 0.0, "step": 2132 }, { "epoch": 0.8280796211749156, "grad_norm": 5.055622023064643e-05, "learning_rate": 6.231150921966643e-06, "loss": 0.0, "step": 2134 }, { "epoch": 0.8288557032941049, "grad_norm": 4.879253538092598e-05, "learning_rate": 6.22499868463882e-06, "loss": 0.0, "step": 2136 }, { "epoch": 0.8296317854132941, "grad_norm": 4.9753500206861645e-05, "learning_rate": 6.2188444735939e-06, "loss": 0.0, "step": 2138 }, { "epoch": 0.8304078675324833, "grad_norm": 9.38455414143391e-05, "learning_rate": 6.212688298747546e-06, "loss": 0.0, "step": 2140 }, { "epoch": 0.8311839496516725, "grad_norm": 5.881295874132775e-05, "learning_rate": 6.206530170018581e-06, "loss": 0.0, "step": 2142 }, { "epoch": 0.8319600317708618, "grad_norm": 3.5772693081526086e-05, "learning_rate": 6.2003700973289785e-06, "loss": 0.0, "step": 2144 }, { "epoch": 0.832736113890051, "grad_norm": 7.31175096007064e-05, "learning_rate": 6.194208090603845e-06, "loss": 0.0, "step": 2146 }, { "epoch": 0.8335121960092402, "grad_norm": 5.577166666625999e-05, "learning_rate": 6.1880441597714e-06, "loss": 0.0, "step": 2148 }, { "epoch": 0.8342882781284294, "grad_norm": 0.00013712100917473435, "learning_rate": 6.181878314762968e-06, "loss": 0.0, "step": 2150 }, { "epoch": 0.8350643602476187, "grad_norm": 8.808437269181013e-05, "learning_rate": 6.17571056551295e-06, "loss": 0.0, "step": 2152 }, { "epoch": 0.835840442366808, "grad_norm": 6.489011866506189e-05, "learning_rate": 6.169540921958823e-06, "loss": 0.0, "step": 2154 }, { "epoch": 0.8366165244859972, "grad_norm": 0.0002766057150438428, "learning_rate": 6.163369394041112e-06, "loss": 0.0, "step": 2156 }, { "epoch": 0.8373926066051864, "grad_norm": 5.5486641940660775e-05, "learning_rate": 6.157195991703378e-06, "loss": 0.0, "step": 2158 }, { "epoch": 0.8381686887243757, "grad_norm": 7.48790189391002e-05, "learning_rate": 6.151020724892205e-06, "loss": 0.0, "step": 2160 }, { "epoch": 0.8381686887243757, "eval_accuracy": 1.0, "eval_loss": 8.351366886927281e-07, "eval_runtime": 129.3151, "eval_samples_per_second": 38.665, "eval_steps_per_second": 9.666, "step": 2160 }, { "epoch": 0.8389447708435649, "grad_norm": 5.235821663518436e-05, "learning_rate": 6.144843603557176e-06, "loss": 0.0, "step": 2162 }, { "epoch": 0.8397208529627541, "grad_norm": 4.783396434504539e-05, "learning_rate": 6.138664637650867e-06, "loss": 0.0, "step": 2164 }, { "epoch": 0.8404969350819433, "grad_norm": 4.1697534470586106e-05, "learning_rate": 6.132483837128823e-06, "loss": 0.0, "step": 2166 }, { "epoch": 0.8412730172011326, "grad_norm": 5.896832954022102e-05, "learning_rate": 6.1263012119495455e-06, "loss": 0.0, "step": 2168 }, { "epoch": 0.8420490993203218, "grad_norm": 0.0012509578373283148, "learning_rate": 6.120116772074478e-06, "loss": 0.0, "step": 2170 }, { "epoch": 0.842825181439511, "grad_norm": 4.461203934624791e-05, "learning_rate": 6.1139305274679835e-06, "loss": 0.0, "step": 2172 }, { "epoch": 0.8436012635587004, "grad_norm": 4.714369788416661e-05, "learning_rate": 6.107742488097338e-06, "loss": 0.0, "step": 2174 }, { "epoch": 0.8443773456778896, "grad_norm": 6.070686140446924e-05, "learning_rate": 6.101552663932704e-06, "loss": 0.0, "step": 2176 }, { "epoch": 0.8451534277970788, "grad_norm": 4.287466799723916e-05, "learning_rate": 6.095361064947124e-06, "loss": 0.0, "step": 2178 }, { "epoch": 0.845929509916268, "grad_norm": 6.500168092316017e-05, "learning_rate": 6.089167701116498e-06, "loss": 0.0, "step": 2180 }, { "epoch": 0.8467055920354573, "grad_norm": 4.867304232902825e-05, "learning_rate": 6.082972582419569e-06, "loss": 0.0, "step": 2182 }, { "epoch": 0.8474816741546465, "grad_norm": 8.729327964829281e-05, "learning_rate": 6.076775718837911e-06, "loss": 0.0, "step": 2184 }, { "epoch": 0.8482577562738357, "grad_norm": 4.093181269126944e-05, "learning_rate": 6.070577120355903e-06, "loss": 0.0, "step": 2186 }, { "epoch": 0.8490338383930249, "grad_norm": 0.00010701356950448826, "learning_rate": 6.064376796960723e-06, "loss": 0.0, "step": 2188 }, { "epoch": 0.8498099205122142, "grad_norm": 5.5773834901629016e-05, "learning_rate": 6.058174758642332e-06, "loss": 0.0, "step": 2190 }, { "epoch": 0.8505860026314034, "grad_norm": 6.0533511714311317e-05, "learning_rate": 6.051971015393447e-06, "loss": 0.0, "step": 2192 }, { "epoch": 0.8513620847505927, "grad_norm": 5.2021106967004016e-05, "learning_rate": 6.045765577209536e-06, "loss": 0.0, "step": 2194 }, { "epoch": 0.8521381668697819, "grad_norm": 4.812583210878074e-05, "learning_rate": 6.039558454088796e-06, "loss": 0.0, "step": 2196 }, { "epoch": 0.8529142489889712, "grad_norm": 0.00014483649283647537, "learning_rate": 6.033349656032143e-06, "loss": 0.0, "step": 2198 }, { "epoch": 0.8536903311081604, "grad_norm": 5.994698221911676e-05, "learning_rate": 6.027139193043185e-06, "loss": 0.0, "step": 2200 }, { "epoch": 0.8536903311081604, "eval_accuracy": 1.0, "eval_loss": 7.086546816026384e-07, "eval_runtime": 128.1682, "eval_samples_per_second": 39.011, "eval_steps_per_second": 9.753, "step": 2200 }, { "epoch": 0.8544664132273496, "grad_norm": 4.5254299038788304e-05, "learning_rate": 6.0209270751282165e-06, "loss": 0.0, "step": 2202 }, { "epoch": 0.8552424953465388, "grad_norm": 6.307959847617894e-05, "learning_rate": 6.014713312296198e-06, "loss": 0.0, "step": 2204 }, { "epoch": 0.8560185774657281, "grad_norm": 0.007048322819173336, "learning_rate": 6.0084979145587444e-06, "loss": 0.0, "step": 2206 }, { "epoch": 0.8567946595849173, "grad_norm": 3.5707907954929397e-05, "learning_rate": 6.002280891930093e-06, "loss": 0.0, "step": 2208 }, { "epoch": 0.8575707417041065, "grad_norm": 7.197361264843494e-05, "learning_rate": 5.996062254427112e-06, "loss": 0.0, "step": 2210 }, { "epoch": 0.8583468238232959, "grad_norm": 0.00021210667910054326, "learning_rate": 5.989842012069265e-06, "loss": 0.0, "step": 2212 }, { "epoch": 0.8591229059424851, "grad_norm": 5.329148916644044e-05, "learning_rate": 5.983620174878601e-06, "loss": 0.0, "step": 2214 }, { "epoch": 0.8598989880616743, "grad_norm": 4.61207891930826e-05, "learning_rate": 5.977396752879742e-06, "loss": 0.0, "step": 2216 }, { "epoch": 0.8606750701808635, "grad_norm": 5.778288686997257e-05, "learning_rate": 5.97117175609986e-06, "loss": 0.0, "step": 2218 }, { "epoch": 0.8614511523000528, "grad_norm": 4.77270114060957e-05, "learning_rate": 5.964945194568669e-06, "loss": 0.0, "step": 2220 }, { "epoch": 0.862227234419242, "grad_norm": 5.989255441818386e-05, "learning_rate": 5.958717078318397e-06, "loss": 0.0, "step": 2222 }, { "epoch": 0.8630033165384312, "grad_norm": 3.358606045367196e-05, "learning_rate": 5.952487417383782e-06, "loss": 0.0, "step": 2224 }, { "epoch": 0.8637793986576204, "grad_norm": 6.597671745112166e-05, "learning_rate": 5.946256221802052e-06, "loss": 0.0, "step": 2226 }, { "epoch": 0.8645554807768097, "grad_norm": 9.850203059613705e-05, "learning_rate": 5.940023501612902e-06, "loss": 0.0, "step": 2228 }, { "epoch": 0.8653315628959989, "grad_norm": 0.00010081121581606567, "learning_rate": 5.9337892668584896e-06, "loss": 0.0, "step": 2230 }, { "epoch": 0.8661076450151882, "grad_norm": 6.350975309032947e-05, "learning_rate": 5.927553527583407e-06, "loss": 0.0, "step": 2232 }, { "epoch": 0.8668837271343774, "grad_norm": 7.55402070353739e-05, "learning_rate": 5.9213162938346765e-06, "loss": 0.0, "step": 2234 }, { "epoch": 0.8676598092535667, "grad_norm": 5.821433660457842e-05, "learning_rate": 5.915077575661723e-06, "loss": 0.0, "step": 2236 }, { "epoch": 0.8684358913727559, "grad_norm": 5.73672768950928e-05, "learning_rate": 5.908837383116367e-06, "loss": 0.0, "step": 2238 }, { "epoch": 0.8692119734919451, "grad_norm": 4.653895666706376e-05, "learning_rate": 5.902595726252801e-06, "loss": 0.0, "step": 2240 }, { "epoch": 0.8692119734919451, "eval_accuracy": 1.0, "eval_loss": 7.551036560471402e-07, "eval_runtime": 128.1934, "eval_samples_per_second": 39.004, "eval_steps_per_second": 9.751, "step": 2240 }, { "epoch": 0.8699880556111343, "grad_norm": 8.506119047524408e-05, "learning_rate": 5.896352615127578e-06, "loss": 0.0, "step": 2242 }, { "epoch": 0.8707641377303236, "grad_norm": 4.079376958543435e-05, "learning_rate": 5.890108059799596e-06, "loss": 0.0, "step": 2244 }, { "epoch": 0.8715402198495128, "grad_norm": 3.7677615182474256e-05, "learning_rate": 5.883862070330079e-06, "loss": 0.0, "step": 2246 }, { "epoch": 0.872316301968702, "grad_norm": 5.546142710954882e-05, "learning_rate": 5.877614656782559e-06, "loss": 0.0, "step": 2248 }, { "epoch": 0.8730923840878912, "grad_norm": 7.051986904116347e-05, "learning_rate": 5.8713658292228695e-06, "loss": 0.0, "step": 2250 }, { "epoch": 0.8738684662070806, "grad_norm": 5.4527925385627896e-05, "learning_rate": 5.865115597719111e-06, "loss": 0.0, "step": 2252 }, { "epoch": 0.8746445483262698, "grad_norm": 4.737853305414319e-05, "learning_rate": 5.858863972341656e-06, "loss": 0.0, "step": 2254 }, { "epoch": 0.875420630445459, "grad_norm": 4.212254862068221e-05, "learning_rate": 5.85261096316312e-06, "loss": 0.0, "step": 2256 }, { "epoch": 0.8761967125646483, "grad_norm": 6.051822128938511e-05, "learning_rate": 5.846356580258345e-06, "loss": 0.0, "step": 2258 }, { "epoch": 0.8769727946838375, "grad_norm": 4.117765274713747e-05, "learning_rate": 5.840100833704392e-06, "loss": 0.0, "step": 2260 }, { "epoch": 0.8777488768030267, "grad_norm": 7.199056562967598e-05, "learning_rate": 5.8338437335805124e-06, "loss": 0.0, "step": 2262 }, { "epoch": 0.8785249589222159, "grad_norm": 7.011962588876486e-05, "learning_rate": 5.827585289968143e-06, "loss": 0.0, "step": 2264 }, { "epoch": 0.8793010410414052, "grad_norm": 3.5901935916626826e-05, "learning_rate": 5.821325512950886e-06, "loss": 0.0, "step": 2266 }, { "epoch": 0.8800771231605944, "grad_norm": 6.797764945076779e-05, "learning_rate": 5.815064412614487e-06, "loss": 0.0, "step": 2268 }, { "epoch": 0.8808532052797836, "grad_norm": 4.7395566070917994e-05, "learning_rate": 5.80880199904683e-06, "loss": 0.0, "step": 2270 }, { "epoch": 0.881629287398973, "grad_norm": 4.402499689604156e-05, "learning_rate": 5.80253828233791e-06, "loss": 0.0, "step": 2272 }, { "epoch": 0.8824053695181622, "grad_norm": 4.1063631215365604e-05, "learning_rate": 5.796273272579823e-06, "loss": 0.0, "step": 2274 }, { "epoch": 0.8831814516373514, "grad_norm": 4.774528133566491e-05, "learning_rate": 5.79000697986675e-06, "loss": 0.0, "step": 2276 }, { "epoch": 0.8839575337565406, "grad_norm": 3.76619391317945e-05, "learning_rate": 5.783739414294938e-06, "loss": 0.0, "step": 2278 }, { "epoch": 0.8847336158757299, "grad_norm": 4.986881322111003e-05, "learning_rate": 5.777470585962682e-06, "loss": 0.0, "step": 2280 }, { "epoch": 0.8847336158757299, "eval_accuracy": 1.0, "eval_loss": 7.392063707811758e-07, "eval_runtime": 127.8595, "eval_samples_per_second": 39.105, "eval_steps_per_second": 9.776, "step": 2280 }, { "epoch": 0.8855096979949191, "grad_norm": 4.4260694266995415e-05, "learning_rate": 5.771200504970316e-06, "loss": 0.0, "step": 2282 }, { "epoch": 0.8862857801141083, "grad_norm": 7.83668365329504e-05, "learning_rate": 5.764929181420191e-06, "loss": 0.0, "step": 2284 }, { "epoch": 0.8870618622332975, "grad_norm": 3.579173790058121e-05, "learning_rate": 5.758656625416659e-06, "loss": 0.0, "step": 2286 }, { "epoch": 0.8878379443524868, "grad_norm": 9.019627759698778e-05, "learning_rate": 5.752382847066058e-06, "loss": 0.0, "step": 2288 }, { "epoch": 0.8886140264716761, "grad_norm": 3.581602868507616e-05, "learning_rate": 5.7461078564766945e-06, "loss": 0.0, "step": 2290 }, { "epoch": 0.8893901085908653, "grad_norm": 3.4041298931697384e-05, "learning_rate": 5.739831663758834e-06, "loss": 0.0, "step": 2292 }, { "epoch": 0.8901661907100545, "grad_norm": 4.7927573177730665e-05, "learning_rate": 5.733554279024668e-06, "loss": 0.0, "step": 2294 }, { "epoch": 0.8909422728292438, "grad_norm": 6.324730202322826e-05, "learning_rate": 5.727275712388318e-06, "loss": 0.0, "step": 2296 }, { "epoch": 0.891718354948433, "grad_norm": 8.095883822534233e-05, "learning_rate": 5.720995973965806e-06, "loss": 0.0, "step": 2298 }, { "epoch": 0.8924944370676222, "grad_norm": 4.244003866915591e-05, "learning_rate": 5.714715073875043e-06, "loss": 0.0, "step": 2300 }, { "epoch": 0.8932705191868114, "grad_norm": 3.395311068743467e-05, "learning_rate": 5.7084330222358106e-06, "loss": 0.0, "step": 2302 }, { "epoch": 0.8940466013060007, "grad_norm": 0.0001490003924118355, "learning_rate": 5.7021498291697465e-06, "loss": 0.0, "step": 2304 }, { "epoch": 0.8948226834251899, "grad_norm": 3.589324842323549e-05, "learning_rate": 5.695865504800328e-06, "loss": 0.0, "step": 2306 }, { "epoch": 0.8955987655443791, "grad_norm": 3.4185512049589306e-05, "learning_rate": 5.689580059252852e-06, "loss": 0.0, "step": 2308 }, { "epoch": 0.8963748476635685, "grad_norm": 4.0242739487439394e-05, "learning_rate": 5.683293502654429e-06, "loss": 0.0, "step": 2310 }, { "epoch": 0.8971509297827577, "grad_norm": 3.512334296829067e-05, "learning_rate": 5.6770058451339514e-06, "loss": 0.0, "step": 2312 }, { "epoch": 0.8979270119019469, "grad_norm": 3.1275991204893216e-05, "learning_rate": 5.6707170968220895e-06, "loss": 0.0, "step": 2314 }, { "epoch": 0.8987030940211361, "grad_norm": 3.105650102952495e-05, "learning_rate": 5.664427267851271e-06, "loss": 0.0, "step": 2316 }, { "epoch": 0.8994791761403254, "grad_norm": 2.5433662813156843e-05, "learning_rate": 5.658136368355665e-06, "loss": 0.0, "step": 2318 }, { "epoch": 0.9002552582595146, "grad_norm": 3.3490086934762076e-05, "learning_rate": 5.651844408471162e-06, "loss": 0.0, "step": 2320 }, { "epoch": 0.9002552582595146, "eval_accuracy": 1.0, "eval_loss": 7.633988730049168e-07, "eval_runtime": 129.4401, "eval_samples_per_second": 38.628, "eval_steps_per_second": 9.657, "step": 2320 }, { "epoch": 0.9010313403787038, "grad_norm": 0.00026165382587350905, "learning_rate": 5.645551398335367e-06, "loss": 0.0, "step": 2322 }, { "epoch": 0.901807422497893, "grad_norm": 4.723898018710315e-05, "learning_rate": 5.6392573480875724e-06, "loss": 0.0, "step": 2324 }, { "epoch": 0.9025835046170823, "grad_norm": 5.388520730775781e-05, "learning_rate": 5.632962267868747e-06, "loss": 0.0, "step": 2326 }, { "epoch": 0.9033595867362715, "grad_norm": 3.0265769964898936e-05, "learning_rate": 5.626666167821522e-06, "loss": 0.0, "step": 2328 }, { "epoch": 0.9041356688554608, "grad_norm": 3.531000766088255e-05, "learning_rate": 5.620369058090168e-06, "loss": 0.0, "step": 2330 }, { "epoch": 0.90491175097465, "grad_norm": 3.94788476114627e-05, "learning_rate": 5.6140709488205854e-06, "loss": 0.0, "step": 2332 }, { "epoch": 0.9056878330938393, "grad_norm": 3.280627788626589e-05, "learning_rate": 5.607771850160285e-06, "loss": 0.0, "step": 2334 }, { "epoch": 0.9064639152130285, "grad_norm": 2.856614264601376e-05, "learning_rate": 5.601471772258368e-06, "loss": 0.0, "step": 2336 }, { "epoch": 0.9072399973322177, "grad_norm": 7.2697272116784e-05, "learning_rate": 5.595170725265517e-06, "loss": 0.0, "step": 2338 }, { "epoch": 0.9080160794514069, "grad_norm": 3.925288183381781e-05, "learning_rate": 5.588868719333974e-06, "loss": 0.0, "step": 2340 }, { "epoch": 0.9087921615705962, "grad_norm": 4.3010670196963474e-05, "learning_rate": 5.582565764617528e-06, "loss": 0.0, "step": 2342 }, { "epoch": 0.9095682436897854, "grad_norm": 4.602810804499313e-05, "learning_rate": 5.576261871271494e-06, "loss": 0.0, "step": 2344 }, { "epoch": 0.9103443258089746, "grad_norm": 4.22390949097462e-05, "learning_rate": 5.569957049452703e-06, "loss": 0.0, "step": 2346 }, { "epoch": 0.9111204079281638, "grad_norm": 6.809924525441602e-05, "learning_rate": 5.56365130931948e-06, "loss": 0.0, "step": 2348 }, { "epoch": 0.9118964900473532, "grad_norm": 4.36231421190314e-05, "learning_rate": 5.557344661031628e-06, "loss": 0.0, "step": 2350 }, { "epoch": 0.9126725721665424, "grad_norm": 5.4554071539314464e-05, "learning_rate": 5.551037114750415e-06, "loss": 0.0, "step": 2352 }, { "epoch": 0.9134486542857316, "grad_norm": 4.67685422336217e-05, "learning_rate": 5.544728680638557e-06, "loss": 0.0, "step": 2354 }, { "epoch": 0.9142247364049209, "grad_norm": 4.879764674114995e-05, "learning_rate": 5.538419368860196e-06, "loss": 0.0, "step": 2356 }, { "epoch": 0.9150008185241101, "grad_norm": 3.448760253377259e-05, "learning_rate": 5.532109189580893e-06, "loss": 0.0, "step": 2358 }, { "epoch": 0.9157769006432993, "grad_norm": 4.206347875879146e-05, "learning_rate": 5.525798152967605e-06, "loss": 0.0, "step": 2360 }, { "epoch": 0.9157769006432993, "eval_accuracy": 1.0, "eval_loss": 6.675672921119258e-07, "eval_runtime": 128.9874, "eval_samples_per_second": 38.763, "eval_steps_per_second": 9.691, "step": 2360 }, { "epoch": 0.9165529827624885, "grad_norm": 3.9369071600958705e-05, "learning_rate": 5.519486269188669e-06, "loss": 0.0, "step": 2362 }, { "epoch": 0.9173290648816778, "grad_norm": 6.862558075226843e-05, "learning_rate": 5.513173548413789e-06, "loss": 0.0, "step": 2364 }, { "epoch": 0.918105147000867, "grad_norm": 0.00010242811549687758, "learning_rate": 5.506860000814017e-06, "loss": 0.0, "step": 2366 }, { "epoch": 0.9188812291200563, "grad_norm": 4.119973164051771e-05, "learning_rate": 5.500545636561737e-06, "loss": 0.0, "step": 2368 }, { "epoch": 0.9196573112392455, "grad_norm": 0.0002903382119257003, "learning_rate": 5.494230465830648e-06, "loss": 0.0, "step": 2370 }, { "epoch": 0.9204333933584348, "grad_norm": 6.094573109294288e-05, "learning_rate": 5.487914498795748e-06, "loss": 0.0, "step": 2372 }, { "epoch": 0.921209475477624, "grad_norm": 4.35071560787037e-05, "learning_rate": 5.4815977456333205e-06, "loss": 0.0, "step": 2374 }, { "epoch": 0.9219855575968132, "grad_norm": 7.302317681023851e-05, "learning_rate": 5.475280216520913e-06, "loss": 0.0, "step": 2376 }, { "epoch": 0.9227616397160024, "grad_norm": 3.8269605283858255e-05, "learning_rate": 5.468961921637327e-06, "loss": 0.0, "step": 2378 }, { "epoch": 0.9235377218351917, "grad_norm": 4.568966687656939e-05, "learning_rate": 5.462642871162592e-06, "loss": 0.0, "step": 2380 }, { "epoch": 0.9243138039543809, "grad_norm": 5.007812796975486e-05, "learning_rate": 5.4563230752779595e-06, "loss": 0.0, "step": 2382 }, { "epoch": 0.9250898860735701, "grad_norm": 4.256696774973534e-05, "learning_rate": 5.450002544165881e-06, "loss": 0.0, "step": 2384 }, { "epoch": 0.9258659681927593, "grad_norm": 3.766133886529133e-05, "learning_rate": 5.443681288009991e-06, "loss": 0.0, "step": 2386 }, { "epoch": 0.9266420503119487, "grad_norm": 0.00022620403615292162, "learning_rate": 5.437359316995094e-06, "loss": 0.0, "step": 2388 }, { "epoch": 0.9274181324311379, "grad_norm": 2.6086177967954427e-05, "learning_rate": 5.431036641307146e-06, "loss": 0.0, "step": 2390 }, { "epoch": 0.9281942145503271, "grad_norm": 0.0001312434033025056, "learning_rate": 5.424713271133237e-06, "loss": 0.0, "step": 2392 }, { "epoch": 0.9289702966695164, "grad_norm": 4.4677108235191554e-05, "learning_rate": 5.41838921666158e-06, "loss": 0.0, "step": 2394 }, { "epoch": 0.9297463787887056, "grad_norm": 3.142909918096848e-05, "learning_rate": 5.412064488081482e-06, "loss": 0.0, "step": 2396 }, { "epoch": 0.9305224609078948, "grad_norm": 3.184907109243795e-05, "learning_rate": 5.4057390955833455e-06, "loss": 0.0, "step": 2398 }, { "epoch": 0.931298543027084, "grad_norm": 3.680320878629573e-05, "learning_rate": 5.3994130493586385e-06, "loss": 0.0, "step": 2400 }, { "epoch": 0.931298543027084, "eval_accuracy": 1.0, "eval_loss": 7.848901759643923e-07, "eval_runtime": 128.5127, "eval_samples_per_second": 38.907, "eval_steps_per_second": 9.727, "step": 2400 }, { "epoch": 0.9320746251462733, "grad_norm": 4.2067425965797156e-05, "learning_rate": 5.393086359599882e-06, "loss": 0.0, "step": 2402 }, { "epoch": 0.9328507072654625, "grad_norm": 4.140937016927637e-05, "learning_rate": 5.386759036500635e-06, "loss": 0.0, "step": 2404 }, { "epoch": 0.9336267893846517, "grad_norm": 4.108841312699951e-05, "learning_rate": 5.380431090255475e-06, "loss": 0.0, "step": 2406 }, { "epoch": 0.934402871503841, "grad_norm": 0.003133020829409361, "learning_rate": 5.3741025310599885e-06, "loss": 0.0, "step": 2408 }, { "epoch": 0.9351789536230303, "grad_norm": 7.869315595598891e-05, "learning_rate": 5.367773369110741e-06, "loss": 0.0, "step": 2410 }, { "epoch": 0.9359550357422195, "grad_norm": 3.957453736802563e-05, "learning_rate": 5.361443614605279e-06, "loss": 0.0, "step": 2412 }, { "epoch": 0.9367311178614087, "grad_norm": 0.0002614989352878183, "learning_rate": 5.355113277742095e-06, "loss": 0.0, "step": 2414 }, { "epoch": 0.937507199980598, "grad_norm": 3.491802635835484e-05, "learning_rate": 5.348782368720627e-06, "loss": 0.0, "step": 2416 }, { "epoch": 0.9382832820997872, "grad_norm": 2.2348587663145736e-05, "learning_rate": 5.3424508977412285e-06, "loss": 0.0, "step": 2418 }, { "epoch": 0.9390593642189764, "grad_norm": 0.00011292284034425393, "learning_rate": 5.336118875005165e-06, "loss": 0.0, "step": 2420 }, { "epoch": 0.9398354463381656, "grad_norm": 5.8705085393739864e-05, "learning_rate": 5.329786310714583e-06, "loss": 0.0, "step": 2422 }, { "epoch": 0.9406115284573549, "grad_norm": 3.963152630603872e-05, "learning_rate": 5.3234532150725096e-06, "loss": 0.0, "step": 2424 }, { "epoch": 0.9413876105765441, "grad_norm": 4.580015956889838e-05, "learning_rate": 5.317119598282823e-06, "loss": 0.0, "step": 2426 }, { "epoch": 0.9421636926957334, "grad_norm": 6.212648440850899e-05, "learning_rate": 5.310785470550243e-06, "loss": 0.0, "step": 2428 }, { "epoch": 0.9429397748149226, "grad_norm": 3.1238592782756314e-05, "learning_rate": 5.304450842080312e-06, "loss": 0.0, "step": 2430 }, { "epoch": 0.9437158569341119, "grad_norm": 3.790382106672041e-05, "learning_rate": 5.29811572307938e-06, "loss": 0.0, "step": 2432 }, { "epoch": 0.9444919390533011, "grad_norm": 3.788993490161374e-05, "learning_rate": 5.291780123754585e-06, "loss": 0.0, "step": 2434 }, { "epoch": 0.9452680211724903, "grad_norm": 5.075983426650055e-05, "learning_rate": 5.285444054313841e-06, "loss": 0.0, "step": 2436 }, { "epoch": 0.9460441032916795, "grad_norm": 3.219065911252983e-05, "learning_rate": 5.27910752496582e-06, "loss": 0.0, "step": 2438 }, { "epoch": 0.9468201854108688, "grad_norm": 2.8665250283665955e-05, "learning_rate": 5.2727705459199345e-06, "loss": 0.0, "step": 2440 }, { "epoch": 0.9468201854108688, "eval_accuracy": 1.0, "eval_loss": 6.560118208653876e-07, "eval_runtime": 127.7074, "eval_samples_per_second": 39.152, "eval_steps_per_second": 9.788, "step": 2440 }, { "epoch": 0.947596267530058, "grad_norm": 3.558880416676402e-05, "learning_rate": 5.266433127386319e-06, "loss": 0.0, "step": 2442 }, { "epoch": 0.9483723496492472, "grad_norm": 3.688091965159401e-05, "learning_rate": 5.260095279575818e-06, "loss": 0.0, "step": 2444 }, { "epoch": 0.9491484317684366, "grad_norm": 4.0884588088374585e-05, "learning_rate": 5.253757012699972e-06, "loss": 0.0, "step": 2446 }, { "epoch": 0.9499245138876258, "grad_norm": 2.3549753677798435e-05, "learning_rate": 5.247418336970989e-06, "loss": 0.0, "step": 2448 }, { "epoch": 0.950700596006815, "grad_norm": 1.4427140740735922e-05, "learning_rate": 5.241079262601738e-06, "loss": 0.0, "step": 2450 }, { "epoch": 0.9514766781260042, "grad_norm": 4.3853513489011675e-05, "learning_rate": 5.234739799805735e-06, "loss": 0.0, "step": 2452 }, { "epoch": 0.9522527602451935, "grad_norm": 2.824706825776957e-05, "learning_rate": 5.228399958797117e-06, "loss": 0.0, "step": 2454 }, { "epoch": 0.9530288423643827, "grad_norm": 5.596975825028494e-05, "learning_rate": 5.2220597497906315e-06, "loss": 0.0, "step": 2456 }, { "epoch": 0.9538049244835719, "grad_norm": 4.121051824768074e-05, "learning_rate": 5.215719183001619e-06, "loss": 0.0, "step": 2458 }, { "epoch": 0.9545810066027611, "grad_norm": 5.6017473980318755e-05, "learning_rate": 5.209378268645998e-06, "loss": 0.0, "step": 2460 }, { "epoch": 0.9553570887219504, "grad_norm": 3.110716352239251e-05, "learning_rate": 5.203037016940245e-06, "loss": 0.0, "step": 2462 }, { "epoch": 0.9561331708411396, "grad_norm": 7.766463386360556e-05, "learning_rate": 5.19669543810138e-06, "loss": 0.0, "step": 2464 }, { "epoch": 0.9569092529603289, "grad_norm": 6.563676288351417e-05, "learning_rate": 5.190353542346951e-06, "loss": 0.0, "step": 2466 }, { "epoch": 0.9576853350795181, "grad_norm": 4.2788909922819585e-05, "learning_rate": 5.184011339895015e-06, "loss": 0.0, "step": 2468 }, { "epoch": 0.9584614171987074, "grad_norm": 3.1015857530292124e-05, "learning_rate": 5.177668840964128e-06, "loss": 0.0, "step": 2470 }, { "epoch": 0.9592374993178966, "grad_norm": 3.478724102023989e-05, "learning_rate": 5.171326055773318e-06, "loss": 0.0, "step": 2472 }, { "epoch": 0.9600135814370858, "grad_norm": 5.9747220802819356e-05, "learning_rate": 5.164982994542076e-06, "loss": 0.0, "step": 2474 }, { "epoch": 0.960789663556275, "grad_norm": 4.1762155888136476e-05, "learning_rate": 5.15863966749034e-06, "loss": 0.0, "step": 2476 }, { "epoch": 0.9615657456754643, "grad_norm": 2.615702942421194e-05, "learning_rate": 5.1522960848384715e-06, "loss": 0.0, "step": 2478 }, { "epoch": 0.9623418277946535, "grad_norm": 2.7091376978205517e-05, "learning_rate": 5.1459522568072495e-06, "loss": 0.0, "step": 2480 }, { "epoch": 0.9623418277946535, "eval_accuracy": 1.0, "eval_loss": 6.308707156676974e-07, "eval_runtime": 129.2458, "eval_samples_per_second": 38.686, "eval_steps_per_second": 9.671, "step": 2480 }, { "epoch": 0.9631179099138427, "grad_norm": 4.1229410271625966e-05, "learning_rate": 5.139608193617846e-06, "loss": 0.0, "step": 2482 }, { "epoch": 0.9638939920330319, "grad_norm": 4.6652869059471413e-05, "learning_rate": 5.133263905491809e-06, "loss": 0.0, "step": 2484 }, { "epoch": 0.9646700741522213, "grad_norm": 3.790379923884757e-05, "learning_rate": 5.126919402651053e-06, "loss": 0.0, "step": 2486 }, { "epoch": 0.9654461562714105, "grad_norm": 2.7518190108821727e-05, "learning_rate": 5.120574695317837e-06, "loss": 0.0, "step": 2488 }, { "epoch": 0.9662222383905997, "grad_norm": 3.830927380477078e-05, "learning_rate": 5.114229793714749e-06, "loss": 0.0, "step": 2490 }, { "epoch": 0.966998320509789, "grad_norm": 4.4493175664683804e-05, "learning_rate": 5.1078847080646894e-06, "loss": 0.0, "step": 2492 }, { "epoch": 0.9677744026289782, "grad_norm": 2.6731620891951025e-05, "learning_rate": 5.101539448590859e-06, "loss": 0.0, "step": 2494 }, { "epoch": 0.9685504847481674, "grad_norm": 3.9096106775105e-05, "learning_rate": 5.095194025516733e-06, "loss": 0.0, "step": 2496 }, { "epoch": 0.9693265668673566, "grad_norm": 4.291687946533784e-05, "learning_rate": 5.088848449066055e-06, "loss": 0.0, "step": 2498 }, { "epoch": 0.9701026489865459, "grad_norm": 2.8111671781516634e-05, "learning_rate": 5.082502729462813e-06, "loss": 0.0, "step": 2500 }, { "epoch": 0.9708787311057351, "grad_norm": 2.473478889442049e-05, "learning_rate": 5.076156876931225e-06, "loss": 0.0, "step": 2502 }, { "epoch": 0.9716548132249243, "grad_norm": 3.1256680813385174e-05, "learning_rate": 5.069810901695727e-06, "loss": 0.0, "step": 2504 }, { "epoch": 0.9724308953441136, "grad_norm": 8.504558354616165e-05, "learning_rate": 5.063464813980948e-06, "loss": 0.0, "step": 2506 }, { "epoch": 0.9732069774633029, "grad_norm": 3.6866145819658414e-05, "learning_rate": 5.057118624011702e-06, "loss": 0.0, "step": 2508 }, { "epoch": 0.9739830595824921, "grad_norm": 3.746883157873526e-05, "learning_rate": 5.050772342012966e-06, "loss": 0.0, "step": 2510 }, { "epoch": 0.9747591417016813, "grad_norm": 4.5784752728650346e-05, "learning_rate": 5.044425978209864e-06, "loss": 0.0, "step": 2512 }, { "epoch": 0.9755352238208705, "grad_norm": 7.232214557006955e-05, "learning_rate": 5.038079542827654e-06, "loss": 0.0, "step": 2514 }, { "epoch": 0.9763113059400598, "grad_norm": 2.3140726625570096e-05, "learning_rate": 5.03173304609171e-06, "loss": 0.0, "step": 2516 }, { "epoch": 0.977087388059249, "grad_norm": 3.0297260309453122e-05, "learning_rate": 5.025386498227501e-06, "loss": 0.0, "step": 2518 }, { "epoch": 0.9778634701784382, "grad_norm": 2.9542878110078163e-05, "learning_rate": 5.019039909460584e-06, "loss": 0.0, "step": 2520 }, { "epoch": 0.9778634701784382, "eval_accuracy": 1.0, "eval_loss": 7.142343747545965e-07, "eval_runtime": 126.9479, "eval_samples_per_second": 39.386, "eval_steps_per_second": 9.847, "step": 2520 }, { "epoch": 0.9786395522976274, "grad_norm": 3.342224590596743e-05, "learning_rate": 5.012693290016576e-06, "loss": 0.0, "step": 2522 }, { "epoch": 0.9794156344168168, "grad_norm": 4.349211303633638e-05, "learning_rate": 5.006346650121148e-06, "loss": 0.0, "step": 2524 }, { "epoch": 0.980191716536006, "grad_norm": 5.371720180846751e-05, "learning_rate": 5e-06, "loss": 0.0, "step": 2526 }, { "epoch": 0.9809677986551952, "grad_norm": 8.413937757723033e-05, "learning_rate": 4.993653349878854e-06, "loss": 0.0, "step": 2528 }, { "epoch": 0.9817438807743845, "grad_norm": 3.4149699786212295e-05, "learning_rate": 4.987306709983426e-06, "loss": 0.0, "step": 2530 }, { "epoch": 0.9825199628935737, "grad_norm": 2.7057965780841187e-05, "learning_rate": 4.980960090539417e-06, "loss": 0.0, "step": 2532 }, { "epoch": 0.9832960450127629, "grad_norm": 3.129825563519262e-05, "learning_rate": 4.9746135017725e-06, "loss": 0.0, "step": 2534 }, { "epoch": 0.9840721271319521, "grad_norm": 2.621794556034729e-05, "learning_rate": 4.9682669539082914e-06, "loss": 0.0, "step": 2536 }, { "epoch": 0.9848482092511414, "grad_norm": 2.528310324123595e-05, "learning_rate": 4.961920457172347e-06, "loss": 0.0, "step": 2538 }, { "epoch": 0.9856242913703306, "grad_norm": 2.6144169169128872e-05, "learning_rate": 4.955574021790138e-06, "loss": 0.0, "step": 2540 }, { "epoch": 0.9864003734895198, "grad_norm": 3.326757359900512e-05, "learning_rate": 4.9492276579870355e-06, "loss": 0.0, "step": 2542 }, { "epoch": 0.9871764556087091, "grad_norm": 2.1525325792026706e-05, "learning_rate": 4.9428813759883e-06, "loss": 0.0, "step": 2544 }, { "epoch": 0.9879525377278984, "grad_norm": 3.851413930533454e-05, "learning_rate": 4.936535186019053e-06, "loss": 0.0, "step": 2546 }, { "epoch": 0.9887286198470876, "grad_norm": 2.7478643460199237e-05, "learning_rate": 4.9301890983042744e-06, "loss": 0.0, "step": 2548 }, { "epoch": 0.9895047019662768, "grad_norm": 7.681578426854685e-05, "learning_rate": 4.923843123068776e-06, "loss": 0.0, "step": 2550 }, { "epoch": 0.990280784085466, "grad_norm": 2.4914035748224705e-05, "learning_rate": 4.917497270537188e-06, "loss": 0.0, "step": 2552 }, { "epoch": 0.9910568662046553, "grad_norm": 2.8056459996150807e-05, "learning_rate": 4.911151550933946e-06, "loss": 0.0, "step": 2554 }, { "epoch": 0.9918329483238445, "grad_norm": 3.2389394618803635e-05, "learning_rate": 4.904805974483267e-06, "loss": 0.0, "step": 2556 }, { "epoch": 0.9926090304430337, "grad_norm": 2.2351039660861716e-05, "learning_rate": 4.898460551409141e-06, "loss": 0.0, "step": 2558 }, { "epoch": 0.993385112562223, "grad_norm": 3.937369183404371e-05, "learning_rate": 4.8921152919353105e-06, "loss": 0.0, "step": 2560 }, { "epoch": 0.993385112562223, "eval_accuracy": 1.0, "eval_loss": 5.370461053644249e-07, "eval_runtime": 130.1696, "eval_samples_per_second": 38.411, "eval_steps_per_second": 9.603, "step": 2560 }, { "epoch": 0.9941611946814122, "grad_norm": 3.8049420254537836e-05, "learning_rate": 4.8857702062852515e-06, "loss": 0.0, "step": 2562 }, { "epoch": 0.9949372768006015, "grad_norm": 2.30417208513245e-05, "learning_rate": 4.879425304682164e-06, "loss": 0.0, "step": 2564 }, { "epoch": 0.9957133589197907, "grad_norm": 2.3349539333139546e-05, "learning_rate": 4.873080597348948e-06, "loss": 0.0, "step": 2566 }, { "epoch": 0.99648944103898, "grad_norm": 0.00010747493797680363, "learning_rate": 4.866736094508191e-06, "loss": 0.0, "step": 2568 }, { "epoch": 0.9972655231581692, "grad_norm": 2.7326119379722513e-05, "learning_rate": 4.860391806382157e-06, "loss": 0.0, "step": 2570 }, { "epoch": 0.9980416052773584, "grad_norm": 2.021576619881671e-05, "learning_rate": 4.854047743192752e-06, "loss": 0.0, "step": 2572 }, { "epoch": 0.9988176873965476, "grad_norm": 3.7446698115672916e-05, "learning_rate": 4.847703915161531e-06, "loss": 0.0, "step": 2574 }, { "epoch": 0.9995937695157369, "grad_norm": 2.8720238333335146e-05, "learning_rate": 4.841360332509663e-06, "loss": 0.0, "step": 2576 }, { "epoch": 1.0, "grad_norm": 8.942670683609322e-05, "learning_rate": 4.835017005457926e-06, "loss": 0.0, "step": 2578 }, { "epoch": 1.0007760821191893, "grad_norm": 4.199489194434136e-05, "learning_rate": 4.828673944226684e-06, "loss": 0.0, "step": 2580 }, { "epoch": 1.0015521642383785, "grad_norm": 3.3291569707216695e-05, "learning_rate": 4.822331159035873e-06, "loss": 0.0, "step": 2582 }, { "epoch": 1.0023282463575678, "grad_norm": 2.7658177714329213e-05, "learning_rate": 4.815988660104986e-06, "loss": 0.0, "step": 2584 }, { "epoch": 1.003104328476757, "grad_norm": 2.017567567236256e-05, "learning_rate": 4.809646457653051e-06, "loss": 0.0, "step": 2586 }, { "epoch": 1.0038804105959462, "grad_norm": 2.243245398858562e-05, "learning_rate": 4.803304561898622e-06, "loss": 0.0, "step": 2588 }, { "epoch": 1.0046564927151354, "grad_norm": 2.4132619728334248e-05, "learning_rate": 4.796962983059757e-06, "loss": 0.0, "step": 2590 }, { "epoch": 1.0054325748343247, "grad_norm": 3.528342131176032e-05, "learning_rate": 4.7906217313540035e-06, "loss": 0.0, "step": 2592 }, { "epoch": 1.0062086569535138, "grad_norm": 1.6356214473489672e-05, "learning_rate": 4.784280816998382e-06, "loss": 0.0, "step": 2594 }, { "epoch": 1.0069847390727031, "grad_norm": 2.475641667842865e-05, "learning_rate": 4.777940250209369e-06, "loss": 0.0, "step": 2596 }, { "epoch": 1.0077608211918925, "grad_norm": 2.6695679480326362e-05, "learning_rate": 4.771600041202884e-06, "loss": 0.0, "step": 2598 }, { "epoch": 1.0085369033110816, "grad_norm": 3.237533746869303e-05, "learning_rate": 4.765260200194266e-06, "loss": 0.0, "step": 2600 }, { "epoch": 1.0085369033110816, "eval_accuracy": 1.0, "eval_loss": 6.079633294575615e-07, "eval_runtime": 127.1999, "eval_samples_per_second": 39.308, "eval_steps_per_second": 9.827, "step": 2600 }, { "epoch": 1.009312985430271, "grad_norm": 2.389101791777648e-05, "learning_rate": 4.7589207373982635e-06, "loss": 0.0, "step": 2602 }, { "epoch": 1.01008906754946, "grad_norm": 9.792613855097443e-05, "learning_rate": 4.7525816630290135e-06, "loss": 0.0, "step": 2604 }, { "epoch": 1.0108651496686494, "grad_norm": 2.7967547794105485e-05, "learning_rate": 4.74624298730003e-06, "loss": 0.0, "step": 2606 }, { "epoch": 1.0116412317878385, "grad_norm": 2.7776595743489452e-05, "learning_rate": 4.7399047204241826e-06, "loss": 0.0, "step": 2608 }, { "epoch": 1.0124173139070278, "grad_norm": 2.291197597514838e-05, "learning_rate": 4.733566872613683e-06, "loss": 0.0, "step": 2610 }, { "epoch": 1.013193396026217, "grad_norm": 6.0257974837441e-05, "learning_rate": 4.727229454080068e-06, "loss": 0.0, "step": 2612 }, { "epoch": 1.0139694781454063, "grad_norm": 1.9055862139794044e-05, "learning_rate": 4.720892475034181e-06, "loss": 0.0, "step": 2614 }, { "epoch": 1.0147455602645954, "grad_norm": 2.2528010958922096e-05, "learning_rate": 4.71455594568616e-06, "loss": 0.0, "step": 2616 }, { "epoch": 1.0155216423837847, "grad_norm": 4.432563582668081e-05, "learning_rate": 4.7082198762454165e-06, "loss": 0.0, "step": 2618 }, { "epoch": 1.016297724502974, "grad_norm": 2.9877963243052363e-05, "learning_rate": 4.701884276920622e-06, "loss": 0.0, "step": 2620 }, { "epoch": 1.0170738066221632, "grad_norm": 3.16669320454821e-05, "learning_rate": 4.69554915791969e-06, "loss": 0.0, "step": 2622 }, { "epoch": 1.0178498887413525, "grad_norm": 2.8538424885482527e-05, "learning_rate": 4.689214529449758e-06, "loss": 0.0, "step": 2624 }, { "epoch": 1.0186259708605416, "grad_norm": 4.4377615267876536e-05, "learning_rate": 4.682880401717178e-06, "loss": 0.0, "step": 2626 }, { "epoch": 1.019402052979731, "grad_norm": 4.03338490286842e-05, "learning_rate": 4.676546784927491e-06, "loss": 0.0, "step": 2628 }, { "epoch": 1.02017813509892, "grad_norm": 2.113764094247017e-05, "learning_rate": 4.670213689285418e-06, "loss": 0.0, "step": 2630 }, { "epoch": 1.0209542172181094, "grad_norm": 2.6853454983211122e-05, "learning_rate": 4.663881124994837e-06, "loss": 0.0, "step": 2632 }, { "epoch": 1.0217302993372985, "grad_norm": 4.063413507537916e-05, "learning_rate": 4.6575491022587714e-06, "loss": 0.0, "step": 2634 }, { "epoch": 1.0225063814564879, "grad_norm": 7.893393194535747e-05, "learning_rate": 4.651217631279374e-06, "loss": 0.0, "step": 2636 }, { "epoch": 1.0232824635756772, "grad_norm": 2.4139138986356556e-05, "learning_rate": 4.644886722257905e-06, "loss": 0.0, "step": 2638 }, { "epoch": 1.0240585456948663, "grad_norm": 3.022322380275e-05, "learning_rate": 4.638556385394721e-06, "loss": 0.0, "step": 2640 }, { "epoch": 1.0240585456948663, "eval_accuracy": 1.0, "eval_loss": 6.413867481569469e-07, "eval_runtime": 128.5888, "eval_samples_per_second": 38.884, "eval_steps_per_second": 9.721, "step": 2640 }, { "epoch": 1.0248346278140557, "grad_norm": 2.86602971755201e-05, "learning_rate": 4.632226630889258e-06, "loss": 0.0, "step": 2642 }, { "epoch": 1.0256107099332448, "grad_norm": 2.431630127830431e-05, "learning_rate": 4.625897468940012e-06, "loss": 0.0, "step": 2644 }, { "epoch": 1.026386792052434, "grad_norm": 3.166431270074099e-05, "learning_rate": 4.619568909744524e-06, "loss": 0.0, "step": 2646 }, { "epoch": 1.0271628741716232, "grad_norm": 3.738111263373867e-05, "learning_rate": 4.6132409634993655e-06, "loss": 0.0, "step": 2648 }, { "epoch": 1.0279389562908126, "grad_norm": 2.731166205194313e-05, "learning_rate": 4.606913640400118e-06, "loss": 0.0, "step": 2650 }, { "epoch": 1.0287150384100017, "grad_norm": 2.4937800844782032e-05, "learning_rate": 4.600586950641362e-06, "loss": 0.0, "step": 2652 }, { "epoch": 1.029491120529191, "grad_norm": 1.641696144361049e-05, "learning_rate": 4.594260904416656e-06, "loss": 0.0, "step": 2654 }, { "epoch": 1.0302672026483801, "grad_norm": 3.1487983505940065e-05, "learning_rate": 4.587935511918521e-06, "loss": 0.0, "step": 2656 }, { "epoch": 1.0310432847675695, "grad_norm": 2.828125434461981e-05, "learning_rate": 4.581610783338424e-06, "loss": 0.0, "step": 2658 }, { "epoch": 1.0318193668867588, "grad_norm": 2.7230045816395432e-05, "learning_rate": 4.575286728866765e-06, "loss": 0.0, "step": 2660 }, { "epoch": 1.032595449005948, "grad_norm": 3.8712336390744895e-05, "learning_rate": 4.568963358692856e-06, "loss": 0.0, "step": 2662 }, { "epoch": 1.0333715311251372, "grad_norm": 1.5129097846511286e-05, "learning_rate": 4.562640683004907e-06, "loss": 0.0, "step": 2664 }, { "epoch": 1.0341476132443264, "grad_norm": 2.079862679238431e-05, "learning_rate": 4.55631871199001e-06, "loss": 0.0, "step": 2666 }, { "epoch": 1.0349236953635157, "grad_norm": 3.13880700559821e-05, "learning_rate": 4.549997455834121e-06, "loss": 0.0, "step": 2668 }, { "epoch": 1.0356997774827048, "grad_norm": 1.3894719813833944e-05, "learning_rate": 4.543676924722042e-06, "loss": 0.0, "step": 2670 }, { "epoch": 1.0364758596018941, "grad_norm": 3.6356126656755805e-05, "learning_rate": 4.53735712883741e-06, "loss": 0.0, "step": 2672 }, { "epoch": 1.0372519417210833, "grad_norm": 3.1543146178591996e-05, "learning_rate": 4.531038078362675e-06, "loss": 0.0, "step": 2674 }, { "epoch": 1.0380280238402726, "grad_norm": 2.4299282813444734e-05, "learning_rate": 4.524719783479088e-06, "loss": 0.0, "step": 2676 }, { "epoch": 1.038804105959462, "grad_norm": 3.419563654460944e-05, "learning_rate": 4.518402254366681e-06, "loss": 0.0, "step": 2678 }, { "epoch": 1.039580188078651, "grad_norm": 2.9530883693951182e-05, "learning_rate": 4.512085501204254e-06, "loss": 0.0, "step": 2680 }, { "epoch": 1.039580188078651, "eval_accuracy": 1.0, "eval_loss": 6.419026590265275e-07, "eval_runtime": 125.2164, "eval_samples_per_second": 39.931, "eval_steps_per_second": 9.983, "step": 2680 } ], "logging_steps": 2, "max_steps": 5000, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 40, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.9423125972189184e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }