{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.305175263849447, "eval_steps": 100, "global_step": 3850, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0006781672529987708, "grad_norm": 19.302452087402344, "learning_rate": 1.0000000000000002e-06, "loss": 0.7636, "step": 2 }, { "epoch": 0.0013563345059975417, "grad_norm": 17.56575584411621, "learning_rate": 3e-06, "loss": 0.6728, "step": 4 }, { "epoch": 0.0020345017589963126, "grad_norm": 13.646787643432617, "learning_rate": 5e-06, "loss": 0.5993, "step": 6 }, { "epoch": 0.0027126690119950833, "grad_norm": 4.150851726531982, "learning_rate": 7.000000000000001e-06, "loss": 0.3705, "step": 8 }, { "epoch": 0.003390836264993854, "grad_norm": 3.0254945755004883, "learning_rate": 9e-06, "loss": 0.2513, "step": 10 }, { "epoch": 0.004069003517992625, "grad_norm": 2.2863237857818604, "learning_rate": 1.1000000000000001e-05, "loss": 0.189, "step": 12 }, { "epoch": 0.004747170770991396, "grad_norm": 1.6792925596237183, "learning_rate": 1.3000000000000001e-05, "loss": 0.0997, "step": 14 }, { "epoch": 0.005425338023990167, "grad_norm": 1.6237634420394897, "learning_rate": 1.5e-05, "loss": 0.0595, "step": 16 }, { "epoch": 0.006103505276988937, "grad_norm": 1.5864434242248535, "learning_rate": 1.7000000000000003e-05, "loss": 0.0527, "step": 18 }, { "epoch": 0.006781672529987708, "grad_norm": 1.0857024192810059, "learning_rate": 1.9e-05, "loss": 0.0437, "step": 20 }, { "epoch": 0.007459839782986479, "grad_norm": 0.7278645634651184, "learning_rate": 2.1e-05, "loss": 0.0376, "step": 22 }, { "epoch": 0.00813800703598525, "grad_norm": 1.2589967250823975, "learning_rate": 2.3000000000000003e-05, "loss": 0.0301, "step": 24 }, { "epoch": 0.00881617428898402, "grad_norm": 1.5093748569488525, "learning_rate": 2.5e-05, "loss": 0.0269, "step": 26 }, { "epoch": 0.009494341541982792, "grad_norm": 1.0473034381866455, "learning_rate": 2.7000000000000002e-05, "loss": 0.0284, "step": 28 }, { "epoch": 0.010172508794981562, "grad_norm": 0.6267300844192505, "learning_rate": 2.9e-05, "loss": 0.0188, "step": 30 }, { "epoch": 0.010850676047980333, "grad_norm": 0.6511037349700928, "learning_rate": 3.1e-05, "loss": 0.0195, "step": 32 }, { "epoch": 0.011528843300979103, "grad_norm": 0.47719255089759827, "learning_rate": 3.3e-05, "loss": 0.0133, "step": 34 }, { "epoch": 0.012207010553977875, "grad_norm": 1.834301233291626, "learning_rate": 3.5e-05, "loss": 0.0234, "step": 36 }, { "epoch": 0.012885177806976646, "grad_norm": 0.5575568675994873, "learning_rate": 3.7e-05, "loss": 0.0178, "step": 38 }, { "epoch": 0.013563345059975416, "grad_norm": 0.4530166983604431, "learning_rate": 3.9000000000000006e-05, "loss": 0.0158, "step": 40 }, { "epoch": 0.014241512312974188, "grad_norm": 0.41800159215927124, "learning_rate": 4.1e-05, "loss": 0.0168, "step": 42 }, { "epoch": 0.014919679565972958, "grad_norm": 13.026510238647461, "learning_rate": 4.3e-05, "loss": 0.0292, "step": 44 }, { "epoch": 0.01559784681897173, "grad_norm": 0.39133763313293457, "learning_rate": 4.5e-05, "loss": 0.0147, "step": 46 }, { "epoch": 0.0162760140719705, "grad_norm": 0.3970816135406494, "learning_rate": 4.7e-05, "loss": 0.0121, "step": 48 }, { "epoch": 0.01695418132496927, "grad_norm": 0.34943029284477234, "learning_rate": 4.9e-05, "loss": 0.0109, "step": 50 }, { "epoch": 0.01763234857796804, "grad_norm": 0.46713560819625854, "learning_rate": 5.1000000000000006e-05, "loss": 0.0111, "step": 52 }, { "epoch": 0.018310515830966812, "grad_norm": 0.3805915415287018, "learning_rate": 5.300000000000001e-05, "loss": 0.0068, "step": 54 }, { "epoch": 0.018988683083965584, "grad_norm": 0.6533093452453613, "learning_rate": 5.500000000000001e-05, "loss": 0.0113, "step": 56 }, { "epoch": 0.019666850336964355, "grad_norm": 0.5974678993225098, "learning_rate": 5.6999999999999996e-05, "loss": 0.0073, "step": 58 }, { "epoch": 0.020345017589963123, "grad_norm": 0.44925084710121155, "learning_rate": 5.9e-05, "loss": 0.0082, "step": 60 }, { "epoch": 0.021023184842961895, "grad_norm": 0.7732060551643372, "learning_rate": 6.1e-05, "loss": 0.0101, "step": 62 }, { "epoch": 0.021701352095960667, "grad_norm": 0.9566466808319092, "learning_rate": 6.3e-05, "loss": 0.0128, "step": 64 }, { "epoch": 0.022379519348959438, "grad_norm": 0.5865803360939026, "learning_rate": 6.500000000000001e-05, "loss": 0.0091, "step": 66 }, { "epoch": 0.023057686601958206, "grad_norm": 0.3004082143306732, "learning_rate": 6.7e-05, "loss": 0.0097, "step": 68 }, { "epoch": 0.023735853854956978, "grad_norm": 0.34375691413879395, "learning_rate": 6.9e-05, "loss": 0.0086, "step": 70 }, { "epoch": 0.02441402110795575, "grad_norm": 0.5289464592933655, "learning_rate": 7.1e-05, "loss": 0.0554, "step": 72 }, { "epoch": 0.02509218836095452, "grad_norm": 0.5276769399642944, "learning_rate": 7.3e-05, "loss": 0.0089, "step": 74 }, { "epoch": 0.025770355613953293, "grad_norm": 0.3362698554992676, "learning_rate": 7.500000000000001e-05, "loss": 0.0075, "step": 76 }, { "epoch": 0.02644852286695206, "grad_norm": 0.3724222779273987, "learning_rate": 7.7e-05, "loss": 0.0069, "step": 78 }, { "epoch": 0.027126690119950832, "grad_norm": 0.33922865986824036, "learning_rate": 7.900000000000001e-05, "loss": 0.007, "step": 80 }, { "epoch": 0.027804857372949604, "grad_norm": 0.25453755259513855, "learning_rate": 8.1e-05, "loss": 0.005, "step": 82 }, { "epoch": 0.028483024625948376, "grad_norm": 0.21005849540233612, "learning_rate": 8.3e-05, "loss": 0.0043, "step": 84 }, { "epoch": 0.029161191878947144, "grad_norm": 0.22220443189144135, "learning_rate": 8.5e-05, "loss": 0.0046, "step": 86 }, { "epoch": 0.029839359131945915, "grad_norm": 0.2243790626525879, "learning_rate": 8.7e-05, "loss": 0.0036, "step": 88 }, { "epoch": 0.030517526384944687, "grad_norm": 0.2276381105184555, "learning_rate": 8.900000000000001e-05, "loss": 0.0051, "step": 90 }, { "epoch": 0.03119569363794346, "grad_norm": 0.19878089427947998, "learning_rate": 9.1e-05, "loss": 0.004, "step": 92 }, { "epoch": 0.03187386089094223, "grad_norm": 0.20180144906044006, "learning_rate": 9.300000000000001e-05, "loss": 0.0055, "step": 94 }, { "epoch": 0.032552028143941, "grad_norm": 0.22953402996063232, "learning_rate": 9.5e-05, "loss": 0.0036, "step": 96 }, { "epoch": 0.03323019539693977, "grad_norm": 0.20858259499073029, "learning_rate": 9.7e-05, "loss": 0.0039, "step": 98 }, { "epoch": 0.03390836264993854, "grad_norm": 0.19686263799667358, "learning_rate": 9.900000000000001e-05, "loss": 0.0042, "step": 100 }, { "epoch": 0.03458652990293731, "grad_norm": 0.1803613305091858, "learning_rate": 9.999999748250068e-05, "loss": 0.0048, "step": 102 }, { "epoch": 0.03526469715593608, "grad_norm": 0.28180426359176636, "learning_rate": 9.999997734250768e-05, "loss": 0.0051, "step": 104 }, { "epoch": 0.035942864408934856, "grad_norm": 0.2615203559398651, "learning_rate": 9.999993706252977e-05, "loss": 0.0036, "step": 106 }, { "epoch": 0.036621031661933624, "grad_norm": 0.200982928276062, "learning_rate": 9.999987664258321e-05, "loss": 0.0042, "step": 108 }, { "epoch": 0.03729919891493239, "grad_norm": 0.21157991886138916, "learning_rate": 9.999979608269231e-05, "loss": 0.0038, "step": 110 }, { "epoch": 0.03797736616793117, "grad_norm": 0.16271452605724335, "learning_rate": 9.999969538288952e-05, "loss": 0.0023, "step": 112 }, { "epoch": 0.038655533420929936, "grad_norm": 0.15799306333065033, "learning_rate": 9.999957454321543e-05, "loss": 0.0032, "step": 114 }, { "epoch": 0.03933370067392871, "grad_norm": 0.23333343863487244, "learning_rate": 9.999943356371866e-05, "loss": 0.0029, "step": 116 }, { "epoch": 0.04001186792692748, "grad_norm": 0.2120216190814972, "learning_rate": 9.999927244445606e-05, "loss": 0.0032, "step": 118 }, { "epoch": 0.04069003517992625, "grad_norm": 0.12919344007968903, "learning_rate": 9.999909118549248e-05, "loss": 0.0016, "step": 120 }, { "epoch": 0.04136820243292502, "grad_norm": 0.12390448898077011, "learning_rate": 9.999888978690095e-05, "loss": 0.0025, "step": 122 }, { "epoch": 0.04204636968592379, "grad_norm": 0.21758395433425903, "learning_rate": 9.99986682487626e-05, "loss": 0.0026, "step": 124 }, { "epoch": 0.042724536938922565, "grad_norm": 0.21877099573612213, "learning_rate": 9.999842657116665e-05, "loss": 0.0017, "step": 126 }, { "epoch": 0.04340270419192133, "grad_norm": 0.1546417772769928, "learning_rate": 9.999816475421046e-05, "loss": 0.0026, "step": 128 }, { "epoch": 0.0440808714449201, "grad_norm": 0.1697741150856018, "learning_rate": 9.999788279799951e-05, "loss": 0.0021, "step": 130 }, { "epoch": 0.044759038697918876, "grad_norm": 0.23591545224189758, "learning_rate": 9.999758070264732e-05, "loss": 0.0021, "step": 132 }, { "epoch": 0.045437205950917645, "grad_norm": 0.23597721755504608, "learning_rate": 9.999725846827562e-05, "loss": 0.0022, "step": 134 }, { "epoch": 0.04611537320391641, "grad_norm": 0.09806650131940842, "learning_rate": 9.999691609501417e-05, "loss": 0.0014, "step": 136 }, { "epoch": 0.04679354045691519, "grad_norm": 0.17510582506656647, "learning_rate": 9.99965535830009e-05, "loss": 0.0011, "step": 138 }, { "epoch": 0.047471707709913956, "grad_norm": 0.16027089953422546, "learning_rate": 9.999617093238182e-05, "loss": 0.002, "step": 140 }, { "epoch": 0.04814987496291273, "grad_norm": 0.14597536623477936, "learning_rate": 9.999576814331109e-05, "loss": 0.0013, "step": 142 }, { "epoch": 0.0488280422159115, "grad_norm": 0.2963673770427704, "learning_rate": 9.999534521595093e-05, "loss": 0.0019, "step": 144 }, { "epoch": 0.04950620946891027, "grad_norm": 0.32802748680114746, "learning_rate": 9.999490215047167e-05, "loss": 0.0022, "step": 146 }, { "epoch": 0.05018437672190904, "grad_norm": 0.17002712190151215, "learning_rate": 9.999443894705182e-05, "loss": 0.0016, "step": 148 }, { "epoch": 0.05086254397490781, "grad_norm": 0.10025887191295624, "learning_rate": 9.999395560587795e-05, "loss": 0.0014, "step": 150 }, { "epoch": 0.051540711227906585, "grad_norm": 0.24780897796154022, "learning_rate": 9.999345212714471e-05, "loss": 0.0022, "step": 152 }, { "epoch": 0.052218878480905354, "grad_norm": 0.18174393475055695, "learning_rate": 9.999292851105495e-05, "loss": 0.0023, "step": 154 }, { "epoch": 0.05289704573390412, "grad_norm": 0.34791404008865356, "learning_rate": 9.999238475781957e-05, "loss": 0.0033, "step": 156 }, { "epoch": 0.0535752129869029, "grad_norm": 0.30141982436180115, "learning_rate": 9.999182086765756e-05, "loss": 0.0033, "step": 158 }, { "epoch": 0.054253380239901665, "grad_norm": 0.26267531514167786, "learning_rate": 9.999123684079612e-05, "loss": 0.002, "step": 160 }, { "epoch": 0.05493154749290044, "grad_norm": 0.08853629976511002, "learning_rate": 9.999063267747044e-05, "loss": 0.0022, "step": 162 }, { "epoch": 0.05560971474589921, "grad_norm": 2.4076087474823, "learning_rate": 9.999000837792391e-05, "loss": 0.0052, "step": 164 }, { "epoch": 0.056287881998897976, "grad_norm": 1.0381807088851929, "learning_rate": 9.998936394240797e-05, "loss": 0.0046, "step": 166 }, { "epoch": 0.05696604925189675, "grad_norm": 0.7304306626319885, "learning_rate": 9.998869937118222e-05, "loss": 0.015, "step": 168 }, { "epoch": 0.05764421650489552, "grad_norm": 1.530617594718933, "learning_rate": 9.998801466451433e-05, "loss": 0.0075, "step": 170 }, { "epoch": 0.05832238375789429, "grad_norm": 11.509839057922363, "learning_rate": 9.998730982268012e-05, "loss": 0.0751, "step": 172 }, { "epoch": 0.05900055101089306, "grad_norm": 2.877173662185669, "learning_rate": 9.998658484596349e-05, "loss": 0.0189, "step": 174 }, { "epoch": 0.05967871826389183, "grad_norm": 0.45325320959091187, "learning_rate": 9.998583973465646e-05, "loss": 0.0055, "step": 176 }, { "epoch": 0.060356885516890606, "grad_norm": 0.32094088196754456, "learning_rate": 9.998507448905917e-05, "loss": 0.0037, "step": 178 }, { "epoch": 0.061035052769889374, "grad_norm": 0.36653316020965576, "learning_rate": 9.998428910947983e-05, "loss": 0.0098, "step": 180 }, { "epoch": 0.06171322002288814, "grad_norm": 0.2647911608219147, "learning_rate": 9.998348359623484e-05, "loss": 0.0063, "step": 182 }, { "epoch": 0.06239138727588692, "grad_norm": 1.8155499696731567, "learning_rate": 9.998265794964863e-05, "loss": 0.019, "step": 184 }, { "epoch": 0.06306955452888569, "grad_norm": 0.6207036972045898, "learning_rate": 9.998181217005376e-05, "loss": 0.0094, "step": 186 }, { "epoch": 0.06374772178188445, "grad_norm": 0.26838263869285583, "learning_rate": 9.998094625779094e-05, "loss": 0.0048, "step": 188 }, { "epoch": 0.06442588903488322, "grad_norm": 0.21736088395118713, "learning_rate": 9.998006021320893e-05, "loss": 0.0046, "step": 190 }, { "epoch": 0.065104056287882, "grad_norm": 0.22532005608081818, "learning_rate": 9.997915403666466e-05, "loss": 0.004, "step": 192 }, { "epoch": 0.06578222354088077, "grad_norm": 0.35037961602211, "learning_rate": 9.997822772852312e-05, "loss": 0.0028, "step": 194 }, { "epoch": 0.06646039079387954, "grad_norm": 0.5995946526527405, "learning_rate": 9.997728128915742e-05, "loss": 0.0031, "step": 196 }, { "epoch": 0.06713855804687831, "grad_norm": 0.1885141283273697, "learning_rate": 9.99763147189488e-05, "loss": 0.0035, "step": 198 }, { "epoch": 0.06781672529987708, "grad_norm": 0.6732299327850342, "learning_rate": 9.997532801828658e-05, "loss": 0.0037, "step": 200 }, { "epoch": 0.06849489255287586, "grad_norm": 0.6802183389663696, "learning_rate": 9.997432118756822e-05, "loss": 0.0161, "step": 202 }, { "epoch": 0.06917305980587463, "grad_norm": 0.1995115876197815, "learning_rate": 9.997329422719926e-05, "loss": 0.0036, "step": 204 }, { "epoch": 0.0698512270588734, "grad_norm": 0.156790092587471, "learning_rate": 9.997224713759335e-05, "loss": 0.0053, "step": 206 }, { "epoch": 0.07052939431187216, "grad_norm": 0.11744780093431473, "learning_rate": 9.997117991917228e-05, "loss": 0.0022, "step": 208 }, { "epoch": 0.07120756156487093, "grad_norm": 0.16716186702251434, "learning_rate": 9.997009257236592e-05, "loss": 0.0026, "step": 210 }, { "epoch": 0.07188572881786971, "grad_norm": 0.15055015683174133, "learning_rate": 9.996898509761224e-05, "loss": 0.0028, "step": 212 }, { "epoch": 0.07256389607086848, "grad_norm": 0.5399858951568604, "learning_rate": 9.996785749535733e-05, "loss": 0.0031, "step": 214 }, { "epoch": 0.07324206332386725, "grad_norm": 0.3503137528896332, "learning_rate": 9.99667097660554e-05, "loss": 0.0036, "step": 216 }, { "epoch": 0.07392023057686602, "grad_norm": 0.09014566987752914, "learning_rate": 9.996554191016875e-05, "loss": 0.0022, "step": 218 }, { "epoch": 0.07459839782986478, "grad_norm": 0.11608397960662842, "learning_rate": 9.996435392816781e-05, "loss": 0.0023, "step": 220 }, { "epoch": 0.07527656508286357, "grad_norm": 0.08935557305812836, "learning_rate": 9.996314582053106e-05, "loss": 0.0021, "step": 222 }, { "epoch": 0.07595473233586233, "grad_norm": 0.21059103310108185, "learning_rate": 9.996191758774515e-05, "loss": 0.0023, "step": 224 }, { "epoch": 0.0766328995888611, "grad_norm": 0.09767665714025497, "learning_rate": 9.996066923030484e-05, "loss": 0.0009, "step": 226 }, { "epoch": 0.07731106684185987, "grad_norm": 0.10100996494293213, "learning_rate": 9.995940074871291e-05, "loss": 0.0019, "step": 228 }, { "epoch": 0.07798923409485864, "grad_norm": 0.12427397817373276, "learning_rate": 9.995811214348034e-05, "loss": 0.001, "step": 230 }, { "epoch": 0.07866740134785742, "grad_norm": 0.16718646883964539, "learning_rate": 9.995680341512619e-05, "loss": 0.001, "step": 232 }, { "epoch": 0.07934556860085619, "grad_norm": 0.09956853836774826, "learning_rate": 9.995547456417757e-05, "loss": 0.0012, "step": 234 }, { "epoch": 0.08002373585385496, "grad_norm": 0.10336117446422577, "learning_rate": 9.995412559116979e-05, "loss": 0.0012, "step": 236 }, { "epoch": 0.08070190310685373, "grad_norm": 0.08326272666454315, "learning_rate": 9.995275649664619e-05, "loss": 0.0009, "step": 238 }, { "epoch": 0.0813800703598525, "grad_norm": 0.1553446352481842, "learning_rate": 9.995136728115825e-05, "loss": 0.0018, "step": 240 }, { "epoch": 0.08205823761285128, "grad_norm": 0.19187629222869873, "learning_rate": 9.994995794526554e-05, "loss": 0.0009, "step": 242 }, { "epoch": 0.08273640486585004, "grad_norm": 0.12646210193634033, "learning_rate": 9.994852848953574e-05, "loss": 0.0021, "step": 244 }, { "epoch": 0.08341457211884881, "grad_norm": 0.0978604182600975, "learning_rate": 9.994707891454464e-05, "loss": 0.0011, "step": 246 }, { "epoch": 0.08409273937184758, "grad_norm": 0.1135120764374733, "learning_rate": 9.994560922087612e-05, "loss": 0.001, "step": 248 }, { "epoch": 0.08477090662484635, "grad_norm": 0.1019337996840477, "learning_rate": 9.994411940912219e-05, "loss": 0.0011, "step": 250 }, { "epoch": 0.08544907387784513, "grad_norm": 0.08082269132137299, "learning_rate": 9.994260947988293e-05, "loss": 0.0008, "step": 252 }, { "epoch": 0.0861272411308439, "grad_norm": 0.03959016129374504, "learning_rate": 9.994107943376654e-05, "loss": 0.0007, "step": 254 }, { "epoch": 0.08680540838384267, "grad_norm": 0.06835722178220749, "learning_rate": 9.993952927138934e-05, "loss": 0.0007, "step": 256 }, { "epoch": 0.08748357563684143, "grad_norm": 0.03966667875647545, "learning_rate": 9.99379589933757e-05, "loss": 0.0007, "step": 258 }, { "epoch": 0.0881617428898402, "grad_norm": 0.050738293677568436, "learning_rate": 9.993636860035816e-05, "loss": 0.0012, "step": 260 }, { "epoch": 0.08883991014283897, "grad_norm": 0.03119371458888054, "learning_rate": 9.993475809297732e-05, "loss": 0.001, "step": 262 }, { "epoch": 0.08951807739583775, "grad_norm": 0.05491959676146507, "learning_rate": 9.99331274718819e-05, "loss": 0.0009, "step": 264 }, { "epoch": 0.09019624464883652, "grad_norm": 0.07895030081272125, "learning_rate": 9.99314767377287e-05, "loss": 0.0009, "step": 266 }, { "epoch": 0.09087441190183529, "grad_norm": 0.062128305435180664, "learning_rate": 9.992980589118264e-05, "loss": 0.0008, "step": 268 }, { "epoch": 0.09155257915483406, "grad_norm": 0.08582911640405655, "learning_rate": 9.992811493291674e-05, "loss": 0.001, "step": 270 }, { "epoch": 0.09223074640783283, "grad_norm": 0.055452585220336914, "learning_rate": 9.992640386361211e-05, "loss": 0.0006, "step": 272 }, { "epoch": 0.09290891366083161, "grad_norm": 0.035648979246616364, "learning_rate": 9.992467268395798e-05, "loss": 0.0004, "step": 274 }, { "epoch": 0.09358708091383038, "grad_norm": 0.09037120640277863, "learning_rate": 9.992292139465165e-05, "loss": 0.0008, "step": 276 }, { "epoch": 0.09426524816682914, "grad_norm": 0.05466959998011589, "learning_rate": 9.992114999639857e-05, "loss": 0.0006, "step": 278 }, { "epoch": 0.09494341541982791, "grad_norm": 0.048906926065683365, "learning_rate": 9.991935848991222e-05, "loss": 0.0011, "step": 280 }, { "epoch": 0.09562158267282668, "grad_norm": 0.028933236375451088, "learning_rate": 9.991754687591428e-05, "loss": 0.0004, "step": 282 }, { "epoch": 0.09629974992582546, "grad_norm": 0.2024703323841095, "learning_rate": 9.991571515513439e-05, "loss": 0.0012, "step": 284 }, { "epoch": 0.09697791717882423, "grad_norm": 0.05048273503780365, "learning_rate": 9.991386332831042e-05, "loss": 0.0007, "step": 286 }, { "epoch": 0.097656084431823, "grad_norm": 0.04887642711400986, "learning_rate": 9.991199139618827e-05, "loss": 0.0004, "step": 288 }, { "epoch": 0.09833425168482177, "grad_norm": 0.13716323673725128, "learning_rate": 9.991009935952196e-05, "loss": 0.0014, "step": 290 }, { "epoch": 0.09901241893782053, "grad_norm": 0.06719297915697098, "learning_rate": 9.990818721907358e-05, "loss": 0.0004, "step": 292 }, { "epoch": 0.09969058619081932, "grad_norm": 0.024962617084383965, "learning_rate": 9.990625497561337e-05, "loss": 0.0005, "step": 294 }, { "epoch": 0.10036875344381808, "grad_norm": 0.07603111863136292, "learning_rate": 9.990430262991962e-05, "loss": 0.0009, "step": 296 }, { "epoch": 0.10104692069681685, "grad_norm": 0.06402367353439331, "learning_rate": 9.990233018277872e-05, "loss": 0.0004, "step": 298 }, { "epoch": 0.10172508794981562, "grad_norm": 0.013269501738250256, "learning_rate": 9.990033763498523e-05, "loss": 0.0002, "step": 300 }, { "epoch": 0.10240325520281439, "grad_norm": 0.04440806433558464, "learning_rate": 9.989832498734168e-05, "loss": 0.0006, "step": 302 }, { "epoch": 0.10308142245581317, "grad_norm": 0.09003011137247086, "learning_rate": 9.989629224065879e-05, "loss": 0.0009, "step": 304 }, { "epoch": 0.10375958970881194, "grad_norm": 0.055034276098012924, "learning_rate": 9.989423939575538e-05, "loss": 0.0005, "step": 306 }, { "epoch": 0.10443775696181071, "grad_norm": 0.0314326211810112, "learning_rate": 9.989216645345827e-05, "loss": 0.0003, "step": 308 }, { "epoch": 0.10511592421480948, "grad_norm": 0.08611449599266052, "learning_rate": 9.98900734146025e-05, "loss": 0.0003, "step": 310 }, { "epoch": 0.10579409146780824, "grad_norm": 0.0826747715473175, "learning_rate": 9.988796028003114e-05, "loss": 0.0009, "step": 312 }, { "epoch": 0.10647225872080703, "grad_norm": 0.056868743151426315, "learning_rate": 9.988582705059531e-05, "loss": 0.0003, "step": 314 }, { "epoch": 0.1071504259738058, "grad_norm": 0.04921082779765129, "learning_rate": 9.988367372715434e-05, "loss": 0.0002, "step": 316 }, { "epoch": 0.10782859322680456, "grad_norm": 0.11438657343387604, "learning_rate": 9.988150031057555e-05, "loss": 0.0004, "step": 318 }, { "epoch": 0.10850676047980333, "grad_norm": 0.027646591886878014, "learning_rate": 9.987930680173438e-05, "loss": 0.0002, "step": 320 }, { "epoch": 0.1091849277328021, "grad_norm": 0.07535082846879959, "learning_rate": 9.987709320151441e-05, "loss": 0.0005, "step": 322 }, { "epoch": 0.10986309498580088, "grad_norm": 0.011354583315551281, "learning_rate": 9.987485951080727e-05, "loss": 0.0003, "step": 324 }, { "epoch": 0.11054126223879965, "grad_norm": 0.10233359783887863, "learning_rate": 9.987260573051269e-05, "loss": 0.0011, "step": 326 }, { "epoch": 0.11121942949179842, "grad_norm": 0.04623352363705635, "learning_rate": 9.987033186153847e-05, "loss": 0.0004, "step": 328 }, { "epoch": 0.11189759674479718, "grad_norm": 0.09176378697156906, "learning_rate": 9.986803790480054e-05, "loss": 0.0005, "step": 330 }, { "epoch": 0.11257576399779595, "grad_norm": 0.007960623130202293, "learning_rate": 9.986572386122291e-05, "loss": 0.0004, "step": 332 }, { "epoch": 0.11325393125079472, "grad_norm": 0.07089328020811081, "learning_rate": 9.986338973173768e-05, "loss": 0.0006, "step": 334 }, { "epoch": 0.1139320985037935, "grad_norm": 0.03843646124005318, "learning_rate": 9.986103551728501e-05, "loss": 0.0005, "step": 336 }, { "epoch": 0.11461026575679227, "grad_norm": 0.05794384330511093, "learning_rate": 9.985866121881321e-05, "loss": 0.0004, "step": 338 }, { "epoch": 0.11528843300979104, "grad_norm": 0.07073888182640076, "learning_rate": 9.985626683727863e-05, "loss": 0.0004, "step": 340 }, { "epoch": 0.1159666002627898, "grad_norm": 0.04688240587711334, "learning_rate": 9.985385237364573e-05, "loss": 0.0004, "step": 342 }, { "epoch": 0.11664476751578857, "grad_norm": 0.03950873389840126, "learning_rate": 9.985141782888705e-05, "loss": 0.0005, "step": 344 }, { "epoch": 0.11732293476878736, "grad_norm": 0.08916309475898743, "learning_rate": 9.984896320398324e-05, "loss": 0.0004, "step": 346 }, { "epoch": 0.11800110202178613, "grad_norm": 0.06798672676086426, "learning_rate": 9.984648849992301e-05, "loss": 0.0003, "step": 348 }, { "epoch": 0.1186792692747849, "grad_norm": 0.034365102648735046, "learning_rate": 9.984399371770318e-05, "loss": 0.0003, "step": 350 }, { "epoch": 0.11935743652778366, "grad_norm": 0.0313638299703598, "learning_rate": 9.984147885832864e-05, "loss": 0.0006, "step": 352 }, { "epoch": 0.12003560378078243, "grad_norm": 0.031012045219540596, "learning_rate": 9.983894392281237e-05, "loss": 0.0005, "step": 354 }, { "epoch": 0.12071377103378121, "grad_norm": 0.059786684811115265, "learning_rate": 9.983638891217544e-05, "loss": 0.0005, "step": 356 }, { "epoch": 0.12139193828677998, "grad_norm": 0.12950845062732697, "learning_rate": 9.983381382744703e-05, "loss": 0.0007, "step": 358 }, { "epoch": 0.12207010553977875, "grad_norm": 0.09296132624149323, "learning_rate": 9.983121866966437e-05, "loss": 0.0008, "step": 360 }, { "epoch": 0.12274827279277752, "grad_norm": 0.024494891986250877, "learning_rate": 9.982860343987277e-05, "loss": 0.0002, "step": 362 }, { "epoch": 0.12342644004577628, "grad_norm": 0.0354764498770237, "learning_rate": 9.982596813912569e-05, "loss": 0.0007, "step": 364 }, { "epoch": 0.12410460729877507, "grad_norm": 0.06026894226670265, "learning_rate": 9.982331276848458e-05, "loss": 0.0008, "step": 366 }, { "epoch": 0.12478277455177383, "grad_norm": 0.07126794010400772, "learning_rate": 9.982063732901906e-05, "loss": 0.0007, "step": 368 }, { "epoch": 0.1254609418047726, "grad_norm": 0.04826202988624573, "learning_rate": 9.981794182180677e-05, "loss": 0.0005, "step": 370 }, { "epoch": 0.12613910905777137, "grad_norm": 0.02070901356637478, "learning_rate": 9.981522624793347e-05, "loss": 0.0006, "step": 372 }, { "epoch": 0.12681727631077014, "grad_norm": 0.05529174581170082, "learning_rate": 9.9812490608493e-05, "loss": 0.0005, "step": 374 }, { "epoch": 0.1274954435637689, "grad_norm": 0.0771322250366211, "learning_rate": 9.980973490458728e-05, "loss": 0.0006, "step": 376 }, { "epoch": 0.12817361081676767, "grad_norm": 0.06000866740942001, "learning_rate": 9.98069591373263e-05, "loss": 0.0004, "step": 378 }, { "epoch": 0.12885177806976644, "grad_norm": 0.019623633474111557, "learning_rate": 9.980416330782811e-05, "loss": 0.0003, "step": 380 }, { "epoch": 0.12952994532276524, "grad_norm": 0.02568269707262516, "learning_rate": 9.980134741721891e-05, "loss": 0.0003, "step": 382 }, { "epoch": 0.130208112575764, "grad_norm": 0.02139587514102459, "learning_rate": 9.979851146663293e-05, "loss": 0.0002, "step": 384 }, { "epoch": 0.13088627982876277, "grad_norm": 0.013906003907322884, "learning_rate": 9.979565545721248e-05, "loss": 0.0002, "step": 386 }, { "epoch": 0.13156444708176154, "grad_norm": 0.09719321131706238, "learning_rate": 9.979277939010798e-05, "loss": 0.0003, "step": 388 }, { "epoch": 0.1322426143347603, "grad_norm": 0.012353319674730301, "learning_rate": 9.978988326647788e-05, "loss": 0.0001, "step": 390 }, { "epoch": 0.13292078158775908, "grad_norm": 0.09881331026554108, "learning_rate": 9.978696708748875e-05, "loss": 0.0002, "step": 392 }, { "epoch": 0.13359894884075785, "grad_norm": 0.006447671912610531, "learning_rate": 9.978403085431524e-05, "loss": 0.0005, "step": 394 }, { "epoch": 0.13427711609375662, "grad_norm": 0.02782581001520157, "learning_rate": 9.978107456814007e-05, "loss": 0.0004, "step": 396 }, { "epoch": 0.13495528334675538, "grad_norm": 0.03197741508483887, "learning_rate": 9.977809823015401e-05, "loss": 0.0003, "step": 398 }, { "epoch": 0.13563345059975415, "grad_norm": 0.121083565056324, "learning_rate": 9.977510184155591e-05, "loss": 0.0005, "step": 400 }, { "epoch": 0.13631161785275295, "grad_norm": 0.014267323538661003, "learning_rate": 9.977208540355278e-05, "loss": 0.0003, "step": 402 }, { "epoch": 0.13698978510575172, "grad_norm": 0.018701685592532158, "learning_rate": 9.976904891735958e-05, "loss": 0.0001, "step": 404 }, { "epoch": 0.13766795235875048, "grad_norm": 0.042165644466876984, "learning_rate": 9.976599238419944e-05, "loss": 0.0002, "step": 406 }, { "epoch": 0.13834611961174925, "grad_norm": 0.013971824198961258, "learning_rate": 9.97629158053035e-05, "loss": 0.0002, "step": 408 }, { "epoch": 0.13902428686474802, "grad_norm": 0.035180144011974335, "learning_rate": 9.975981918191103e-05, "loss": 0.0001, "step": 410 }, { "epoch": 0.1397024541177468, "grad_norm": 0.03857795521616936, "learning_rate": 9.975670251526936e-05, "loss": 0.0001, "step": 412 }, { "epoch": 0.14038062137074556, "grad_norm": 0.0060835485346615314, "learning_rate": 9.975356580663384e-05, "loss": 0.0, "step": 414 }, { "epoch": 0.14105878862374432, "grad_norm": 0.07395796477794647, "learning_rate": 9.975040905726798e-05, "loss": 0.0004, "step": 416 }, { "epoch": 0.1417369558767431, "grad_norm": 0.018834367394447327, "learning_rate": 9.974723226844331e-05, "loss": 0.0001, "step": 418 }, { "epoch": 0.14241512312974186, "grad_norm": 0.0059880041517317295, "learning_rate": 9.974403544143941e-05, "loss": 0.0004, "step": 420 }, { "epoch": 0.14309329038274066, "grad_norm": 0.019147314131259918, "learning_rate": 9.974081857754401e-05, "loss": 0.0001, "step": 422 }, { "epoch": 0.14377145763573942, "grad_norm": 0.0023199408315122128, "learning_rate": 9.973758167805281e-05, "loss": 0.0001, "step": 424 }, { "epoch": 0.1444496248887382, "grad_norm": 0.013452797196805477, "learning_rate": 9.973432474426967e-05, "loss": 0.0, "step": 426 }, { "epoch": 0.14512779214173696, "grad_norm": 0.06036541238427162, "learning_rate": 9.973104777750646e-05, "loss": 0.0007, "step": 428 }, { "epoch": 0.14580595939473573, "grad_norm": 0.03896476328372955, "learning_rate": 9.972775077908317e-05, "loss": 0.0003, "step": 430 }, { "epoch": 0.1464841266477345, "grad_norm": 0.017291929572820663, "learning_rate": 9.972443375032779e-05, "loss": 0.0, "step": 432 }, { "epoch": 0.14716229390073327, "grad_norm": 0.0299264844506979, "learning_rate": 9.972109669257646e-05, "loss": 0.0001, "step": 434 }, { "epoch": 0.14784046115373203, "grad_norm": 0.002126836683601141, "learning_rate": 9.971773960717332e-05, "loss": 0.0001, "step": 436 }, { "epoch": 0.1485186284067308, "grad_norm": 0.004080125596374273, "learning_rate": 9.971436249547061e-05, "loss": 0.0, "step": 438 }, { "epoch": 0.14919679565972957, "grad_norm": 0.021544527262449265, "learning_rate": 9.971096535882865e-05, "loss": 0.0, "step": 440 }, { "epoch": 0.14987496291272834, "grad_norm": 0.01083934772759676, "learning_rate": 9.970754819861577e-05, "loss": 0.0, "step": 442 }, { "epoch": 0.15055313016572713, "grad_norm": 0.005995896179229021, "learning_rate": 9.970411101620843e-05, "loss": 0.0001, "step": 444 }, { "epoch": 0.1512312974187259, "grad_norm": 0.03495006263256073, "learning_rate": 9.970065381299112e-05, "loss": 0.0002, "step": 446 }, { "epoch": 0.15190946467172467, "grad_norm": 0.012965913861989975, "learning_rate": 9.969717659035638e-05, "loss": 0.0003, "step": 448 }, { "epoch": 0.15258763192472344, "grad_norm": 0.04326948896050453, "learning_rate": 9.969367934970486e-05, "loss": 0.0001, "step": 450 }, { "epoch": 0.1532657991777222, "grad_norm": 0.03606222942471504, "learning_rate": 9.969016209244525e-05, "loss": 0.0002, "step": 452 }, { "epoch": 0.15394396643072097, "grad_norm": 0.017364950850605965, "learning_rate": 9.96866248199943e-05, "loss": 0.0001, "step": 454 }, { "epoch": 0.15462213368371974, "grad_norm": 0.029245370998978615, "learning_rate": 9.968306753377679e-05, "loss": 0.0002, "step": 456 }, { "epoch": 0.1553003009367185, "grad_norm": 0.011929360218346119, "learning_rate": 9.967949023522563e-05, "loss": 0.0001, "step": 458 }, { "epoch": 0.15597846818971728, "grad_norm": 0.02571127377450466, "learning_rate": 9.967589292578173e-05, "loss": 0.0001, "step": 460 }, { "epoch": 0.15665663544271605, "grad_norm": 0.0448257140815258, "learning_rate": 9.967227560689412e-05, "loss": 0.0002, "step": 462 }, { "epoch": 0.15733480269571484, "grad_norm": 0.014019777067005634, "learning_rate": 9.966863828001982e-05, "loss": 0.0002, "step": 464 }, { "epoch": 0.1580129699487136, "grad_norm": 0.009223355911672115, "learning_rate": 9.966498094662398e-05, "loss": 0.0001, "step": 466 }, { "epoch": 0.15869113720171238, "grad_norm": 0.010595602914690971, "learning_rate": 9.966130360817973e-05, "loss": 0.0, "step": 468 }, { "epoch": 0.15936930445471115, "grad_norm": 0.015226882882416248, "learning_rate": 9.965760626616834e-05, "loss": 0.0001, "step": 470 }, { "epoch": 0.16004747170770992, "grad_norm": 0.039237625896930695, "learning_rate": 9.96538889220791e-05, "loss": 0.0003, "step": 472 }, { "epoch": 0.16072563896070868, "grad_norm": 0.006564146839082241, "learning_rate": 9.965015157740931e-05, "loss": 0.0003, "step": 474 }, { "epoch": 0.16140380621370745, "grad_norm": 0.04217905178666115, "learning_rate": 9.964639423366442e-05, "loss": 0.0009, "step": 476 }, { "epoch": 0.16208197346670622, "grad_norm": 0.0012876184191554785, "learning_rate": 9.964261689235787e-05, "loss": 0.0, "step": 478 }, { "epoch": 0.162760140719705, "grad_norm": 0.004951702430844307, "learning_rate": 9.963881955501116e-05, "loss": 0.0001, "step": 480 }, { "epoch": 0.16343830797270376, "grad_norm": 0.02183827944099903, "learning_rate": 9.963500222315388e-05, "loss": 0.0003, "step": 482 }, { "epoch": 0.16411647522570255, "grad_norm": 0.020162342116236687, "learning_rate": 9.963116489832364e-05, "loss": 0.0004, "step": 484 }, { "epoch": 0.16479464247870132, "grad_norm": 0.002864987123757601, "learning_rate": 9.962730758206611e-05, "loss": 0.0, "step": 486 }, { "epoch": 0.1654728097317001, "grad_norm": 0.02149767428636551, "learning_rate": 9.962343027593502e-05, "loss": 0.0001, "step": 488 }, { "epoch": 0.16615097698469886, "grad_norm": 0.06681331247091293, "learning_rate": 9.961953298149215e-05, "loss": 0.0005, "step": 490 }, { "epoch": 0.16682914423769762, "grad_norm": 0.035218510776758194, "learning_rate": 9.961561570030734e-05, "loss": 0.0002, "step": 492 }, { "epoch": 0.1675073114906964, "grad_norm": 0.08965438604354858, "learning_rate": 9.961167843395845e-05, "loss": 0.0008, "step": 494 }, { "epoch": 0.16818547874369516, "grad_norm": 0.04331008344888687, "learning_rate": 9.960772118403141e-05, "loss": 0.0001, "step": 496 }, { "epoch": 0.16886364599669393, "grad_norm": 0.07946818321943283, "learning_rate": 9.960374395212022e-05, "loss": 0.0003, "step": 498 }, { "epoch": 0.1695418132496927, "grad_norm": 0.018603485077619553, "learning_rate": 9.95997467398269e-05, "loss": 0.0002, "step": 500 }, { "epoch": 0.17021998050269146, "grad_norm": 0.06275351345539093, "learning_rate": 9.959572954876151e-05, "loss": 0.0009, "step": 502 }, { "epoch": 0.17089814775569026, "grad_norm": 0.03865795210003853, "learning_rate": 9.95916923805422e-05, "loss": 0.0003, "step": 504 }, { "epoch": 0.17157631500868903, "grad_norm": 0.015474680811166763, "learning_rate": 9.958763523679514e-05, "loss": 0.0002, "step": 506 }, { "epoch": 0.1722544822616878, "grad_norm": 0.14019793272018433, "learning_rate": 9.958355811915451e-05, "loss": 0.0002, "step": 508 }, { "epoch": 0.17293264951468656, "grad_norm": 0.05070113018155098, "learning_rate": 9.957946102926263e-05, "loss": 0.0003, "step": 510 }, { "epoch": 0.17361081676768533, "grad_norm": 0.013154142536222935, "learning_rate": 9.957534396876976e-05, "loss": 0.0002, "step": 512 }, { "epoch": 0.1742889840206841, "grad_norm": 0.028667040169239044, "learning_rate": 9.957120693933428e-05, "loss": 0.0007, "step": 514 }, { "epoch": 0.17496715127368287, "grad_norm": 0.027827998623251915, "learning_rate": 9.956704994262256e-05, "loss": 0.0003, "step": 516 }, { "epoch": 0.17564531852668164, "grad_norm": 0.0170214232057333, "learning_rate": 9.956287298030905e-05, "loss": 0.0003, "step": 518 }, { "epoch": 0.1763234857796804, "grad_norm": 0.022373339161276817, "learning_rate": 9.955867605407624e-05, "loss": 0.0001, "step": 520 }, { "epoch": 0.17700165303267917, "grad_norm": 0.044036850333213806, "learning_rate": 9.955445916561463e-05, "loss": 0.0002, "step": 522 }, { "epoch": 0.17767982028567794, "grad_norm": 0.018346000462770462, "learning_rate": 9.955022231662281e-05, "loss": 0.0001, "step": 524 }, { "epoch": 0.17835798753867674, "grad_norm": 0.007154988124966621, "learning_rate": 9.954596550880735e-05, "loss": 0.0001, "step": 526 }, { "epoch": 0.1790361547916755, "grad_norm": 0.0529172420501709, "learning_rate": 9.954168874388292e-05, "loss": 0.0002, "step": 528 }, { "epoch": 0.17971432204467427, "grad_norm": 0.032745905220508575, "learning_rate": 9.953739202357218e-05, "loss": 0.0001, "step": 530 }, { "epoch": 0.18039248929767304, "grad_norm": 0.006617931183427572, "learning_rate": 9.953307534960587e-05, "loss": 0.0, "step": 532 }, { "epoch": 0.1810706565506718, "grad_norm": 0.028485778719186783, "learning_rate": 9.952873872372272e-05, "loss": 0.0001, "step": 534 }, { "epoch": 0.18174882380367058, "grad_norm": 0.005062984302639961, "learning_rate": 9.952438214766955e-05, "loss": 0.0, "step": 536 }, { "epoch": 0.18242699105666935, "grad_norm": 0.04665204510092735, "learning_rate": 9.952000562320115e-05, "loss": 0.0001, "step": 538 }, { "epoch": 0.18310515830966811, "grad_norm": 0.0015840464038774371, "learning_rate": 9.951560915208042e-05, "loss": 0.0, "step": 540 }, { "epoch": 0.18378332556266688, "grad_norm": 0.0015154321445152164, "learning_rate": 9.951119273607825e-05, "loss": 0.0, "step": 542 }, { "epoch": 0.18446149281566565, "grad_norm": 0.04031079262495041, "learning_rate": 9.950675637697355e-05, "loss": 0.0001, "step": 544 }, { "epoch": 0.18513966006866445, "grad_norm": 0.039752230048179626, "learning_rate": 9.950230007655332e-05, "loss": 0.0007, "step": 546 }, { "epoch": 0.18581782732166321, "grad_norm": 0.07987188547849655, "learning_rate": 9.949782383661254e-05, "loss": 0.0007, "step": 548 }, { "epoch": 0.18649599457466198, "grad_norm": 0.004725519567728043, "learning_rate": 9.949332765895423e-05, "loss": 0.0, "step": 550 }, { "epoch": 0.18717416182766075, "grad_norm": 0.014836054295301437, "learning_rate": 9.948881154538945e-05, "loss": 0.0001, "step": 552 }, { "epoch": 0.18785232908065952, "grad_norm": 0.0027612450066953897, "learning_rate": 9.94842754977373e-05, "loss": 0.0001, "step": 554 }, { "epoch": 0.1885304963336583, "grad_norm": 0.009966784156858921, "learning_rate": 9.94797195178249e-05, "loss": 0.0001, "step": 556 }, { "epoch": 0.18920866358665706, "grad_norm": 0.02951398678123951, "learning_rate": 9.947514360748741e-05, "loss": 0.0001, "step": 558 }, { "epoch": 0.18988683083965582, "grad_norm": 0.02646208181977272, "learning_rate": 9.947054776856798e-05, "loss": 0.0002, "step": 560 }, { "epoch": 0.1905649980926546, "grad_norm": 0.0137828653678298, "learning_rate": 9.946593200291783e-05, "loss": 0.0, "step": 562 }, { "epoch": 0.19124316534565336, "grad_norm": 0.06634095311164856, "learning_rate": 9.946129631239618e-05, "loss": 0.0002, "step": 564 }, { "epoch": 0.19192133259865216, "grad_norm": 0.03192392364144325, "learning_rate": 9.945664069887028e-05, "loss": 0.0001, "step": 566 }, { "epoch": 0.19259949985165092, "grad_norm": 0.009010384790599346, "learning_rate": 9.945196516421543e-05, "loss": 0.0, "step": 568 }, { "epoch": 0.1932776671046497, "grad_norm": 0.03663842752575874, "learning_rate": 9.944726971031493e-05, "loss": 0.0002, "step": 570 }, { "epoch": 0.19395583435764846, "grad_norm": 0.12181317061185837, "learning_rate": 9.94425543390601e-05, "loss": 0.0002, "step": 572 }, { "epoch": 0.19463400161064723, "grad_norm": 0.01086985133588314, "learning_rate": 9.94378190523503e-05, "loss": 0.0, "step": 574 }, { "epoch": 0.195312168863646, "grad_norm": 0.030524946749210358, "learning_rate": 9.943306385209289e-05, "loss": 0.0, "step": 576 }, { "epoch": 0.19599033611664476, "grad_norm": 0.007708962541073561, "learning_rate": 9.942828874020327e-05, "loss": 0.0, "step": 578 }, { "epoch": 0.19666850336964353, "grad_norm": 0.007101174909621477, "learning_rate": 9.942349371860487e-05, "loss": 0.0001, "step": 580 }, { "epoch": 0.1973466706226423, "grad_norm": 0.0024575525894761086, "learning_rate": 9.94186787892291e-05, "loss": 0.0001, "step": 582 }, { "epoch": 0.19802483787564107, "grad_norm": 0.008838510140776634, "learning_rate": 9.941384395401543e-05, "loss": 0.0, "step": 584 }, { "epoch": 0.19870300512863986, "grad_norm": 0.003978129476308823, "learning_rate": 9.940898921491131e-05, "loss": 0.0007, "step": 586 }, { "epoch": 0.19938117238163863, "grad_norm": 0.026870913803577423, "learning_rate": 9.940411457387227e-05, "loss": 0.0002, "step": 588 }, { "epoch": 0.2000593396346374, "grad_norm": 0.009151192381978035, "learning_rate": 9.939922003286176e-05, "loss": 0.0, "step": 590 }, { "epoch": 0.20073750688763617, "grad_norm": 0.051844075322151184, "learning_rate": 9.939430559385135e-05, "loss": 0.0002, "step": 592 }, { "epoch": 0.20141567414063494, "grad_norm": 0.06585478037595749, "learning_rate": 9.938937125882053e-05, "loss": 0.0002, "step": 594 }, { "epoch": 0.2020938413936337, "grad_norm": 0.05280110239982605, "learning_rate": 9.938441702975689e-05, "loss": 0.0002, "step": 596 }, { "epoch": 0.20277200864663247, "grad_norm": 0.03558439016342163, "learning_rate": 9.937944290865597e-05, "loss": 0.0001, "step": 598 }, { "epoch": 0.20345017589963124, "grad_norm": 0.005566301755607128, "learning_rate": 9.937444889752137e-05, "loss": 0.0, "step": 600 }, { "epoch": 0.20412834315263, "grad_norm": 0.004138810560107231, "learning_rate": 9.936943499836463e-05, "loss": 0.0, "step": 602 }, { "epoch": 0.20480651040562878, "grad_norm": 0.00421471381559968, "learning_rate": 9.936440121320539e-05, "loss": 0.0, "step": 604 }, { "epoch": 0.20548467765862755, "grad_norm": 0.0489053912460804, "learning_rate": 9.935934754407125e-05, "loss": 0.0002, "step": 606 }, { "epoch": 0.20616284491162634, "grad_norm": 0.050261691212654114, "learning_rate": 9.935427399299782e-05, "loss": 0.0003, "step": 608 }, { "epoch": 0.2068410121646251, "grad_norm": 0.01245457585901022, "learning_rate": 9.934918056202871e-05, "loss": 0.0001, "step": 610 }, { "epoch": 0.20751917941762388, "grad_norm": 0.031247129663825035, "learning_rate": 9.934406725321559e-05, "loss": 0.0002, "step": 612 }, { "epoch": 0.20819734667062265, "grad_norm": 0.06600417196750641, "learning_rate": 9.933893406861807e-05, "loss": 0.0002, "step": 614 }, { "epoch": 0.20887551392362141, "grad_norm": 0.023129012435674667, "learning_rate": 9.933378101030381e-05, "loss": 0.0007, "step": 616 }, { "epoch": 0.20955368117662018, "grad_norm": 0.018992137163877487, "learning_rate": 9.932860808034848e-05, "loss": 0.0001, "step": 618 }, { "epoch": 0.21023184842961895, "grad_norm": 0.01329959649592638, "learning_rate": 9.932341528083569e-05, "loss": 0.0, "step": 620 }, { "epoch": 0.21091001568261772, "grad_norm": 0.07119770348072052, "learning_rate": 9.931820261385714e-05, "loss": 0.0005, "step": 622 }, { "epoch": 0.2115881829356165, "grad_norm": 0.08495577424764633, "learning_rate": 9.931297008151246e-05, "loss": 0.0001, "step": 624 }, { "epoch": 0.21226635018861525, "grad_norm": 0.0634210929274559, "learning_rate": 9.930771768590933e-05, "loss": 0.0001, "step": 626 }, { "epoch": 0.21294451744161405, "grad_norm": 0.10992208123207092, "learning_rate": 9.930244542916342e-05, "loss": 0.0006, "step": 628 }, { "epoch": 0.21362268469461282, "grad_norm": 0.058085739612579346, "learning_rate": 9.929715331339839e-05, "loss": 0.0003, "step": 630 }, { "epoch": 0.2143008519476116, "grad_norm": 0.0160099845379591, "learning_rate": 9.92918413407459e-05, "loss": 0.0003, "step": 632 }, { "epoch": 0.21497901920061036, "grad_norm": 0.46785369515419006, "learning_rate": 9.928650951334559e-05, "loss": 0.0008, "step": 634 }, { "epoch": 0.21565718645360912, "grad_norm": 0.059186507016420364, "learning_rate": 9.928115783334518e-05, "loss": 0.0003, "step": 636 }, { "epoch": 0.2163353537066079, "grad_norm": 0.07586536556482315, "learning_rate": 9.927578630290025e-05, "loss": 0.0016, "step": 638 }, { "epoch": 0.21701352095960666, "grad_norm": 0.10785187780857086, "learning_rate": 9.927039492417452e-05, "loss": 0.0014, "step": 640 }, { "epoch": 0.21769168821260543, "grad_norm": 0.11385278403759003, "learning_rate": 9.926498369933959e-05, "loss": 0.0014, "step": 642 }, { "epoch": 0.2183698554656042, "grad_norm": 0.08722253888845444, "learning_rate": 9.925955263057511e-05, "loss": 0.0012, "step": 644 }, { "epoch": 0.21904802271860296, "grad_norm": 0.09422720223665237, "learning_rate": 9.925410172006874e-05, "loss": 0.0012, "step": 646 }, { "epoch": 0.21972618997160176, "grad_norm": 0.08135107904672623, "learning_rate": 9.924863097001607e-05, "loss": 0.0009, "step": 648 }, { "epoch": 0.22040435722460053, "grad_norm": 0.06382672488689423, "learning_rate": 9.924314038262074e-05, "loss": 0.0007, "step": 650 }, { "epoch": 0.2210825244775993, "grad_norm": 0.04187219962477684, "learning_rate": 9.923762996009438e-05, "loss": 0.0004, "step": 652 }, { "epoch": 0.22176069173059806, "grad_norm": 0.11694247275590897, "learning_rate": 9.923209970465652e-05, "loss": 0.0006, "step": 654 }, { "epoch": 0.22243885898359683, "grad_norm": 0.15844054520130157, "learning_rate": 9.922654961853481e-05, "loss": 0.0007, "step": 656 }, { "epoch": 0.2231170262365956, "grad_norm": 0.055115025490522385, "learning_rate": 9.92209797039648e-05, "loss": 0.0005, "step": 658 }, { "epoch": 0.22379519348959437, "grad_norm": 0.05294685438275337, "learning_rate": 9.921538996319004e-05, "loss": 0.0005, "step": 660 }, { "epoch": 0.22447336074259314, "grad_norm": 0.05556558817625046, "learning_rate": 9.92097803984621e-05, "loss": 0.0005, "step": 662 }, { "epoch": 0.2251515279955919, "grad_norm": 0.02577158622443676, "learning_rate": 9.92041510120405e-05, "loss": 0.0005, "step": 664 }, { "epoch": 0.22582969524859067, "grad_norm": 0.06684298813343048, "learning_rate": 9.919850180619274e-05, "loss": 0.0005, "step": 666 }, { "epoch": 0.22650786250158944, "grad_norm": 0.08713594824075699, "learning_rate": 9.919283278319436e-05, "loss": 0.0011, "step": 668 }, { "epoch": 0.22718602975458824, "grad_norm": 0.06460582464933395, "learning_rate": 9.91871439453288e-05, "loss": 0.0009, "step": 670 }, { "epoch": 0.227864197007587, "grad_norm": 0.06616802513599396, "learning_rate": 9.918143529488755e-05, "loss": 0.0007, "step": 672 }, { "epoch": 0.22854236426058577, "grad_norm": 0.026611236855387688, "learning_rate": 9.917570683417004e-05, "loss": 0.0011, "step": 674 }, { "epoch": 0.22922053151358454, "grad_norm": 0.07026272267103195, "learning_rate": 9.91699585654837e-05, "loss": 0.0006, "step": 676 }, { "epoch": 0.2298986987665833, "grad_norm": 0.11559715121984482, "learning_rate": 9.916419049114392e-05, "loss": 0.0009, "step": 678 }, { "epoch": 0.23057686601958208, "grad_norm": 0.0518777072429657, "learning_rate": 9.915840261347411e-05, "loss": 0.0003, "step": 680 }, { "epoch": 0.23125503327258085, "grad_norm": 0.060435306280851364, "learning_rate": 9.915259493480559e-05, "loss": 0.0007, "step": 682 }, { "epoch": 0.2319332005255796, "grad_norm": 0.038414761424064636, "learning_rate": 9.914676745747772e-05, "loss": 0.0004, "step": 684 }, { "epoch": 0.23261136777857838, "grad_norm": 0.031407903879880905, "learning_rate": 9.914092018383778e-05, "loss": 0.0004, "step": 686 }, { "epoch": 0.23328953503157715, "grad_norm": 0.023152906447649002, "learning_rate": 9.913505311624108e-05, "loss": 0.0005, "step": 688 }, { "epoch": 0.23396770228457595, "grad_norm": 0.0736289694905281, "learning_rate": 9.912916625705087e-05, "loss": 0.0006, "step": 690 }, { "epoch": 0.23464586953757471, "grad_norm": 0.03230787813663483, "learning_rate": 9.912325960863834e-05, "loss": 0.0002, "step": 692 }, { "epoch": 0.23532403679057348, "grad_norm": 0.06092187389731407, "learning_rate": 9.911733317338272e-05, "loss": 0.0003, "step": 694 }, { "epoch": 0.23600220404357225, "grad_norm": 0.02147577330470085, "learning_rate": 9.911138695367119e-05, "loss": 0.0003, "step": 696 }, { "epoch": 0.23668037129657102, "grad_norm": 0.10465848445892334, "learning_rate": 9.910542095189885e-05, "loss": 0.0011, "step": 698 }, { "epoch": 0.2373585385495698, "grad_norm": 0.051002729684114456, "learning_rate": 9.909943517046884e-05, "loss": 0.0003, "step": 700 }, { "epoch": 0.23803670580256855, "grad_norm": 0.04811201989650726, "learning_rate": 9.90934296117922e-05, "loss": 0.0003, "step": 702 }, { "epoch": 0.23871487305556732, "grad_norm": 0.024624748155474663, "learning_rate": 9.908740427828799e-05, "loss": 0.0004, "step": 704 }, { "epoch": 0.2393930403085661, "grad_norm": 0.05176735296845436, "learning_rate": 9.908135917238321e-05, "loss": 0.0002, "step": 706 }, { "epoch": 0.24007120756156486, "grad_norm": 0.04927726835012436, "learning_rate": 9.907529429651281e-05, "loss": 0.0009, "step": 708 }, { "epoch": 0.24074937481456365, "grad_norm": 0.031045028939843178, "learning_rate": 9.906920965311974e-05, "loss": 0.0001, "step": 710 }, { "epoch": 0.24142754206756242, "grad_norm": 0.10256350040435791, "learning_rate": 9.90631052446549e-05, "loss": 0.0006, "step": 712 }, { "epoch": 0.2421057093205612, "grad_norm": 0.03954285383224487, "learning_rate": 9.905698107357714e-05, "loss": 0.0006, "step": 714 }, { "epoch": 0.24278387657355996, "grad_norm": 0.05060918629169464, "learning_rate": 9.905083714235326e-05, "loss": 0.0007, "step": 716 }, { "epoch": 0.24346204382655873, "grad_norm": 0.019770212471485138, "learning_rate": 9.904467345345805e-05, "loss": 0.0008, "step": 718 }, { "epoch": 0.2441402110795575, "grad_norm": 0.0872698500752449, "learning_rate": 9.903849000937423e-05, "loss": 0.0006, "step": 720 }, { "epoch": 0.24481837833255626, "grad_norm": 0.08249643445014954, "learning_rate": 9.90322868125925e-05, "loss": 0.0006, "step": 722 }, { "epoch": 0.24549654558555503, "grad_norm": 0.1213611587882042, "learning_rate": 9.902606386561151e-05, "loss": 0.0009, "step": 724 }, { "epoch": 0.2461747128385538, "grad_norm": 0.12369897216558456, "learning_rate": 9.901982117093786e-05, "loss": 0.0009, "step": 726 }, { "epoch": 0.24685288009155257, "grad_norm": 0.04433707520365715, "learning_rate": 9.901355873108609e-05, "loss": 0.0003, "step": 728 }, { "epoch": 0.24753104734455136, "grad_norm": 0.03901137784123421, "learning_rate": 9.900727654857874e-05, "loss": 0.0005, "step": 730 }, { "epoch": 0.24820921459755013, "grad_norm": 0.060861486941576004, "learning_rate": 9.900097462594625e-05, "loss": 0.0008, "step": 732 }, { "epoch": 0.2488873818505489, "grad_norm": 0.0498674139380455, "learning_rate": 9.899465296572706e-05, "loss": 0.0005, "step": 734 }, { "epoch": 0.24956554910354767, "grad_norm": 0.10359224677085876, "learning_rate": 9.898831157046749e-05, "loss": 0.0006, "step": 736 }, { "epoch": 0.2502437163565464, "grad_norm": 0.06794043630361557, "learning_rate": 9.898195044272189e-05, "loss": 0.0005, "step": 738 }, { "epoch": 0.2509218836095452, "grad_norm": 0.02869260124862194, "learning_rate": 9.89755695850525e-05, "loss": 0.0002, "step": 740 }, { "epoch": 0.251600050862544, "grad_norm": 0.04206472635269165, "learning_rate": 9.896916900002954e-05, "loss": 0.0005, "step": 742 }, { "epoch": 0.25227821811554274, "grad_norm": 0.12347942590713501, "learning_rate": 9.896274869023117e-05, "loss": 0.0003, "step": 744 }, { "epoch": 0.25295638536854154, "grad_norm": 0.01147325336933136, "learning_rate": 9.895630865824347e-05, "loss": 0.0001, "step": 746 }, { "epoch": 0.2536345526215403, "grad_norm": 0.10761839151382446, "learning_rate": 9.89498489066605e-05, "loss": 0.0002, "step": 748 }, { "epoch": 0.2543127198745391, "grad_norm": 0.06379670649766922, "learning_rate": 9.894336943808426e-05, "loss": 0.0005, "step": 750 }, { "epoch": 0.2549908871275378, "grad_norm": 0.19594375789165497, "learning_rate": 9.893687025512465e-05, "loss": 0.001, "step": 752 }, { "epoch": 0.2556690543805366, "grad_norm": 0.04399771988391876, "learning_rate": 9.893035136039956e-05, "loss": 0.0002, "step": 754 }, { "epoch": 0.25634722163353535, "grad_norm": 0.09866727888584137, "learning_rate": 9.892381275653478e-05, "loss": 0.0006, "step": 756 }, { "epoch": 0.25702538888653415, "grad_norm": 0.03635840862989426, "learning_rate": 9.891725444616409e-05, "loss": 0.0011, "step": 758 }, { "epoch": 0.2577035561395329, "grad_norm": 0.026622148230671883, "learning_rate": 9.891067643192915e-05, "loss": 0.0003, "step": 760 }, { "epoch": 0.2583817233925317, "grad_norm": 0.047546133399009705, "learning_rate": 9.89040787164796e-05, "loss": 0.0004, "step": 762 }, { "epoch": 0.2590598906455305, "grad_norm": 0.17135459184646606, "learning_rate": 9.889746130247297e-05, "loss": 0.0005, "step": 764 }, { "epoch": 0.2597380578985292, "grad_norm": 0.05973830074071884, "learning_rate": 9.889082419257481e-05, "loss": 0.0003, "step": 766 }, { "epoch": 0.260416225151528, "grad_norm": 0.08419614285230637, "learning_rate": 9.888416738945848e-05, "loss": 0.0009, "step": 768 }, { "epoch": 0.26109439240452675, "grad_norm": 0.08141511678695679, "learning_rate": 9.887749089580538e-05, "loss": 0.0002, "step": 770 }, { "epoch": 0.26177255965752555, "grad_norm": 0.40493646264076233, "learning_rate": 9.88707947143048e-05, "loss": 0.0007, "step": 772 }, { "epoch": 0.2624507269105243, "grad_norm": 0.24903036653995514, "learning_rate": 9.886407884765395e-05, "loss": 0.0005, "step": 774 }, { "epoch": 0.2631288941635231, "grad_norm": 0.7945914268493652, "learning_rate": 9.885734329855798e-05, "loss": 0.0008, "step": 776 }, { "epoch": 0.2638070614165218, "grad_norm": 5.618687152862549, "learning_rate": 9.885058806972997e-05, "loss": 0.0147, "step": 778 }, { "epoch": 0.2644852286695206, "grad_norm": 0.3053636848926544, "learning_rate": 9.884381316389093e-05, "loss": 0.0051, "step": 780 }, { "epoch": 0.2651633959225194, "grad_norm": 0.8200685977935791, "learning_rate": 9.883701858376979e-05, "loss": 0.0078, "step": 782 }, { "epoch": 0.26584156317551816, "grad_norm": 0.3827979564666748, "learning_rate": 9.883020433210342e-05, "loss": 0.0048, "step": 784 }, { "epoch": 0.26651973042851695, "grad_norm": 49.57893753051758, "learning_rate": 9.882337041163656e-05, "loss": 0.3228, "step": 786 }, { "epoch": 0.2671978976815157, "grad_norm": 82.7600326538086, "learning_rate": 9.881651682512195e-05, "loss": 1.323, "step": 788 }, { "epoch": 0.2678760649345145, "grad_norm": 13.147876739501953, "learning_rate": 9.88096435753202e-05, "loss": 0.2319, "step": 790 }, { "epoch": 0.26855423218751323, "grad_norm": 4.195157527923584, "learning_rate": 9.880275066499985e-05, "loss": 0.1171, "step": 792 }, { "epoch": 0.269232399440512, "grad_norm": 0.9070939421653748, "learning_rate": 9.879583809693738e-05, "loss": 0.0663, "step": 794 }, { "epoch": 0.26991056669351077, "grad_norm": 0.3190385401248932, "learning_rate": 9.878890587391714e-05, "loss": 0.0182, "step": 796 }, { "epoch": 0.27058873394650956, "grad_norm": 0.15086790919303894, "learning_rate": 9.878195399873147e-05, "loss": 0.006, "step": 798 }, { "epoch": 0.2712669011995083, "grad_norm": 0.148417666554451, "learning_rate": 9.877498247418056e-05, "loss": 0.0021, "step": 800 }, { "epoch": 0.2719450684525071, "grad_norm": 0.1928234100341797, "learning_rate": 9.876799130307253e-05, "loss": 0.0024, "step": 802 }, { "epoch": 0.2726232357055059, "grad_norm": 0.4669875204563141, "learning_rate": 9.876098048822343e-05, "loss": 0.0257, "step": 804 }, { "epoch": 0.27330140295850464, "grad_norm": 1.2202422618865967, "learning_rate": 9.875395003245724e-05, "loss": 0.0405, "step": 806 }, { "epoch": 0.27397957021150343, "grad_norm": 0.32797545194625854, "learning_rate": 9.874689993860579e-05, "loss": 0.0319, "step": 808 }, { "epoch": 0.27465773746450217, "grad_norm": 0.9610121846199036, "learning_rate": 9.873983020950888e-05, "loss": 0.0205, "step": 810 }, { "epoch": 0.27533590471750097, "grad_norm": 0.3528335690498352, "learning_rate": 9.87327408480142e-05, "loss": 0.0073, "step": 812 }, { "epoch": 0.2760140719704997, "grad_norm": 0.41824105381965637, "learning_rate": 9.872563185697732e-05, "loss": 0.0054, "step": 814 }, { "epoch": 0.2766922392234985, "grad_norm": 0.2852812111377716, "learning_rate": 9.871850323926177e-05, "loss": 0.004, "step": 816 }, { "epoch": 0.27737040647649724, "grad_norm": 1.4140238761901855, "learning_rate": 9.871135499773894e-05, "loss": 0.0105, "step": 818 }, { "epoch": 0.27804857372949604, "grad_norm": 0.8318770527839661, "learning_rate": 9.870418713528812e-05, "loss": 0.006, "step": 820 }, { "epoch": 0.2787267409824948, "grad_norm": 0.7967255711555481, "learning_rate": 9.869699965479657e-05, "loss": 0.0044, "step": 822 }, { "epoch": 0.2794049082354936, "grad_norm": 0.5289302468299866, "learning_rate": 9.868979255915938e-05, "loss": 0.0131, "step": 824 }, { "epoch": 0.2800830754884924, "grad_norm": 3.1792924404144287, "learning_rate": 9.868256585127956e-05, "loss": 0.0473, "step": 826 }, { "epoch": 0.2807612427414911, "grad_norm": 0.2872530221939087, "learning_rate": 9.867531953406803e-05, "loss": 0.0073, "step": 828 }, { "epoch": 0.2814394099944899, "grad_norm": 0.14570827782154083, "learning_rate": 9.866805361044362e-05, "loss": 0.01, "step": 830 }, { "epoch": 0.28211757724748865, "grad_norm": 0.2351459115743637, "learning_rate": 9.866076808333304e-05, "loss": 0.0053, "step": 832 }, { "epoch": 0.28279574450048744, "grad_norm": 0.14194971323013306, "learning_rate": 9.86534629556709e-05, "loss": 0.006, "step": 834 }, { "epoch": 0.2834739117534862, "grad_norm": 0.21115270256996155, "learning_rate": 9.864613823039969e-05, "loss": 0.0041, "step": 836 }, { "epoch": 0.284152079006485, "grad_norm": 0.17921510338783264, "learning_rate": 9.863879391046984e-05, "loss": 0.0044, "step": 838 }, { "epoch": 0.2848302462594837, "grad_norm": 0.1330479234457016, "learning_rate": 9.863142999883959e-05, "loss": 0.003, "step": 840 }, { "epoch": 0.2855084135124825, "grad_norm": 0.09766431152820587, "learning_rate": 9.862404649847516e-05, "loss": 0.0021, "step": 842 }, { "epoch": 0.2861865807654813, "grad_norm": 0.12152712047100067, "learning_rate": 9.861664341235063e-05, "loss": 0.0042, "step": 844 }, { "epoch": 0.28686474801848005, "grad_norm": 0.1460103988647461, "learning_rate": 9.860922074344794e-05, "loss": 0.0022, "step": 846 }, { "epoch": 0.28754291527147885, "grad_norm": 0.16154733300209045, "learning_rate": 9.860177849475694e-05, "loss": 0.0029, "step": 848 }, { "epoch": 0.2882210825244776, "grad_norm": 0.08406810462474823, "learning_rate": 9.85943166692754e-05, "loss": 0.0012, "step": 850 }, { "epoch": 0.2888992497774764, "grad_norm": 0.15870791673660278, "learning_rate": 9.858683527000887e-05, "loss": 0.0018, "step": 852 }, { "epoch": 0.2895774170304751, "grad_norm": 0.07115360349416733, "learning_rate": 9.857933429997094e-05, "loss": 0.0011, "step": 854 }, { "epoch": 0.2902555842834739, "grad_norm": 0.052113912999629974, "learning_rate": 9.857181376218295e-05, "loss": 0.0008, "step": 856 }, { "epoch": 0.29093375153647266, "grad_norm": 0.059886470437049866, "learning_rate": 9.856427365967419e-05, "loss": 0.0009, "step": 858 }, { "epoch": 0.29161191878947146, "grad_norm": 0.0380706861615181, "learning_rate": 9.855671399548181e-05, "loss": 0.0009, "step": 860 }, { "epoch": 0.2922900860424702, "grad_norm": 0.017071694135665894, "learning_rate": 9.854913477265084e-05, "loss": 0.0005, "step": 862 }, { "epoch": 0.292968253295469, "grad_norm": 0.09737870842218399, "learning_rate": 9.854153599423418e-05, "loss": 0.0006, "step": 864 }, { "epoch": 0.2936464205484678, "grad_norm": 0.07049310207366943, "learning_rate": 9.853391766329263e-05, "loss": 0.0008, "step": 866 }, { "epoch": 0.29432458780146653, "grad_norm": 0.07267790287733078, "learning_rate": 9.852627978289486e-05, "loss": 0.0009, "step": 868 }, { "epoch": 0.2950027550544653, "grad_norm": 0.03373875841498375, "learning_rate": 9.851862235611737e-05, "loss": 0.0006, "step": 870 }, { "epoch": 0.29568092230746407, "grad_norm": 0.11455586552619934, "learning_rate": 9.851094538604461e-05, "loss": 0.0008, "step": 872 }, { "epoch": 0.29635908956046286, "grad_norm": 0.028519853949546814, "learning_rate": 9.850324887576887e-05, "loss": 0.0002, "step": 874 }, { "epoch": 0.2970372568134616, "grad_norm": 0.07312014698982239, "learning_rate": 9.849553282839025e-05, "loss": 0.0013, "step": 876 }, { "epoch": 0.2977154240664604, "grad_norm": 0.1544421762228012, "learning_rate": 9.848779724701684e-05, "loss": 0.0008, "step": 878 }, { "epoch": 0.29839359131945914, "grad_norm": 0.049141231924295425, "learning_rate": 9.848004213476449e-05, "loss": 0.0006, "step": 880 }, { "epoch": 0.29907175857245794, "grad_norm": 0.08961957693099976, "learning_rate": 9.847226749475695e-05, "loss": 0.0006, "step": 882 }, { "epoch": 0.2997499258254567, "grad_norm": 0.05696597695350647, "learning_rate": 9.846447333012588e-05, "loss": 0.0005, "step": 884 }, { "epoch": 0.30042809307845547, "grad_norm": 0.08603312820196152, "learning_rate": 9.845665964401071e-05, "loss": 0.0012, "step": 886 }, { "epoch": 0.30110626033145427, "grad_norm": 0.02400689572095871, "learning_rate": 9.844882643955887e-05, "loss": 0.0003, "step": 888 }, { "epoch": 0.301784427584453, "grad_norm": 0.028485383838415146, "learning_rate": 9.844097371992552e-05, "loss": 0.0003, "step": 890 }, { "epoch": 0.3024625948374518, "grad_norm": 0.042584579437971115, "learning_rate": 9.843310148827374e-05, "loss": 0.0004, "step": 892 }, { "epoch": 0.30314076209045054, "grad_norm": 0.051422711461782455, "learning_rate": 9.842520974777448e-05, "loss": 0.0003, "step": 894 }, { "epoch": 0.30381892934344934, "grad_norm": 0.04666488617658615, "learning_rate": 9.841729850160652e-05, "loss": 0.0006, "step": 896 }, { "epoch": 0.3044970965964481, "grad_norm": 0.030913488939404488, "learning_rate": 9.840936775295653e-05, "loss": 0.0005, "step": 898 }, { "epoch": 0.3051752638494469, "grad_norm": 0.07295496016740799, "learning_rate": 9.840141750501898e-05, "loss": 0.0003, "step": 900 }, { "epoch": 0.3058534311024456, "grad_norm": 0.04738324508070946, "learning_rate": 9.839344776099625e-05, "loss": 0.0006, "step": 902 }, { "epoch": 0.3065315983554444, "grad_norm": 0.021162323653697968, "learning_rate": 9.838545852409857e-05, "loss": 0.0002, "step": 904 }, { "epoch": 0.3072097656084432, "grad_norm": 0.013712971471250057, "learning_rate": 9.837744979754396e-05, "loss": 0.0001, "step": 906 }, { "epoch": 0.30788793286144195, "grad_norm": 0.04329269379377365, "learning_rate": 9.836942158455837e-05, "loss": 0.0004, "step": 908 }, { "epoch": 0.30856610011444074, "grad_norm": 0.011373177170753479, "learning_rate": 9.836137388837554e-05, "loss": 0.0002, "step": 910 }, { "epoch": 0.3092442673674395, "grad_norm": 0.1146940067410469, "learning_rate": 9.83533067122371e-05, "loss": 0.001, "step": 912 }, { "epoch": 0.3099224346204383, "grad_norm": 0.01689288392663002, "learning_rate": 9.834522005939248e-05, "loss": 0.0002, "step": 914 }, { "epoch": 0.310600601873437, "grad_norm": 0.03612351417541504, "learning_rate": 9.833711393309903e-05, "loss": 0.0004, "step": 916 }, { "epoch": 0.3112787691264358, "grad_norm": 0.05595879629254341, "learning_rate": 9.832898833662185e-05, "loss": 0.0002, "step": 918 }, { "epoch": 0.31195693637943456, "grad_norm": 0.03937789425253868, "learning_rate": 9.832084327323394e-05, "loss": 0.0002, "step": 920 }, { "epoch": 0.31263510363243335, "grad_norm": 0.00396231422200799, "learning_rate": 9.831267874621616e-05, "loss": 0.0, "step": 922 }, { "epoch": 0.3133132708854321, "grad_norm": 0.02203402668237686, "learning_rate": 9.830449475885715e-05, "loss": 0.0002, "step": 924 }, { "epoch": 0.3139914381384309, "grad_norm": 0.016906969249248505, "learning_rate": 9.829629131445342e-05, "loss": 0.0001, "step": 926 }, { "epoch": 0.3146696053914297, "grad_norm": 0.028574733063578606, "learning_rate": 9.828806841630933e-05, "loss": 0.0004, "step": 928 }, { "epoch": 0.3153477726444284, "grad_norm": 0.03310710936784744, "learning_rate": 9.827982606773705e-05, "loss": 0.0002, "step": 930 }, { "epoch": 0.3160259398974272, "grad_norm": 0.04199539124965668, "learning_rate": 9.827156427205662e-05, "loss": 0.0002, "step": 932 }, { "epoch": 0.31670410715042596, "grad_norm": 0.04112956300377846, "learning_rate": 9.826328303259586e-05, "loss": 0.0002, "step": 934 }, { "epoch": 0.31738227440342476, "grad_norm": 0.04984778165817261, "learning_rate": 9.825498235269046e-05, "loss": 0.0001, "step": 936 }, { "epoch": 0.3180604416564235, "grad_norm": 0.079465851187706, "learning_rate": 9.824666223568395e-05, "loss": 0.0004, "step": 938 }, { "epoch": 0.3187386089094223, "grad_norm": 0.007913575507700443, "learning_rate": 9.823832268492768e-05, "loss": 0.0001, "step": 940 }, { "epoch": 0.31941677616242103, "grad_norm": 0.027005091309547424, "learning_rate": 9.822996370378077e-05, "loss": 0.0002, "step": 942 }, { "epoch": 0.32009494341541983, "grad_norm": 0.03642921522259712, "learning_rate": 9.822158529561026e-05, "loss": 0.0002, "step": 944 }, { "epoch": 0.3207731106684186, "grad_norm": 0.006631182506680489, "learning_rate": 9.821318746379095e-05, "loss": 0.0004, "step": 946 }, { "epoch": 0.32145127792141737, "grad_norm": 0.028765078634023666, "learning_rate": 9.820477021170551e-05, "loss": 0.0002, "step": 948 }, { "epoch": 0.32212944517441616, "grad_norm": 0.03270328417420387, "learning_rate": 9.819633354274439e-05, "loss": 0.0002, "step": 950 }, { "epoch": 0.3228076124274149, "grad_norm": 0.00990514736622572, "learning_rate": 9.818787746030587e-05, "loss": 0.0001, "step": 952 }, { "epoch": 0.3234857796804137, "grad_norm": 0.028807222843170166, "learning_rate": 9.81794019677961e-05, "loss": 0.0001, "step": 954 }, { "epoch": 0.32416394693341244, "grad_norm": 0.02939584106206894, "learning_rate": 9.817090706862895e-05, "loss": 0.0003, "step": 956 }, { "epoch": 0.32484211418641123, "grad_norm": 0.01710331439971924, "learning_rate": 9.816239276622621e-05, "loss": 0.0001, "step": 958 }, { "epoch": 0.32552028143941, "grad_norm": 0.013261375017464161, "learning_rate": 9.815385906401741e-05, "loss": 0.0, "step": 960 }, { "epoch": 0.32619844869240877, "grad_norm": 0.025372454896569252, "learning_rate": 9.814530596543994e-05, "loss": 0.0, "step": 962 }, { "epoch": 0.3268766159454075, "grad_norm": 0.005917869508266449, "learning_rate": 9.813673347393899e-05, "loss": 0.0, "step": 964 }, { "epoch": 0.3275547831984063, "grad_norm": 0.006878651212900877, "learning_rate": 9.812814159296757e-05, "loss": 0.0, "step": 966 }, { "epoch": 0.3282329504514051, "grad_norm": 0.04819082096219063, "learning_rate": 9.811953032598644e-05, "loss": 0.0001, "step": 968 }, { "epoch": 0.32891111770440384, "grad_norm": 0.019986020401120186, "learning_rate": 9.811089967646428e-05, "loss": 0.0003, "step": 970 }, { "epoch": 0.32958928495740264, "grad_norm": 0.005363111849874258, "learning_rate": 9.810224964787746e-05, "loss": 0.0, "step": 972 }, { "epoch": 0.3302674522104014, "grad_norm": 0.023500891402363777, "learning_rate": 9.809358024371024e-05, "loss": 0.0, "step": 974 }, { "epoch": 0.3309456194634002, "grad_norm": 0.006279476452618837, "learning_rate": 9.808489146745466e-05, "loss": 0.0, "step": 976 }, { "epoch": 0.3316237867163989, "grad_norm": 0.007938874885439873, "learning_rate": 9.807618332261052e-05, "loss": 0.0, "step": 978 }, { "epoch": 0.3323019539693977, "grad_norm": 0.0037944272626191378, "learning_rate": 9.806745581268552e-05, "loss": 0.0, "step": 980 }, { "epoch": 0.33298012122239645, "grad_norm": 0.031090207397937775, "learning_rate": 9.805870894119506e-05, "loss": 0.0002, "step": 982 }, { "epoch": 0.33365828847539525, "grad_norm": 0.04043909162282944, "learning_rate": 9.804994271166236e-05, "loss": 0.0001, "step": 984 }, { "epoch": 0.334336455728394, "grad_norm": 0.00045781582593917847, "learning_rate": 9.804115712761851e-05, "loss": 0.0, "step": 986 }, { "epoch": 0.3350146229813928, "grad_norm": 0.011888192035257816, "learning_rate": 9.803235219260231e-05, "loss": 0.0, "step": 988 }, { "epoch": 0.3356927902343916, "grad_norm": 0.021957555785775185, "learning_rate": 9.802352791016039e-05, "loss": 0.0, "step": 990 }, { "epoch": 0.3363709574873903, "grad_norm": 0.022019004449248314, "learning_rate": 9.801468428384716e-05, "loss": 0.0004, "step": 992 }, { "epoch": 0.3370491247403891, "grad_norm": 0.001489504473283887, "learning_rate": 9.800582131722485e-05, "loss": 0.0001, "step": 994 }, { "epoch": 0.33772729199338786, "grad_norm": 0.002710692584514618, "learning_rate": 9.799693901386345e-05, "loss": 0.0001, "step": 996 }, { "epoch": 0.33840545924638665, "grad_norm": 0.0030198118183761835, "learning_rate": 9.798803737734076e-05, "loss": 0.0001, "step": 998 }, { "epoch": 0.3390836264993854, "grad_norm": 0.021767836064100266, "learning_rate": 9.797911641124236e-05, "loss": 0.0001, "step": 1000 }, { "epoch": 0.3397617937523842, "grad_norm": 0.0020249721128493547, "learning_rate": 9.79701761191616e-05, "loss": 0.0003, "step": 1002 }, { "epoch": 0.34043996100538293, "grad_norm": 0.01206112653017044, "learning_rate": 9.796121650469963e-05, "loss": 0.0001, "step": 1004 }, { "epoch": 0.3411181282583817, "grad_norm": 0.0016083617229014635, "learning_rate": 9.795223757146539e-05, "loss": 0.0, "step": 1006 }, { "epoch": 0.3417962955113805, "grad_norm": 0.004822399001568556, "learning_rate": 9.794323932307559e-05, "loss": 0.0, "step": 1008 }, { "epoch": 0.34247446276437926, "grad_norm": 0.008584674447774887, "learning_rate": 9.793422176315472e-05, "loss": 0.0, "step": 1010 }, { "epoch": 0.34315263001737806, "grad_norm": 0.003843455109745264, "learning_rate": 9.792518489533507e-05, "loss": 0.0, "step": 1012 }, { "epoch": 0.3438307972703768, "grad_norm": 0.002502762945368886, "learning_rate": 9.791612872325667e-05, "loss": 0.0, "step": 1014 }, { "epoch": 0.3445089645233756, "grad_norm": 0.0043897624127566814, "learning_rate": 9.790705325056735e-05, "loss": 0.0, "step": 1016 }, { "epoch": 0.34518713177637433, "grad_norm": 0.013913934119045734, "learning_rate": 9.78979584809227e-05, "loss": 0.0, "step": 1018 }, { "epoch": 0.34586529902937313, "grad_norm": 0.0008682821062393486, "learning_rate": 9.788884441798612e-05, "loss": 0.0, "step": 1020 }, { "epoch": 0.34654346628237187, "grad_norm": 0.0015347907319664955, "learning_rate": 9.787971106542872e-05, "loss": 0.0, "step": 1022 }, { "epoch": 0.34722163353537067, "grad_norm": 0.0006304876878857613, "learning_rate": 9.787055842692944e-05, "loss": 0.0, "step": 1024 }, { "epoch": 0.3478998007883694, "grad_norm": 0.015095069073140621, "learning_rate": 9.786138650617493e-05, "loss": 0.0, "step": 1026 }, { "epoch": 0.3485779680413682, "grad_norm": 0.0007357313297688961, "learning_rate": 9.785219530685969e-05, "loss": 0.0, "step": 1028 }, { "epoch": 0.349256135294367, "grad_norm": 0.006811681669205427, "learning_rate": 9.784298483268588e-05, "loss": 0.0001, "step": 1030 }, { "epoch": 0.34993430254736574, "grad_norm": 0.004541031084954739, "learning_rate": 9.783375508736351e-05, "loss": 0.0, "step": 1032 }, { "epoch": 0.35061246980036453, "grad_norm": 0.002640953753143549, "learning_rate": 9.782450607461031e-05, "loss": 0.0, "step": 1034 }, { "epoch": 0.3512906370533633, "grad_norm": 0.014266207814216614, "learning_rate": 9.781523779815179e-05, "loss": 0.0, "step": 1036 }, { "epoch": 0.35196880430636207, "grad_norm": 0.0035020592622458935, "learning_rate": 9.780595026172119e-05, "loss": 0.0, "step": 1038 }, { "epoch": 0.3526469715593608, "grad_norm": 0.0020277295261621475, "learning_rate": 9.779664346905954e-05, "loss": 0.0, "step": 1040 }, { "epoch": 0.3533251388123596, "grad_norm": 0.0008716668817214668, "learning_rate": 9.778731742391562e-05, "loss": 0.0, "step": 1042 }, { "epoch": 0.35400330606535835, "grad_norm": 0.004051413387060165, "learning_rate": 9.777797213004596e-05, "loss": 0.0, "step": 1044 }, { "epoch": 0.35468147331835714, "grad_norm": 0.0006017841515131295, "learning_rate": 9.776860759121484e-05, "loss": 0.0, "step": 1046 }, { "epoch": 0.3553596405713559, "grad_norm": 0.0020267630461603403, "learning_rate": 9.775922381119429e-05, "loss": 0.0, "step": 1048 }, { "epoch": 0.3560378078243547, "grad_norm": 0.001963792135939002, "learning_rate": 9.774982079376411e-05, "loss": 0.0, "step": 1050 }, { "epoch": 0.3567159750773535, "grad_norm": 0.039067551493644714, "learning_rate": 9.774039854271182e-05, "loss": 0.0001, "step": 1052 }, { "epoch": 0.3573941423303522, "grad_norm": 0.009917604736983776, "learning_rate": 9.773095706183271e-05, "loss": 0.0, "step": 1054 }, { "epoch": 0.358072309583351, "grad_norm": 0.03943745791912079, "learning_rate": 9.772149635492978e-05, "loss": 0.0002, "step": 1056 }, { "epoch": 0.35875047683634975, "grad_norm": 0.03156045079231262, "learning_rate": 9.771201642581385e-05, "loss": 0.0001, "step": 1058 }, { "epoch": 0.35942864408934855, "grad_norm": 0.0029956672806292772, "learning_rate": 9.77025172783034e-05, "loss": 0.0, "step": 1060 }, { "epoch": 0.3601068113423473, "grad_norm": 0.009155509062111378, "learning_rate": 9.769299891622469e-05, "loss": 0.0, "step": 1062 }, { "epoch": 0.3607849785953461, "grad_norm": 0.001078491797670722, "learning_rate": 9.768346134341174e-05, "loss": 0.0, "step": 1064 }, { "epoch": 0.3614631458483448, "grad_norm": 0.006173829548060894, "learning_rate": 9.767390456370624e-05, "loss": 0.0, "step": 1066 }, { "epoch": 0.3621413131013436, "grad_norm": 0.0022076182067394257, "learning_rate": 9.76643285809577e-05, "loss": 0.0001, "step": 1068 }, { "epoch": 0.3628194803543424, "grad_norm": 0.0006827347096987069, "learning_rate": 9.76547333990233e-05, "loss": 0.0, "step": 1070 }, { "epoch": 0.36349764760734116, "grad_norm": 0.02418726310133934, "learning_rate": 9.764511902176798e-05, "loss": 0.0001, "step": 1072 }, { "epoch": 0.36417581486033995, "grad_norm": 0.002262687310576439, "learning_rate": 9.763548545306443e-05, "loss": 0.0, "step": 1074 }, { "epoch": 0.3648539821133387, "grad_norm": 0.0012373102363198996, "learning_rate": 9.762583269679303e-05, "loss": 0.0, "step": 1076 }, { "epoch": 0.3655321493663375, "grad_norm": 0.2534419596195221, "learning_rate": 9.761616075684192e-05, "loss": 0.0002, "step": 1078 }, { "epoch": 0.36621031661933623, "grad_norm": 0.0008780054631642997, "learning_rate": 9.760646963710694e-05, "loss": 0.0, "step": 1080 }, { "epoch": 0.366888483872335, "grad_norm": 0.04172241687774658, "learning_rate": 9.759675934149168e-05, "loss": 0.0004, "step": 1082 }, { "epoch": 0.36756665112533377, "grad_norm": 0.04473108425736427, "learning_rate": 9.758702987390746e-05, "loss": 0.0004, "step": 1084 }, { "epoch": 0.36824481837833256, "grad_norm": 0.11230382323265076, "learning_rate": 9.75772812382733e-05, "loss": 0.0006, "step": 1086 }, { "epoch": 0.3689229856313313, "grad_norm": 0.08816549926996231, "learning_rate": 9.756751343851594e-05, "loss": 0.0007, "step": 1088 }, { "epoch": 0.3696011528843301, "grad_norm": 0.06734654307365417, "learning_rate": 9.755772647856986e-05, "loss": 0.0003, "step": 1090 }, { "epoch": 0.3702793201373289, "grad_norm": 0.02505115605890751, "learning_rate": 9.754792036237725e-05, "loss": 0.0004, "step": 1092 }, { "epoch": 0.37095748739032763, "grad_norm": 0.02396882139146328, "learning_rate": 9.753809509388798e-05, "loss": 0.0007, "step": 1094 }, { "epoch": 0.37163565464332643, "grad_norm": 0.023111695423722267, "learning_rate": 9.752825067705971e-05, "loss": 0.0002, "step": 1096 }, { "epoch": 0.37231382189632517, "grad_norm": 0.045954182744026184, "learning_rate": 9.751838711585774e-05, "loss": 0.0003, "step": 1098 }, { "epoch": 0.37299198914932397, "grad_norm": 0.029057525098323822, "learning_rate": 9.750850441425515e-05, "loss": 0.0004, "step": 1100 }, { "epoch": 0.3736701564023227, "grad_norm": 0.030954748392105103, "learning_rate": 9.749860257623263e-05, "loss": 0.0002, "step": 1102 }, { "epoch": 0.3743483236553215, "grad_norm": 0.0924738347530365, "learning_rate": 9.748868160577868e-05, "loss": 0.0003, "step": 1104 }, { "epoch": 0.37502649090832024, "grad_norm": 0.05257641151547432, "learning_rate": 9.747874150688948e-05, "loss": 0.0001, "step": 1106 }, { "epoch": 0.37570465816131904, "grad_norm": 0.05264095962047577, "learning_rate": 9.746878228356885e-05, "loss": 0.0007, "step": 1108 }, { "epoch": 0.3763828254143178, "grad_norm": 0.04698754847049713, "learning_rate": 9.74588039398284e-05, "loss": 0.0002, "step": 1110 }, { "epoch": 0.3770609926673166, "grad_norm": 0.031078685075044632, "learning_rate": 9.744880647968741e-05, "loss": 0.0003, "step": 1112 }, { "epoch": 0.37773915992031537, "grad_norm": 0.011770748533308506, "learning_rate": 9.743878990717283e-05, "loss": 0.0002, "step": 1114 }, { "epoch": 0.3784173271733141, "grad_norm": 0.016282442957162857, "learning_rate": 9.742875422631937e-05, "loss": 0.0001, "step": 1116 }, { "epoch": 0.3790954944263129, "grad_norm": 0.03300836682319641, "learning_rate": 9.741869944116937e-05, "loss": 0.0001, "step": 1118 }, { "epoch": 0.37977366167931165, "grad_norm": 0.046346571296453476, "learning_rate": 9.740862555577289e-05, "loss": 0.0009, "step": 1120 }, { "epoch": 0.38045182893231044, "grad_norm": 0.020293988287448883, "learning_rate": 9.739853257418772e-05, "loss": 0.0001, "step": 1122 }, { "epoch": 0.3811299961853092, "grad_norm": 0.006188335362821817, "learning_rate": 9.73884205004793e-05, "loss": 0.0, "step": 1124 }, { "epoch": 0.381808163438308, "grad_norm": 0.04997680336236954, "learning_rate": 9.737828933872075e-05, "loss": 0.0003, "step": 1126 }, { "epoch": 0.3824863306913067, "grad_norm": 0.013155604712665081, "learning_rate": 9.736813909299294e-05, "loss": 0.0001, "step": 1128 }, { "epoch": 0.3831644979443055, "grad_norm": 0.0035465641412883997, "learning_rate": 9.735796976738437e-05, "loss": 0.0002, "step": 1130 }, { "epoch": 0.3838426651973043, "grad_norm": 0.006658546626567841, "learning_rate": 9.734778136599123e-05, "loss": 0.0, "step": 1132 }, { "epoch": 0.38452083245030305, "grad_norm": 0.053657446056604385, "learning_rate": 9.733757389291742e-05, "loss": 0.0008, "step": 1134 }, { "epoch": 0.38519899970330185, "grad_norm": 0.08685652166604996, "learning_rate": 9.73273473522745e-05, "loss": 0.0005, "step": 1136 }, { "epoch": 0.3858771669563006, "grad_norm": 0.009623444639146328, "learning_rate": 9.731710174818174e-05, "loss": 0.0001, "step": 1138 }, { "epoch": 0.3865553342092994, "grad_norm": 0.03243015334010124, "learning_rate": 9.730683708476604e-05, "loss": 0.0004, "step": 1140 }, { "epoch": 0.3872335014622981, "grad_norm": 0.0013256813399493694, "learning_rate": 9.729655336616204e-05, "loss": 0.0004, "step": 1142 }, { "epoch": 0.3879116687152969, "grad_norm": 0.006923831533640623, "learning_rate": 9.728625059651197e-05, "loss": 0.0001, "step": 1144 }, { "epoch": 0.38858983596829566, "grad_norm": 0.004441696684807539, "learning_rate": 9.727592877996585e-05, "loss": 0.0001, "step": 1146 }, { "epoch": 0.38926800322129446, "grad_norm": 0.004240158013999462, "learning_rate": 9.726558792068125e-05, "loss": 0.0, "step": 1148 }, { "epoch": 0.3899461704742932, "grad_norm": 0.014222053810954094, "learning_rate": 9.72552280228235e-05, "loss": 0.0001, "step": 1150 }, { "epoch": 0.390624337727292, "grad_norm": 0.03526894748210907, "learning_rate": 9.724484909056554e-05, "loss": 0.0004, "step": 1152 }, { "epoch": 0.3913025049802908, "grad_norm": 0.005613984540104866, "learning_rate": 9.723445112808802e-05, "loss": 0.0, "step": 1154 }, { "epoch": 0.39198067223328953, "grad_norm": 0.01902814209461212, "learning_rate": 9.722403413957923e-05, "loss": 0.0001, "step": 1156 }, { "epoch": 0.3926588394862883, "grad_norm": 0.07783958315849304, "learning_rate": 9.721359812923515e-05, "loss": 0.0003, "step": 1158 }, { "epoch": 0.39333700673928707, "grad_norm": 0.008552520535886288, "learning_rate": 9.720314310125937e-05, "loss": 0.0, "step": 1160 }, { "epoch": 0.39401517399228586, "grad_norm": 0.00928240455687046, "learning_rate": 9.719266905986322e-05, "loss": 0.0, "step": 1162 }, { "epoch": 0.3946933412452846, "grad_norm": 0.04865438863635063, "learning_rate": 9.71821760092656e-05, "loss": 0.0001, "step": 1164 }, { "epoch": 0.3953715084982834, "grad_norm": 0.005656507331877947, "learning_rate": 9.717166395369313e-05, "loss": 0.0, "step": 1166 }, { "epoch": 0.39604967575128214, "grad_norm": 0.00930027011781931, "learning_rate": 9.716113289738004e-05, "loss": 0.0001, "step": 1168 }, { "epoch": 0.39672784300428093, "grad_norm": 0.023045826703310013, "learning_rate": 9.715058284456828e-05, "loss": 0.0001, "step": 1170 }, { "epoch": 0.39740601025727973, "grad_norm": 0.00920875370502472, "learning_rate": 9.714001379950739e-05, "loss": 0.0001, "step": 1172 }, { "epoch": 0.39808417751027847, "grad_norm": 0.002126706065610051, "learning_rate": 9.712942576645459e-05, "loss": 0.0001, "step": 1174 }, { "epoch": 0.39876234476327727, "grad_norm": 0.010585418902337551, "learning_rate": 9.71188187496747e-05, "loss": 0.0, "step": 1176 }, { "epoch": 0.399440512016276, "grad_norm": 0.007169020362198353, "learning_rate": 9.710819275344028e-05, "loss": 0.0, "step": 1178 }, { "epoch": 0.4001186792692748, "grad_norm": 0.008359720930457115, "learning_rate": 9.709754778203143e-05, "loss": 0.0001, "step": 1180 }, { "epoch": 0.40079684652227354, "grad_norm": 0.017095040529966354, "learning_rate": 9.708688383973596e-05, "loss": 0.0002, "step": 1182 }, { "epoch": 0.40147501377527234, "grad_norm": 0.003711053868755698, "learning_rate": 9.707620093084933e-05, "loss": 0.0, "step": 1184 }, { "epoch": 0.4021531810282711, "grad_norm": 0.003627863246947527, "learning_rate": 9.706549905967459e-05, "loss": 0.0, "step": 1186 }, { "epoch": 0.4028313482812699, "grad_norm": 0.004584900569170713, "learning_rate": 9.705477823052246e-05, "loss": 0.0, "step": 1188 }, { "epoch": 0.4035095155342686, "grad_norm": 0.012822436168789864, "learning_rate": 9.704403844771128e-05, "loss": 0.0, "step": 1190 }, { "epoch": 0.4041876827872674, "grad_norm": 0.02418431267142296, "learning_rate": 9.703327971556704e-05, "loss": 0.0, "step": 1192 }, { "epoch": 0.4048658500402662, "grad_norm": 0.011410797946155071, "learning_rate": 9.702250203842336e-05, "loss": 0.0, "step": 1194 }, { "epoch": 0.40554401729326495, "grad_norm": 0.0004434087313711643, "learning_rate": 9.701170542062148e-05, "loss": 0.0, "step": 1196 }, { "epoch": 0.40622218454626374, "grad_norm": 0.0011917415540665388, "learning_rate": 9.700088986651027e-05, "loss": 0.0, "step": 1198 }, { "epoch": 0.4069003517992625, "grad_norm": 0.1085277646780014, "learning_rate": 9.699005538044626e-05, "loss": 0.0002, "step": 1200 }, { "epoch": 0.4075785190522613, "grad_norm": 0.001079617883078754, "learning_rate": 9.697920196679353e-05, "loss": 0.0, "step": 1202 }, { "epoch": 0.40825668630526, "grad_norm": 0.0006843967130407691, "learning_rate": 9.69683296299239e-05, "loss": 0.0001, "step": 1204 }, { "epoch": 0.4089348535582588, "grad_norm": 0.0017491994658485055, "learning_rate": 9.695743837421669e-05, "loss": 0.0, "step": 1206 }, { "epoch": 0.40961302081125756, "grad_norm": 0.050194092094898224, "learning_rate": 9.694652820405895e-05, "loss": 0.0001, "step": 1208 }, { "epoch": 0.41029118806425635, "grad_norm": 0.0033219442702829838, "learning_rate": 9.693559912384523e-05, "loss": 0.0001, "step": 1210 }, { "epoch": 0.4109693553172551, "grad_norm": 0.021614020690321922, "learning_rate": 9.69246511379778e-05, "loss": 0.0, "step": 1212 }, { "epoch": 0.4116475225702539, "grad_norm": 0.0711006447672844, "learning_rate": 9.691368425086651e-05, "loss": 0.0007, "step": 1214 }, { "epoch": 0.4123256898232527, "grad_norm": 0.0004094350733794272, "learning_rate": 9.690269846692881e-05, "loss": 0.0, "step": 1216 }, { "epoch": 0.4130038570762514, "grad_norm": 0.008974644355475903, "learning_rate": 9.689169379058979e-05, "loss": 0.0006, "step": 1218 }, { "epoch": 0.4136820243292502, "grad_norm": 0.00673671206459403, "learning_rate": 9.688067022628211e-05, "loss": 0.0004, "step": 1220 }, { "epoch": 0.41436019158224896, "grad_norm": 0.014988470822572708, "learning_rate": 9.686962777844606e-05, "loss": 0.0002, "step": 1222 }, { "epoch": 0.41503835883524776, "grad_norm": 0.05447763949632645, "learning_rate": 9.685856645152955e-05, "loss": 0.0002, "step": 1224 }, { "epoch": 0.4157165260882465, "grad_norm": 0.009435374289751053, "learning_rate": 9.68474862499881e-05, "loss": 0.0001, "step": 1226 }, { "epoch": 0.4163946933412453, "grad_norm": 0.022904202342033386, "learning_rate": 9.683638717828476e-05, "loss": 0.0002, "step": 1228 }, { "epoch": 0.41707286059424403, "grad_norm": 0.002160982694476843, "learning_rate": 9.682526924089028e-05, "loss": 0.0, "step": 1230 }, { "epoch": 0.41775102784724283, "grad_norm": 0.005374177824705839, "learning_rate": 9.681413244228294e-05, "loss": 0.0001, "step": 1232 }, { "epoch": 0.4184291951002416, "grad_norm": 0.0011474484344944358, "learning_rate": 9.680297678694867e-05, "loss": 0.0001, "step": 1234 }, { "epoch": 0.41910736235324036, "grad_norm": 0.03147886320948601, "learning_rate": 9.679180227938094e-05, "loss": 0.0002, "step": 1236 }, { "epoch": 0.41978552960623916, "grad_norm": 0.04308544844388962, "learning_rate": 9.678060892408083e-05, "loss": 0.0001, "step": 1238 }, { "epoch": 0.4204636968592379, "grad_norm": 0.00260750949382782, "learning_rate": 9.676939672555706e-05, "loss": 0.0, "step": 1240 }, { "epoch": 0.4211418641122367, "grad_norm": 0.049146685749292374, "learning_rate": 9.675816568832588e-05, "loss": 0.0001, "step": 1242 }, { "epoch": 0.42182003136523544, "grad_norm": 0.05711871013045311, "learning_rate": 9.674691581691114e-05, "loss": 0.0, "step": 1244 }, { "epoch": 0.42249819861823423, "grad_norm": 0.008233152329921722, "learning_rate": 9.673564711584431e-05, "loss": 0.0, "step": 1246 }, { "epoch": 0.423176365871233, "grad_norm": 0.001224467996507883, "learning_rate": 9.672435958966442e-05, "loss": 0.0, "step": 1248 }, { "epoch": 0.42385453312423177, "grad_norm": 0.0015520035522058606, "learning_rate": 9.671305324291806e-05, "loss": 0.0, "step": 1250 }, { "epoch": 0.4245327003772305, "grad_norm": 0.09785683453083038, "learning_rate": 9.670172808015943e-05, "loss": 0.0003, "step": 1252 }, { "epoch": 0.4252108676302293, "grad_norm": 0.011007199063897133, "learning_rate": 9.669038410595033e-05, "loss": 0.0, "step": 1254 }, { "epoch": 0.4258890348832281, "grad_norm": 0.05002971738576889, "learning_rate": 9.667902132486009e-05, "loss": 0.0002, "step": 1256 }, { "epoch": 0.42656720213622684, "grad_norm": 0.027726514264941216, "learning_rate": 9.666763974146564e-05, "loss": 0.0002, "step": 1258 }, { "epoch": 0.42724536938922564, "grad_norm": 0.034664928913116455, "learning_rate": 9.66562393603515e-05, "loss": 0.0002, "step": 1260 }, { "epoch": 0.4279235366422244, "grad_norm": 0.0023947851732373238, "learning_rate": 9.664482018610971e-05, "loss": 0.0001, "step": 1262 }, { "epoch": 0.4286017038952232, "grad_norm": 0.027528831735253334, "learning_rate": 9.663338222333993e-05, "loss": 0.0002, "step": 1264 }, { "epoch": 0.4292798711482219, "grad_norm": 0.006727095227688551, "learning_rate": 9.662192547664937e-05, "loss": 0.0, "step": 1266 }, { "epoch": 0.4299580384012207, "grad_norm": 0.0011739626061171293, "learning_rate": 9.66104499506528e-05, "loss": 0.0, "step": 1268 }, { "epoch": 0.43063620565421945, "grad_norm": 0.002890555886551738, "learning_rate": 9.659895564997257e-05, "loss": 0.0, "step": 1270 }, { "epoch": 0.43131437290721825, "grad_norm": 0.001660423120483756, "learning_rate": 9.658744257923857e-05, "loss": 0.0, "step": 1272 }, { "epoch": 0.431992540160217, "grad_norm": 0.030299240723252296, "learning_rate": 9.657591074308827e-05, "loss": 0.0002, "step": 1274 }, { "epoch": 0.4326707074132158, "grad_norm": 0.0587908960878849, "learning_rate": 9.656436014616669e-05, "loss": 0.0002, "step": 1276 }, { "epoch": 0.4333488746662146, "grad_norm": 0.004164823330938816, "learning_rate": 9.655279079312642e-05, "loss": 0.0001, "step": 1278 }, { "epoch": 0.4340270419192133, "grad_norm": 0.0012816853122785687, "learning_rate": 9.654120268862758e-05, "loss": 0.0, "step": 1280 }, { "epoch": 0.4347052091722121, "grad_norm": 0.026919906958937645, "learning_rate": 9.652959583733787e-05, "loss": 0.0001, "step": 1282 }, { "epoch": 0.43538337642521086, "grad_norm": 0.0006462790770456195, "learning_rate": 9.651797024393252e-05, "loss": 0.0, "step": 1284 }, { "epoch": 0.43606154367820965, "grad_norm": 0.0024133336264640093, "learning_rate": 9.650632591309431e-05, "loss": 0.0002, "step": 1286 }, { "epoch": 0.4367397109312084, "grad_norm": 0.031166810542345047, "learning_rate": 9.649466284951359e-05, "loss": 0.0003, "step": 1288 }, { "epoch": 0.4374178781842072, "grad_norm": 0.004173577763140202, "learning_rate": 9.648298105788822e-05, "loss": 0.0, "step": 1290 }, { "epoch": 0.4380960454372059, "grad_norm": 0.012892540544271469, "learning_rate": 9.647128054292365e-05, "loss": 0.0001, "step": 1292 }, { "epoch": 0.4387742126902047, "grad_norm": 0.04061705246567726, "learning_rate": 9.645956130933284e-05, "loss": 0.0002, "step": 1294 }, { "epoch": 0.4394523799432035, "grad_norm": 0.0026674168184399605, "learning_rate": 9.644782336183629e-05, "loss": 0.0001, "step": 1296 }, { "epoch": 0.44013054719620226, "grad_norm": 0.006499381735920906, "learning_rate": 9.643606670516205e-05, "loss": 0.0, "step": 1298 }, { "epoch": 0.44080871444920106, "grad_norm": 0.00404358608648181, "learning_rate": 9.642429134404569e-05, "loss": 0.0004, "step": 1300 }, { "epoch": 0.4414868817021998, "grad_norm": 0.03190307691693306, "learning_rate": 9.641249728323033e-05, "loss": 0.0001, "step": 1302 }, { "epoch": 0.4421650489551986, "grad_norm": 0.004501507617533207, "learning_rate": 9.64006845274666e-05, "loss": 0.0, "step": 1304 }, { "epoch": 0.44284321620819733, "grad_norm": 0.0022560632787644863, "learning_rate": 9.63888530815127e-05, "loss": 0.0, "step": 1306 }, { "epoch": 0.44352138346119613, "grad_norm": 0.007302314043045044, "learning_rate": 9.637700295013432e-05, "loss": 0.0, "step": 1308 }, { "epoch": 0.44419955071419487, "grad_norm": 0.014958181418478489, "learning_rate": 9.636513413810471e-05, "loss": 0.0, "step": 1310 }, { "epoch": 0.44487771796719366, "grad_norm": 0.001923035946674645, "learning_rate": 9.63532466502046e-05, "loss": 0.0, "step": 1312 }, { "epoch": 0.4455558852201924, "grad_norm": 0.0023916817735880613, "learning_rate": 9.63413404912223e-05, "loss": 0.0, "step": 1314 }, { "epoch": 0.4462340524731912, "grad_norm": 0.03837057203054428, "learning_rate": 9.632941566595357e-05, "loss": 0.0001, "step": 1316 }, { "epoch": 0.44691221972619, "grad_norm": 0.0005067187012173235, "learning_rate": 9.631747217920178e-05, "loss": 0.0, "step": 1318 }, { "epoch": 0.44759038697918874, "grad_norm": 0.002008441835641861, "learning_rate": 9.63055100357777e-05, "loss": 0.0, "step": 1320 }, { "epoch": 0.44826855423218753, "grad_norm": 0.004499231465160847, "learning_rate": 9.629352924049975e-05, "loss": 0.0, "step": 1322 }, { "epoch": 0.4489467214851863, "grad_norm": 0.014846640639007092, "learning_rate": 9.628152979819374e-05, "loss": 0.0003, "step": 1324 }, { "epoch": 0.44962488873818507, "grad_norm": 0.022211197763681412, "learning_rate": 9.626951171369305e-05, "loss": 0.0002, "step": 1326 }, { "epoch": 0.4503030559911838, "grad_norm": 0.0004746819904539734, "learning_rate": 9.62574749918386e-05, "loss": 0.0, "step": 1328 }, { "epoch": 0.4509812232441826, "grad_norm": 0.0026894102338701487, "learning_rate": 9.624541963747873e-05, "loss": 0.0001, "step": 1330 }, { "epoch": 0.45165939049718135, "grad_norm": 0.0006651193252764642, "learning_rate": 9.623334565546937e-05, "loss": 0.0, "step": 1332 }, { "epoch": 0.45233755775018014, "grad_norm": 0.009189899079501629, "learning_rate": 9.622125305067393e-05, "loss": 0.0, "step": 1334 }, { "epoch": 0.4530157250031789, "grad_norm": 0.0018004291923716664, "learning_rate": 9.620914182796327e-05, "loss": 0.0, "step": 1336 }, { "epoch": 0.4536938922561777, "grad_norm": 0.0035469504073262215, "learning_rate": 9.61970119922158e-05, "loss": 0.0, "step": 1338 }, { "epoch": 0.4543720595091765, "grad_norm": 0.001930896774865687, "learning_rate": 9.618486354831743e-05, "loss": 0.0, "step": 1340 }, { "epoch": 0.4550502267621752, "grad_norm": 0.0019152465974912047, "learning_rate": 9.617269650116157e-05, "loss": 0.0002, "step": 1342 }, { "epoch": 0.455728394015174, "grad_norm": 0.0019315932877361774, "learning_rate": 9.616051085564906e-05, "loss": 0.0, "step": 1344 }, { "epoch": 0.45640656126817275, "grad_norm": 0.0008209678344428539, "learning_rate": 9.614830661668829e-05, "loss": 0.0, "step": 1346 }, { "epoch": 0.45708472852117155, "grad_norm": 0.002465085359290242, "learning_rate": 9.613608378919513e-05, "loss": 0.0001, "step": 1348 }, { "epoch": 0.4577628957741703, "grad_norm": 0.001089555793441832, "learning_rate": 9.612384237809295e-05, "loss": 0.0, "step": 1350 }, { "epoch": 0.4584410630271691, "grad_norm": 0.0003401720605324954, "learning_rate": 9.611158238831259e-05, "loss": 0.0, "step": 1352 }, { "epoch": 0.4591192302801678, "grad_norm": 0.0004586543655022979, "learning_rate": 9.609930382479233e-05, "loss": 0.0, "step": 1354 }, { "epoch": 0.4597973975331666, "grad_norm": 0.0005209209630265832, "learning_rate": 9.608700669247802e-05, "loss": 0.0, "step": 1356 }, { "epoch": 0.4604755647861654, "grad_norm": 0.0011509821051731706, "learning_rate": 9.607469099632291e-05, "loss": 0.0, "step": 1358 }, { "epoch": 0.46115373203916415, "grad_norm": 0.0004960742662660778, "learning_rate": 9.606235674128781e-05, "loss": 0.0, "step": 1360 }, { "epoch": 0.46183189929216295, "grad_norm": 0.002033142140135169, "learning_rate": 9.605000393234089e-05, "loss": 0.0, "step": 1362 }, { "epoch": 0.4625100665451617, "grad_norm": 0.0006835223757661879, "learning_rate": 9.60376325744579e-05, "loss": 0.0, "step": 1364 }, { "epoch": 0.4631882337981605, "grad_norm": 0.0004336755082476884, "learning_rate": 9.602524267262203e-05, "loss": 0.0, "step": 1366 }, { "epoch": 0.4638664010511592, "grad_norm": 0.0009202666115015745, "learning_rate": 9.60128342318239e-05, "loss": 0.0, "step": 1368 }, { "epoch": 0.464544568304158, "grad_norm": 0.00034619055804796517, "learning_rate": 9.600040725706164e-05, "loss": 0.0, "step": 1370 }, { "epoch": 0.46522273555715676, "grad_norm": 0.00034607655834406614, "learning_rate": 9.598796175334085e-05, "loss": 0.0, "step": 1372 }, { "epoch": 0.46590090281015556, "grad_norm": 0.0022296837996691465, "learning_rate": 9.597549772567456e-05, "loss": 0.0, "step": 1374 }, { "epoch": 0.4665790700631543, "grad_norm": 0.0003772445779759437, "learning_rate": 9.596301517908328e-05, "loss": 0.0, "step": 1376 }, { "epoch": 0.4672572373161531, "grad_norm": 0.0872640609741211, "learning_rate": 9.595051411859499e-05, "loss": 0.0001, "step": 1378 }, { "epoch": 0.4679354045691519, "grad_norm": 0.00028464055503718555, "learning_rate": 9.59379945492451e-05, "loss": 0.0, "step": 1380 }, { "epoch": 0.46861357182215063, "grad_norm": 0.0007707217009738088, "learning_rate": 9.592545647607652e-05, "loss": 0.0, "step": 1382 }, { "epoch": 0.46929173907514943, "grad_norm": 0.0003467558417469263, "learning_rate": 9.591289990413954e-05, "loss": 0.0, "step": 1384 }, { "epoch": 0.46996990632814817, "grad_norm": 0.017712855711579323, "learning_rate": 9.590032483849199e-05, "loss": 0.0001, "step": 1386 }, { "epoch": 0.47064807358114696, "grad_norm": 0.0008023407426662743, "learning_rate": 9.588773128419906e-05, "loss": 0.0, "step": 1388 }, { "epoch": 0.4713262408341457, "grad_norm": 0.011850233189761639, "learning_rate": 9.587511924633348e-05, "loss": 0.0, "step": 1390 }, { "epoch": 0.4720044080871445, "grad_norm": 0.0005710996338166296, "learning_rate": 9.586248872997532e-05, "loss": 0.0, "step": 1392 }, { "epoch": 0.47268257534014324, "grad_norm": 0.009720748290419579, "learning_rate": 9.584983974021222e-05, "loss": 0.0, "step": 1394 }, { "epoch": 0.47336074259314204, "grad_norm": 0.0015520682791247964, "learning_rate": 9.583717228213914e-05, "loss": 0.0002, "step": 1396 }, { "epoch": 0.4740389098461408, "grad_norm": 0.06051940098404884, "learning_rate": 9.582448636085855e-05, "loss": 0.0002, "step": 1398 }, { "epoch": 0.4747170770991396, "grad_norm": 0.036541007459163666, "learning_rate": 9.581178198148034e-05, "loss": 0.0001, "step": 1400 }, { "epoch": 0.47539524435213837, "grad_norm": 0.0005350433639250696, "learning_rate": 9.579905914912182e-05, "loss": 0.0, "step": 1402 }, { "epoch": 0.4760734116051371, "grad_norm": 0.008719049394130707, "learning_rate": 9.578631786890774e-05, "loss": 0.0, "step": 1404 }, { "epoch": 0.4767515788581359, "grad_norm": 0.052884291857481, "learning_rate": 9.577355814597031e-05, "loss": 0.0002, "step": 1406 }, { "epoch": 0.47742974611113465, "grad_norm": 0.002605754416435957, "learning_rate": 9.576077998544912e-05, "loss": 0.0, "step": 1408 }, { "epoch": 0.47810791336413344, "grad_norm": 0.0009654919849708676, "learning_rate": 9.574798339249125e-05, "loss": 0.0, "step": 1410 }, { "epoch": 0.4787860806171322, "grad_norm": 0.014030814170837402, "learning_rate": 9.573516837225112e-05, "loss": 0.0, "step": 1412 }, { "epoch": 0.479464247870131, "grad_norm": 0.003000308061018586, "learning_rate": 9.572233492989064e-05, "loss": 0.0, "step": 1414 }, { "epoch": 0.4801424151231297, "grad_norm": 0.03821394965052605, "learning_rate": 9.570948307057912e-05, "loss": 0.0001, "step": 1416 }, { "epoch": 0.4808205823761285, "grad_norm": 0.0002816450723912567, "learning_rate": 9.569661279949329e-05, "loss": 0.0, "step": 1418 }, { "epoch": 0.4814987496291273, "grad_norm": 0.005087055265903473, "learning_rate": 9.56837241218173e-05, "loss": 0.0, "step": 1420 }, { "epoch": 0.48217691688212605, "grad_norm": 0.017613375559449196, "learning_rate": 9.567081704274268e-05, "loss": 0.0, "step": 1422 }, { "epoch": 0.48285508413512485, "grad_norm": 0.00882010255008936, "learning_rate": 9.565789156746842e-05, "loss": 0.0003, "step": 1424 }, { "epoch": 0.4835332513881236, "grad_norm": 0.0006112809642218053, "learning_rate": 9.564494770120089e-05, "loss": 0.0, "step": 1426 }, { "epoch": 0.4842114186411224, "grad_norm": 0.0022903706412762403, "learning_rate": 9.56319854491539e-05, "loss": 0.0, "step": 1428 }, { "epoch": 0.4848895858941211, "grad_norm": 0.0009983257623389363, "learning_rate": 9.561900481654862e-05, "loss": 0.0, "step": 1430 }, { "epoch": 0.4855677531471199, "grad_norm": 0.0007166972500272095, "learning_rate": 9.560600580861365e-05, "loss": 0.0, "step": 1432 }, { "epoch": 0.48624592040011866, "grad_norm": 0.04111838713288307, "learning_rate": 9.559298843058501e-05, "loss": 0.0001, "step": 1434 }, { "epoch": 0.48692408765311745, "grad_norm": 0.001161652966402471, "learning_rate": 9.557995268770608e-05, "loss": 0.0, "step": 1436 }, { "epoch": 0.4876022549061162, "grad_norm": 0.01842758245766163, "learning_rate": 9.556689858522764e-05, "loss": 0.0, "step": 1438 }, { "epoch": 0.488280422159115, "grad_norm": 0.0006185704842209816, "learning_rate": 9.555382612840791e-05, "loss": 0.0, "step": 1440 }, { "epoch": 0.4889585894121138, "grad_norm": 0.012515629641711712, "learning_rate": 9.554073532251247e-05, "loss": 0.0, "step": 1442 }, { "epoch": 0.4896367566651125, "grad_norm": 0.0011411434970796108, "learning_rate": 9.552762617281428e-05, "loss": 0.0, "step": 1444 }, { "epoch": 0.4903149239181113, "grad_norm": 0.0034499443136155605, "learning_rate": 9.55144986845937e-05, "loss": 0.0, "step": 1446 }, { "epoch": 0.49099309117111006, "grad_norm": 0.019210342317819595, "learning_rate": 9.550135286313852e-05, "loss": 0.0001, "step": 1448 }, { "epoch": 0.49167125842410886, "grad_norm": 0.005309837404638529, "learning_rate": 9.548818871374383e-05, "loss": 0.0, "step": 1450 }, { "epoch": 0.4923494256771076, "grad_norm": 0.0003773440548684448, "learning_rate": 9.547500624171217e-05, "loss": 0.0, "step": 1452 }, { "epoch": 0.4930275929301064, "grad_norm": 0.00025181396631523967, "learning_rate": 9.546180545235344e-05, "loss": 0.0, "step": 1454 }, { "epoch": 0.49370576018310514, "grad_norm": 0.00040584264206700027, "learning_rate": 9.544858635098491e-05, "loss": 0.0, "step": 1456 }, { "epoch": 0.49438392743610393, "grad_norm": 0.05193353816866875, "learning_rate": 9.543534894293121e-05, "loss": 0.0001, "step": 1458 }, { "epoch": 0.4950620946891027, "grad_norm": 0.0005504893488250673, "learning_rate": 9.542209323352441e-05, "loss": 0.0, "step": 1460 }, { "epoch": 0.49574026194210147, "grad_norm": 0.0023802071809768677, "learning_rate": 9.540881922810388e-05, "loss": 0.0, "step": 1462 }, { "epoch": 0.49641842919510026, "grad_norm": 0.0005835008923895657, "learning_rate": 9.539552693201642e-05, "loss": 0.0, "step": 1464 }, { "epoch": 0.497096596448099, "grad_norm": 0.0014621730661019683, "learning_rate": 9.538221635061611e-05, "loss": 0.0, "step": 1466 }, { "epoch": 0.4977747637010978, "grad_norm": 0.0009525296627543867, "learning_rate": 9.536888748926449e-05, "loss": 0.0, "step": 1468 }, { "epoch": 0.49845293095409654, "grad_norm": 0.08590487390756607, "learning_rate": 9.535554035333041e-05, "loss": 0.0002, "step": 1470 }, { "epoch": 0.49913109820709534, "grad_norm": 0.001831568544730544, "learning_rate": 9.534217494819011e-05, "loss": 0.0, "step": 1472 }, { "epoch": 0.4998092654600941, "grad_norm": 0.0029671755619347095, "learning_rate": 9.532879127922717e-05, "loss": 0.0003, "step": 1474 }, { "epoch": 0.5004874327130928, "grad_norm": 0.024554090574383736, "learning_rate": 9.53153893518325e-05, "loss": 0.0002, "step": 1476 }, { "epoch": 0.5011655999660917, "grad_norm": 0.0011774948798120022, "learning_rate": 9.530196917140444e-05, "loss": 0.0, "step": 1478 }, { "epoch": 0.5018437672190904, "grad_norm": 0.02411930449306965, "learning_rate": 9.52885307433486e-05, "loss": 0.0, "step": 1480 }, { "epoch": 0.5025219344720891, "grad_norm": 0.052455827593803406, "learning_rate": 9.527507407307799e-05, "loss": 0.0002, "step": 1482 }, { "epoch": 0.503200101725088, "grad_norm": 0.03569287061691284, "learning_rate": 9.526159916601298e-05, "loss": 0.0002, "step": 1484 }, { "epoch": 0.5038782689780867, "grad_norm": 0.0015953992260619998, "learning_rate": 9.524810602758121e-05, "loss": 0.0, "step": 1486 }, { "epoch": 0.5045564362310855, "grad_norm": 0.04384199529886246, "learning_rate": 9.523459466321777e-05, "loss": 0.0, "step": 1488 }, { "epoch": 0.5052346034840842, "grad_norm": 0.022246044129133224, "learning_rate": 9.5221065078365e-05, "loss": 0.0001, "step": 1490 }, { "epoch": 0.5059127707370831, "grad_norm": 0.0010375117417424917, "learning_rate": 9.520751727847262e-05, "loss": 0.0, "step": 1492 }, { "epoch": 0.5065909379900818, "grad_norm": 0.0021145131904631853, "learning_rate": 9.519395126899768e-05, "loss": 0.0002, "step": 1494 }, { "epoch": 0.5072691052430806, "grad_norm": 0.012676035054028034, "learning_rate": 9.518036705540458e-05, "loss": 0.0001, "step": 1496 }, { "epoch": 0.5079472724960793, "grad_norm": 0.006452012341469526, "learning_rate": 9.516676464316505e-05, "loss": 0.0, "step": 1498 }, { "epoch": 0.5086254397490781, "grad_norm": 0.0071767643094062805, "learning_rate": 9.51531440377581e-05, "loss": 0.0, "step": 1500 }, { "epoch": 0.5093036070020769, "grad_norm": 0.08541678637266159, "learning_rate": 9.513950524467014e-05, "loss": 0.0003, "step": 1502 }, { "epoch": 0.5099817742550756, "grad_norm": 0.01230217982083559, "learning_rate": 9.512584826939487e-05, "loss": 0.0001, "step": 1504 }, { "epoch": 0.5106599415080745, "grad_norm": 0.000837941886857152, "learning_rate": 9.51121731174333e-05, "loss": 0.0, "step": 1506 }, { "epoch": 0.5113381087610732, "grad_norm": 0.000993087887763977, "learning_rate": 9.50984797942938e-05, "loss": 0.0, "step": 1508 }, { "epoch": 0.512016276014072, "grad_norm": 0.07351294904947281, "learning_rate": 9.508476830549205e-05, "loss": 0.0001, "step": 1510 }, { "epoch": 0.5126944432670707, "grad_norm": 0.0013747677439823747, "learning_rate": 9.507103865655101e-05, "loss": 0.0, "step": 1512 }, { "epoch": 0.5133726105200695, "grad_norm": 0.005486196838319302, "learning_rate": 9.505729085300097e-05, "loss": 0.0, "step": 1514 }, { "epoch": 0.5140507777730683, "grad_norm": 0.00730984378606081, "learning_rate": 9.504352490037958e-05, "loss": 0.0, "step": 1516 }, { "epoch": 0.514728945026067, "grad_norm": 0.02579108066856861, "learning_rate": 9.502974080423172e-05, "loss": 0.0, "step": 1518 }, { "epoch": 0.5154071122790658, "grad_norm": 0.0014567964244633913, "learning_rate": 9.501593857010969e-05, "loss": 0.0, "step": 1520 }, { "epoch": 0.5160852795320646, "grad_norm": 0.006894987542182207, "learning_rate": 9.500211820357297e-05, "loss": 0.0003, "step": 1522 }, { "epoch": 0.5167634467850634, "grad_norm": 0.004342679399996996, "learning_rate": 9.49882797101884e-05, "loss": 0.0, "step": 1524 }, { "epoch": 0.5174416140380621, "grad_norm": 0.00029608941986225545, "learning_rate": 9.497442309553016e-05, "loss": 0.0, "step": 1526 }, { "epoch": 0.518119781291061, "grad_norm": 0.020566822960972786, "learning_rate": 9.496054836517968e-05, "loss": 0.0, "step": 1528 }, { "epoch": 0.5187979485440597, "grad_norm": 0.01303982175886631, "learning_rate": 9.494665552472568e-05, "loss": 0.0, "step": 1530 }, { "epoch": 0.5194761157970584, "grad_norm": 0.0011589553905650973, "learning_rate": 9.493274457976422e-05, "loss": 0.0, "step": 1532 }, { "epoch": 0.5201542830500572, "grad_norm": 0.02673693560063839, "learning_rate": 9.491881553589863e-05, "loss": 0.0, "step": 1534 }, { "epoch": 0.520832450303056, "grad_norm": 0.009233599528670311, "learning_rate": 9.490486839873949e-05, "loss": 0.0005, "step": 1536 }, { "epoch": 0.5215106175560548, "grad_norm": 0.006066435482352972, "learning_rate": 9.489090317390475e-05, "loss": 0.0, "step": 1538 }, { "epoch": 0.5221887848090535, "grad_norm": 0.002637030789628625, "learning_rate": 9.487691986701957e-05, "loss": 0.0, "step": 1540 }, { "epoch": 0.5228669520620522, "grad_norm": 0.04053063690662384, "learning_rate": 9.486291848371643e-05, "loss": 0.0001, "step": 1542 }, { "epoch": 0.5235451193150511, "grad_norm": 0.0009022873127833009, "learning_rate": 9.484889902963509e-05, "loss": 0.0, "step": 1544 }, { "epoch": 0.5242232865680498, "grad_norm": 0.007944276556372643, "learning_rate": 9.483486151042258e-05, "loss": 0.0, "step": 1546 }, { "epoch": 0.5249014538210486, "grad_norm": 0.010381053201854229, "learning_rate": 9.482080593173323e-05, "loss": 0.0, "step": 1548 }, { "epoch": 0.5255796210740474, "grad_norm": 0.0014247434446588159, "learning_rate": 9.480673229922861e-05, "loss": 0.0001, "step": 1550 }, { "epoch": 0.5262577883270462, "grad_norm": 0.00017903864500112832, "learning_rate": 9.479264061857756e-05, "loss": 0.0, "step": 1552 }, { "epoch": 0.5269359555800449, "grad_norm": 0.0005723198410123587, "learning_rate": 9.477853089545624e-05, "loss": 0.0, "step": 1554 }, { "epoch": 0.5276141228330437, "grad_norm": 0.022112039849162102, "learning_rate": 9.476440313554803e-05, "loss": 0.0001, "step": 1556 }, { "epoch": 0.5282922900860425, "grad_norm": 0.04562903195619583, "learning_rate": 9.47502573445436e-05, "loss": 0.0002, "step": 1558 }, { "epoch": 0.5289704573390412, "grad_norm": 0.000275386351859197, "learning_rate": 9.473609352814083e-05, "loss": 0.0, "step": 1560 }, { "epoch": 0.52964862459204, "grad_norm": 0.0007450791308656335, "learning_rate": 9.472191169204497e-05, "loss": 0.0, "step": 1562 }, { "epoch": 0.5303267918450388, "grad_norm": 0.0013386242790147662, "learning_rate": 9.47077118419684e-05, "loss": 0.0, "step": 1564 }, { "epoch": 0.5310049590980376, "grad_norm": 0.0014541690470650792, "learning_rate": 9.469349398363088e-05, "loss": 0.0, "step": 1566 }, { "epoch": 0.5316831263510363, "grad_norm": 0.00032288787770085037, "learning_rate": 9.467925812275931e-05, "loss": 0.0, "step": 1568 }, { "epoch": 0.5323612936040351, "grad_norm": 0.00030021867132745683, "learning_rate": 9.46650042650879e-05, "loss": 0.0, "step": 1570 }, { "epoch": 0.5330394608570339, "grad_norm": 0.007078014779835939, "learning_rate": 9.465073241635814e-05, "loss": 0.0, "step": 1572 }, { "epoch": 0.5337176281100326, "grad_norm": 0.00022079533664509654, "learning_rate": 9.463644258231869e-05, "loss": 0.0, "step": 1574 }, { "epoch": 0.5343957953630314, "grad_norm": 0.003117764601483941, "learning_rate": 9.46221347687255e-05, "loss": 0.0, "step": 1576 }, { "epoch": 0.5350739626160301, "grad_norm": 0.0010085367830470204, "learning_rate": 9.460780898134177e-05, "loss": 0.0, "step": 1578 }, { "epoch": 0.535752129869029, "grad_norm": 0.0002581927983555943, "learning_rate": 9.45934652259379e-05, "loss": 0.0, "step": 1580 }, { "epoch": 0.5364302971220277, "grad_norm": 0.03966759517788887, "learning_rate": 9.45791035082916e-05, "loss": 0.0001, "step": 1582 }, { "epoch": 0.5371084643750265, "grad_norm": 0.00011890224413946271, "learning_rate": 9.456472383418771e-05, "loss": 0.0, "step": 1584 }, { "epoch": 0.5377866316280253, "grad_norm": 9.437848348170519e-05, "learning_rate": 9.45503262094184e-05, "loss": 0.0, "step": 1586 }, { "epoch": 0.538464798881024, "grad_norm": 0.0005711221019737422, "learning_rate": 9.453591063978302e-05, "loss": 0.0, "step": 1588 }, { "epoch": 0.5391429661340228, "grad_norm": 0.00018182714120484889, "learning_rate": 9.452147713108816e-05, "loss": 0.0, "step": 1590 }, { "epoch": 0.5398211333870215, "grad_norm": 0.0001235974341398105, "learning_rate": 9.450702568914764e-05, "loss": 0.0, "step": 1592 }, { "epoch": 0.5404993006400204, "grad_norm": 0.0001750366936903447, "learning_rate": 9.44925563197825e-05, "loss": 0.0, "step": 1594 }, { "epoch": 0.5411774678930191, "grad_norm": 0.022940626367926598, "learning_rate": 9.447806902882097e-05, "loss": 0.0001, "step": 1596 }, { "epoch": 0.5418556351460179, "grad_norm": 0.0008206462953239679, "learning_rate": 9.446356382209858e-05, "loss": 0.0, "step": 1598 }, { "epoch": 0.5425338023990166, "grad_norm": 0.13076429069042206, "learning_rate": 9.444904070545798e-05, "loss": 0.0003, "step": 1600 }, { "epoch": 0.5432119696520155, "grad_norm": 0.02494717203080654, "learning_rate": 9.443449968474911e-05, "loss": 0.0, "step": 1602 }, { "epoch": 0.5438901369050142, "grad_norm": 0.22823643684387207, "learning_rate": 9.441994076582907e-05, "loss": 0.0002, "step": 1604 }, { "epoch": 0.5445683041580129, "grad_norm": 0.03987814113497734, "learning_rate": 9.440536395456222e-05, "loss": 0.0001, "step": 1606 }, { "epoch": 0.5452464714110118, "grad_norm": 0.2199239283800125, "learning_rate": 9.439076925682006e-05, "loss": 0.001, "step": 1608 }, { "epoch": 0.5459246386640105, "grad_norm": 0.06272803992033005, "learning_rate": 9.437615667848138e-05, "loss": 0.0004, "step": 1610 }, { "epoch": 0.5466028059170093, "grad_norm": 0.1594870239496231, "learning_rate": 9.436152622543207e-05, "loss": 0.0012, "step": 1612 }, { "epoch": 0.547280973170008, "grad_norm": 0.007507851347327232, "learning_rate": 9.434687790356531e-05, "loss": 0.0005, "step": 1614 }, { "epoch": 0.5479591404230069, "grad_norm": 0.03764336183667183, "learning_rate": 9.433221171878144e-05, "loss": 0.0011, "step": 1616 }, { "epoch": 0.5486373076760056, "grad_norm": 0.1107349619269371, "learning_rate": 9.431752767698799e-05, "loss": 0.001, "step": 1618 }, { "epoch": 0.5493154749290043, "grad_norm": 0.027478542178869247, "learning_rate": 9.430282578409968e-05, "loss": 0.0002, "step": 1620 }, { "epoch": 0.5499936421820031, "grad_norm": 0.05804652348160744, "learning_rate": 9.428810604603846e-05, "loss": 0.0006, "step": 1622 }, { "epoch": 0.5506718094350019, "grad_norm": 0.024435337632894516, "learning_rate": 9.42733684687334e-05, "loss": 0.0002, "step": 1624 }, { "epoch": 0.5513499766880007, "grad_norm": 0.0044030421413481236, "learning_rate": 9.425861305812082e-05, "loss": 0.0002, "step": 1626 }, { "epoch": 0.5520281439409994, "grad_norm": 0.06567414849996567, "learning_rate": 9.424383982014423e-05, "loss": 0.0002, "step": 1628 }, { "epoch": 0.5527063111939983, "grad_norm": 0.10958150774240494, "learning_rate": 9.42290487607542e-05, "loss": 0.0006, "step": 1630 }, { "epoch": 0.553384478446997, "grad_norm": 0.036298271268606186, "learning_rate": 9.421423988590865e-05, "loss": 0.0003, "step": 1632 }, { "epoch": 0.5540626456999957, "grad_norm": 0.09867826849222183, "learning_rate": 9.419941320157255e-05, "loss": 0.0009, "step": 1634 }, { "epoch": 0.5547408129529945, "grad_norm": 0.04817799851298332, "learning_rate": 9.418456871371809e-05, "loss": 0.0006, "step": 1636 }, { "epoch": 0.5554189802059933, "grad_norm": 0.05213902145624161, "learning_rate": 9.416970642832465e-05, "loss": 0.0002, "step": 1638 }, { "epoch": 0.5560971474589921, "grad_norm": 0.035610131919384, "learning_rate": 9.415482635137873e-05, "loss": 0.0004, "step": 1640 }, { "epoch": 0.5567753147119908, "grad_norm": 0.05330129340291023, "learning_rate": 9.413992848887406e-05, "loss": 0.0006, "step": 1642 }, { "epoch": 0.5574534819649896, "grad_norm": 0.0890207514166832, "learning_rate": 9.412501284681145e-05, "loss": 0.001, "step": 1644 }, { "epoch": 0.5581316492179884, "grad_norm": 0.03352257236838341, "learning_rate": 9.411007943119894e-05, "loss": 0.0003, "step": 1646 }, { "epoch": 0.5588098164709872, "grad_norm": 0.023783110082149506, "learning_rate": 9.409512824805172e-05, "loss": 0.0006, "step": 1648 }, { "epoch": 0.5594879837239859, "grad_norm": 0.050553686916828156, "learning_rate": 9.408015930339211e-05, "loss": 0.0006, "step": 1650 }, { "epoch": 0.5601661509769847, "grad_norm": 0.03647039085626602, "learning_rate": 9.40651726032496e-05, "loss": 0.0004, "step": 1652 }, { "epoch": 0.5608443182299835, "grad_norm": 0.06705080717802048, "learning_rate": 9.405016815366086e-05, "loss": 0.0004, "step": 1654 }, { "epoch": 0.5615224854829822, "grad_norm": 0.033958956599235535, "learning_rate": 9.403514596066963e-05, "loss": 0.0004, "step": 1656 }, { "epoch": 0.562200652735981, "grad_norm": 0.04107276350259781, "learning_rate": 9.402010603032689e-05, "loss": 0.0004, "step": 1658 }, { "epoch": 0.5628788199889798, "grad_norm": 0.03651130571961403, "learning_rate": 9.400504836869069e-05, "loss": 0.0002, "step": 1660 }, { "epoch": 0.5635569872419786, "grad_norm": 0.08204538375139236, "learning_rate": 9.398997298182628e-05, "loss": 0.0007, "step": 1662 }, { "epoch": 0.5642351544949773, "grad_norm": 0.052677396684885025, "learning_rate": 9.397487987580602e-05, "loss": 0.0005, "step": 1664 }, { "epoch": 0.5649133217479761, "grad_norm": 0.05886976420879364, "learning_rate": 9.395976905670939e-05, "loss": 0.0003, "step": 1666 }, { "epoch": 0.5655914890009749, "grad_norm": 0.01760573871433735, "learning_rate": 9.394464053062304e-05, "loss": 0.0001, "step": 1668 }, { "epoch": 0.5662696562539736, "grad_norm": 0.038459379225969315, "learning_rate": 9.392949430364076e-05, "loss": 0.0003, "step": 1670 }, { "epoch": 0.5669478235069724, "grad_norm": 0.020486151799559593, "learning_rate": 9.391433038186341e-05, "loss": 0.0002, "step": 1672 }, { "epoch": 0.5676259907599712, "grad_norm": 0.037114061415195465, "learning_rate": 9.389914877139903e-05, "loss": 0.0002, "step": 1674 }, { "epoch": 0.56830415801297, "grad_norm": 0.022536030039191246, "learning_rate": 9.388394947836279e-05, "loss": 0.0002, "step": 1676 }, { "epoch": 0.5689823252659687, "grad_norm": 0.020146558061242104, "learning_rate": 9.386873250887694e-05, "loss": 0.0002, "step": 1678 }, { "epoch": 0.5696604925189674, "grad_norm": 0.011131091974675655, "learning_rate": 9.385349786907088e-05, "loss": 0.0001, "step": 1680 }, { "epoch": 0.5703386597719663, "grad_norm": 0.020283561199903488, "learning_rate": 9.383824556508112e-05, "loss": 0.0002, "step": 1682 }, { "epoch": 0.571016827024965, "grad_norm": 0.026053188368678093, "learning_rate": 9.382297560305129e-05, "loss": 0.0002, "step": 1684 }, { "epoch": 0.5716949942779638, "grad_norm": 0.0034120355267077684, "learning_rate": 9.380768798913213e-05, "loss": 0.0003, "step": 1686 }, { "epoch": 0.5723731615309626, "grad_norm": 0.0031566445250064135, "learning_rate": 9.37923827294815e-05, "loss": 0.0, "step": 1688 }, { "epoch": 0.5730513287839614, "grad_norm": 0.04603234678506851, "learning_rate": 9.377705983026433e-05, "loss": 0.0008, "step": 1690 }, { "epoch": 0.5737294960369601, "grad_norm": 0.024317435920238495, "learning_rate": 9.376171929765269e-05, "loss": 0.0005, "step": 1692 }, { "epoch": 0.5744076632899588, "grad_norm": 0.024809902533888817, "learning_rate": 9.374636113782576e-05, "loss": 0.0001, "step": 1694 }, { "epoch": 0.5750858305429577, "grad_norm": 0.06629851460456848, "learning_rate": 9.373098535696979e-05, "loss": 0.0003, "step": 1696 }, { "epoch": 0.5757639977959564, "grad_norm": 0.07917294651269913, "learning_rate": 9.371559196127815e-05, "loss": 0.0006, "step": 1698 }, { "epoch": 0.5764421650489552, "grad_norm": 0.029546458274126053, "learning_rate": 9.37001809569513e-05, "loss": 0.0005, "step": 1700 }, { "epoch": 0.5771203323019539, "grad_norm": 0.02987642213702202, "learning_rate": 9.368475235019678e-05, "loss": 0.0004, "step": 1702 }, { "epoch": 0.5777984995549528, "grad_norm": 0.05608579143881798, "learning_rate": 9.366930614722925e-05, "loss": 0.0009, "step": 1704 }, { "epoch": 0.5784766668079515, "grad_norm": 0.053664229810237885, "learning_rate": 9.365384235427042e-05, "loss": 0.0002, "step": 1706 }, { "epoch": 0.5791548340609503, "grad_norm": 0.04168921709060669, "learning_rate": 9.36383609775491e-05, "loss": 0.0003, "step": 1708 }, { "epoch": 0.5798330013139491, "grad_norm": 0.04824904724955559, "learning_rate": 9.36228620233012e-05, "loss": 0.0002, "step": 1710 }, { "epoch": 0.5805111685669478, "grad_norm": 0.0651102215051651, "learning_rate": 9.36073454977697e-05, "loss": 0.0004, "step": 1712 }, { "epoch": 0.5811893358199466, "grad_norm": 0.03015986643731594, "learning_rate": 9.359181140720464e-05, "loss": 0.0003, "step": 1714 }, { "epoch": 0.5818675030729453, "grad_norm": 0.031174469739198685, "learning_rate": 9.357625975786317e-05, "loss": 0.0002, "step": 1716 }, { "epoch": 0.5825456703259442, "grad_norm": 0.09150509536266327, "learning_rate": 9.356069055600948e-05, "loss": 0.0005, "step": 1718 }, { "epoch": 0.5832238375789429, "grad_norm": 0.048985328525304794, "learning_rate": 9.354510380791483e-05, "loss": 0.0002, "step": 1720 }, { "epoch": 0.5839020048319417, "grad_norm": 0.010641923174262047, "learning_rate": 9.352949951985758e-05, "loss": 0.0001, "step": 1722 }, { "epoch": 0.5845801720849404, "grad_norm": 0.011535330675542355, "learning_rate": 9.351387769812315e-05, "loss": 0.0003, "step": 1724 }, { "epoch": 0.5852583393379392, "grad_norm": 0.01625724509358406, "learning_rate": 9.349823834900395e-05, "loss": 0.0001, "step": 1726 }, { "epoch": 0.585936506590938, "grad_norm": 0.03464433178305626, "learning_rate": 9.348258147879957e-05, "loss": 0.0003, "step": 1728 }, { "epoch": 0.5866146738439367, "grad_norm": 0.056171514093875885, "learning_rate": 9.346690709381655e-05, "loss": 0.0005, "step": 1730 }, { "epoch": 0.5872928410969356, "grad_norm": 0.04725542664527893, "learning_rate": 9.345121520036856e-05, "loss": 0.0003, "step": 1732 }, { "epoch": 0.5879710083499343, "grad_norm": 0.04443192109465599, "learning_rate": 9.343550580477631e-05, "loss": 0.0003, "step": 1734 }, { "epoch": 0.5886491756029331, "grad_norm": 0.004577595740556717, "learning_rate": 9.341977891336749e-05, "loss": 0.0002, "step": 1736 }, { "epoch": 0.5893273428559318, "grad_norm": 0.005344774108380079, "learning_rate": 9.340403453247691e-05, "loss": 0.0, "step": 1738 }, { "epoch": 0.5900055101089307, "grad_norm": 0.03278961032629013, "learning_rate": 9.338827266844644e-05, "loss": 0.0004, "step": 1740 }, { "epoch": 0.5906836773619294, "grad_norm": 0.07316702604293823, "learning_rate": 9.33724933276249e-05, "loss": 0.0003, "step": 1742 }, { "epoch": 0.5913618446149281, "grad_norm": 0.027365239337086678, "learning_rate": 9.335669651636824e-05, "loss": 0.0001, "step": 1744 }, { "epoch": 0.5920400118679269, "grad_norm": 0.021680912002921104, "learning_rate": 9.33408822410394e-05, "loss": 0.0001, "step": 1746 }, { "epoch": 0.5927181791209257, "grad_norm": 0.07199620455503464, "learning_rate": 9.33250505080084e-05, "loss": 0.0004, "step": 1748 }, { "epoch": 0.5933963463739245, "grad_norm": 0.0033168045338243246, "learning_rate": 9.330920132365222e-05, "loss": 0.0, "step": 1750 }, { "epoch": 0.5940745136269232, "grad_norm": 0.02242601104080677, "learning_rate": 9.329333469435493e-05, "loss": 0.0001, "step": 1752 }, { "epoch": 0.5947526808799221, "grad_norm": 0.01347749400883913, "learning_rate": 9.327745062650761e-05, "loss": 0.0001, "step": 1754 }, { "epoch": 0.5954308481329208, "grad_norm": 0.004134488757699728, "learning_rate": 9.326154912650834e-05, "loss": 0.0001, "step": 1756 }, { "epoch": 0.5961090153859195, "grad_norm": 0.04600505530834198, "learning_rate": 9.324563020076227e-05, "loss": 0.0003, "step": 1758 }, { "epoch": 0.5967871826389183, "grad_norm": 0.005666503217071295, "learning_rate": 9.322969385568151e-05, "loss": 0.0001, "step": 1760 }, { "epoch": 0.5974653498919171, "grad_norm": 0.02874704822897911, "learning_rate": 9.321374009768525e-05, "loss": 0.0001, "step": 1762 }, { "epoch": 0.5981435171449159, "grad_norm": 0.0032893160823732615, "learning_rate": 9.319776893319962e-05, "loss": 0.0, "step": 1764 }, { "epoch": 0.5988216843979146, "grad_norm": 0.004591864533722401, "learning_rate": 9.318178036865785e-05, "loss": 0.0001, "step": 1766 }, { "epoch": 0.5994998516509134, "grad_norm": 0.06738582253456116, "learning_rate": 9.316577441050012e-05, "loss": 0.0004, "step": 1768 }, { "epoch": 0.6001780189039122, "grad_norm": 0.0225976575165987, "learning_rate": 9.314975106517361e-05, "loss": 0.0002, "step": 1770 }, { "epoch": 0.6008561861569109, "grad_norm": 0.09164045006036758, "learning_rate": 9.313371033913253e-05, "loss": 0.0006, "step": 1772 }, { "epoch": 0.6015343534099097, "grad_norm": 0.018893009051680565, "learning_rate": 9.311765223883808e-05, "loss": 0.0002, "step": 1774 }, { "epoch": 0.6022125206629085, "grad_norm": 0.05388549342751503, "learning_rate": 9.310157677075847e-05, "loss": 0.0005, "step": 1776 }, { "epoch": 0.6028906879159073, "grad_norm": 0.08900446444749832, "learning_rate": 9.308548394136889e-05, "loss": 0.0006, "step": 1778 }, { "epoch": 0.603568855168906, "grad_norm": 0.14662937819957733, "learning_rate": 9.306937375715153e-05, "loss": 0.0008, "step": 1780 }, { "epoch": 0.6042470224219048, "grad_norm": 0.0050133345648646355, "learning_rate": 9.305324622459557e-05, "loss": 0.0, "step": 1782 }, { "epoch": 0.6049251896749036, "grad_norm": 0.05219025909900665, "learning_rate": 9.30371013501972e-05, "loss": 0.0004, "step": 1784 }, { "epoch": 0.6056033569279023, "grad_norm": 0.04286068305373192, "learning_rate": 9.302093914045953e-05, "loss": 0.0006, "step": 1786 }, { "epoch": 0.6062815241809011, "grad_norm": 0.053802043199539185, "learning_rate": 9.300475960189272e-05, "loss": 0.0008, "step": 1788 }, { "epoch": 0.6069596914338999, "grad_norm": 0.004111258313059807, "learning_rate": 9.298856274101389e-05, "loss": 0.0001, "step": 1790 }, { "epoch": 0.6076378586868987, "grad_norm": 0.020110275596380234, "learning_rate": 9.297234856434712e-05, "loss": 0.0005, "step": 1792 }, { "epoch": 0.6083160259398974, "grad_norm": 0.043072108179330826, "learning_rate": 9.29561170784235e-05, "loss": 0.0004, "step": 1794 }, { "epoch": 0.6089941931928962, "grad_norm": 0.010845893993973732, "learning_rate": 9.293986828978106e-05, "loss": 0.0004, "step": 1796 }, { "epoch": 0.609672360445895, "grad_norm": 0.017575867474079132, "learning_rate": 9.292360220496479e-05, "loss": 0.0001, "step": 1798 }, { "epoch": 0.6103505276988938, "grad_norm": 0.05960315838456154, "learning_rate": 9.29073188305267e-05, "loss": 0.0002, "step": 1800 }, { "epoch": 0.6110286949518925, "grad_norm": 0.01850164122879505, "learning_rate": 9.28910181730257e-05, "loss": 0.0003, "step": 1802 }, { "epoch": 0.6117068622048912, "grad_norm": 0.059937555342912674, "learning_rate": 9.287470023902774e-05, "loss": 0.0004, "step": 1804 }, { "epoch": 0.6123850294578901, "grad_norm": 0.0020369836129248142, "learning_rate": 9.285836503510562e-05, "loss": 0.0001, "step": 1806 }, { "epoch": 0.6130631967108888, "grad_norm": 0.00964849442243576, "learning_rate": 9.28420125678392e-05, "loss": 0.0, "step": 1808 }, { "epoch": 0.6137413639638876, "grad_norm": 0.04994570463895798, "learning_rate": 9.282564284381524e-05, "loss": 0.0002, "step": 1810 }, { "epoch": 0.6144195312168864, "grad_norm": 0.02114264853298664, "learning_rate": 9.280925586962747e-05, "loss": 0.0, "step": 1812 }, { "epoch": 0.6150976984698852, "grad_norm": 0.0017973040230572224, "learning_rate": 9.279285165187655e-05, "loss": 0.0002, "step": 1814 }, { "epoch": 0.6157758657228839, "grad_norm": 0.015073217451572418, "learning_rate": 9.27764301971701e-05, "loss": 0.0, "step": 1816 }, { "epoch": 0.6164540329758826, "grad_norm": 0.010442632250487804, "learning_rate": 9.275999151212269e-05, "loss": 0.0, "step": 1818 }, { "epoch": 0.6171322002288815, "grad_norm": 0.00925542414188385, "learning_rate": 9.27435356033558e-05, "loss": 0.0001, "step": 1820 }, { "epoch": 0.6178103674818802, "grad_norm": 0.0010579138761386275, "learning_rate": 9.272706247749789e-05, "loss": 0.0, "step": 1822 }, { "epoch": 0.618488534734879, "grad_norm": 0.0006910067750141025, "learning_rate": 9.271057214118432e-05, "loss": 0.0004, "step": 1824 }, { "epoch": 0.6191667019878777, "grad_norm": 0.0045022424310445786, "learning_rate": 9.26940646010574e-05, "loss": 0.0, "step": 1826 }, { "epoch": 0.6198448692408766, "grad_norm": 0.0032699264120310545, "learning_rate": 9.267753986376637e-05, "loss": 0.0, "step": 1828 }, { "epoch": 0.6205230364938753, "grad_norm": 0.0015567560913041234, "learning_rate": 9.266099793596739e-05, "loss": 0.0003, "step": 1830 }, { "epoch": 0.621201203746874, "grad_norm": 0.019803684204816818, "learning_rate": 9.264443882432355e-05, "loss": 0.0001, "step": 1832 }, { "epoch": 0.6218793709998729, "grad_norm": 0.03902900218963623, "learning_rate": 9.262786253550482e-05, "loss": 0.0001, "step": 1834 }, { "epoch": 0.6225575382528716, "grad_norm": 0.0033882891293615103, "learning_rate": 9.261126907618818e-05, "loss": 0.0, "step": 1836 }, { "epoch": 0.6232357055058704, "grad_norm": 0.004297104198485613, "learning_rate": 9.259465845305745e-05, "loss": 0.0, "step": 1838 }, { "epoch": 0.6239138727588691, "grad_norm": 0.04403658211231232, "learning_rate": 9.257803067280337e-05, "loss": 0.0002, "step": 1840 }, { "epoch": 0.624592040011868, "grad_norm": 0.0007246221066452563, "learning_rate": 9.256138574212365e-05, "loss": 0.0, "step": 1842 }, { "epoch": 0.6252702072648667, "grad_norm": 0.003953785169869661, "learning_rate": 9.254472366772285e-05, "loss": 0.0, "step": 1844 }, { "epoch": 0.6259483745178654, "grad_norm": 0.003318328410387039, "learning_rate": 9.252804445631243e-05, "loss": 0.0, "step": 1846 }, { "epoch": 0.6266265417708642, "grad_norm": 0.021481771022081375, "learning_rate": 9.251134811461077e-05, "loss": 0.0001, "step": 1848 }, { "epoch": 0.627304709023863, "grad_norm": 0.0016347782220691442, "learning_rate": 9.249463464934321e-05, "loss": 0.0, "step": 1850 }, { "epoch": 0.6279828762768618, "grad_norm": 0.002228502184152603, "learning_rate": 9.247790406724186e-05, "loss": 0.0, "step": 1852 }, { "epoch": 0.6286610435298605, "grad_norm": 0.015562807209789753, "learning_rate": 9.246115637504587e-05, "loss": 0.0001, "step": 1854 }, { "epoch": 0.6293392107828594, "grad_norm": 0.016600174829363823, "learning_rate": 9.244439157950114e-05, "loss": 0.0001, "step": 1856 }, { "epoch": 0.6300173780358581, "grad_norm": 0.014609369449317455, "learning_rate": 9.242760968736056e-05, "loss": 0.0, "step": 1858 }, { "epoch": 0.6306955452888569, "grad_norm": 0.0012606787495315075, "learning_rate": 9.241081070538389e-05, "loss": 0.0, "step": 1860 }, { "epoch": 0.6313737125418556, "grad_norm": 0.01701480522751808, "learning_rate": 9.239399464033771e-05, "loss": 0.0001, "step": 1862 }, { "epoch": 0.6320518797948544, "grad_norm": 0.009432896971702576, "learning_rate": 9.237716149899559e-05, "loss": 0.0, "step": 1864 }, { "epoch": 0.6327300470478532, "grad_norm": 0.0005134916864335537, "learning_rate": 9.236031128813788e-05, "loss": 0.0, "step": 1866 }, { "epoch": 0.6334082143008519, "grad_norm": 0.0026470404118299484, "learning_rate": 9.234344401455184e-05, "loss": 0.0, "step": 1868 }, { "epoch": 0.6340863815538507, "grad_norm": 0.0005988986231386662, "learning_rate": 9.232655968503161e-05, "loss": 0.0, "step": 1870 }, { "epoch": 0.6347645488068495, "grad_norm": 0.00032376497983932495, "learning_rate": 9.230965830637821e-05, "loss": 0.0, "step": 1872 }, { "epoch": 0.6354427160598483, "grad_norm": 0.0007756618433631957, "learning_rate": 9.22927398853995e-05, "loss": 0.0, "step": 1874 }, { "epoch": 0.636120883312847, "grad_norm": 0.0016515598399564624, "learning_rate": 9.227580442891022e-05, "loss": 0.0, "step": 1876 }, { "epoch": 0.6367990505658458, "grad_norm": 0.03476359322667122, "learning_rate": 9.225885194373199e-05, "loss": 0.0001, "step": 1878 }, { "epoch": 0.6374772178188446, "grad_norm": 0.001096145948395133, "learning_rate": 9.224188243669323e-05, "loss": 0.0, "step": 1880 }, { "epoch": 0.6381553850718433, "grad_norm": 0.0007735094404779375, "learning_rate": 9.222489591462927e-05, "loss": 0.0, "step": 1882 }, { "epoch": 0.6388335523248421, "grad_norm": 0.118960902094841, "learning_rate": 9.22078923843823e-05, "loss": 0.0002, "step": 1884 }, { "epoch": 0.6395117195778409, "grad_norm": 0.00038147129816934466, "learning_rate": 9.219087185280132e-05, "loss": 0.0, "step": 1886 }, { "epoch": 0.6401898868308397, "grad_norm": 0.0548308789730072, "learning_rate": 9.21738343267422e-05, "loss": 0.0002, "step": 1888 }, { "epoch": 0.6408680540838384, "grad_norm": 0.002948600333184004, "learning_rate": 9.215677981306768e-05, "loss": 0.0, "step": 1890 }, { "epoch": 0.6415462213368373, "grad_norm": 0.00647724187001586, "learning_rate": 9.213970831864727e-05, "loss": 0.0, "step": 1892 }, { "epoch": 0.642224388589836, "grad_norm": 0.03968558833003044, "learning_rate": 9.212261985035739e-05, "loss": 0.0003, "step": 1894 }, { "epoch": 0.6429025558428347, "grad_norm": 0.0006300415843725204, "learning_rate": 9.210551441508126e-05, "loss": 0.0, "step": 1896 }, { "epoch": 0.6435807230958335, "grad_norm": 0.0007496281177736819, "learning_rate": 9.208839201970897e-05, "loss": 0.0, "step": 1898 }, { "epoch": 0.6442588903488323, "grad_norm": 0.029067767783999443, "learning_rate": 9.207125267113742e-05, "loss": 0.0002, "step": 1900 }, { "epoch": 0.6449370576018311, "grad_norm": 0.0023949281312525272, "learning_rate": 9.205409637627031e-05, "loss": 0.0001, "step": 1902 }, { "epoch": 0.6456152248548298, "grad_norm": 0.048111461102962494, "learning_rate": 9.203692314201822e-05, "loss": 0.0002, "step": 1904 }, { "epoch": 0.6462933921078285, "grad_norm": 0.002165262820199132, "learning_rate": 9.201973297529851e-05, "loss": 0.0, "step": 1906 }, { "epoch": 0.6469715593608274, "grad_norm": 0.0018261608202010393, "learning_rate": 9.200252588303539e-05, "loss": 0.0, "step": 1908 }, { "epoch": 0.6476497266138261, "grad_norm": 0.00589523371309042, "learning_rate": 9.198530187215986e-05, "loss": 0.0, "step": 1910 }, { "epoch": 0.6483278938668249, "grad_norm": 0.00899149477481842, "learning_rate": 9.196806094960976e-05, "loss": 0.0, "step": 1912 }, { "epoch": 0.6490060611198237, "grad_norm": 0.00829945970326662, "learning_rate": 9.195080312232972e-05, "loss": 0.0, "step": 1914 }, { "epoch": 0.6496842283728225, "grad_norm": 0.01966802030801773, "learning_rate": 9.193352839727121e-05, "loss": 0.0001, "step": 1916 }, { "epoch": 0.6503623956258212, "grad_norm": 0.003154600504785776, "learning_rate": 9.191623678139247e-05, "loss": 0.0, "step": 1918 }, { "epoch": 0.65104056287882, "grad_norm": 0.024086041375994682, "learning_rate": 9.189892828165857e-05, "loss": 0.0007, "step": 1920 }, { "epoch": 0.6517187301318188, "grad_norm": 0.021366657689213753, "learning_rate": 9.188160290504136e-05, "loss": 0.0, "step": 1922 }, { "epoch": 0.6523968973848175, "grad_norm": 0.003147814190015197, "learning_rate": 9.186426065851952e-05, "loss": 0.0, "step": 1924 }, { "epoch": 0.6530750646378163, "grad_norm": 0.018301868811249733, "learning_rate": 9.184690154907849e-05, "loss": 0.0001, "step": 1926 }, { "epoch": 0.653753231890815, "grad_norm": 0.0010956176556646824, "learning_rate": 9.182952558371052e-05, "loss": 0.0004, "step": 1928 }, { "epoch": 0.6544313991438139, "grad_norm": 0.011941217817366123, "learning_rate": 9.181213276941465e-05, "loss": 0.0001, "step": 1930 }, { "epoch": 0.6551095663968126, "grad_norm": 0.04679281637072563, "learning_rate": 9.179472311319669e-05, "loss": 0.0001, "step": 1932 }, { "epoch": 0.6557877336498114, "grad_norm": 0.005270330235362053, "learning_rate": 9.177729662206926e-05, "loss": 0.0005, "step": 1934 }, { "epoch": 0.6564659009028102, "grad_norm": 0.059503424912691116, "learning_rate": 9.175985330305175e-05, "loss": 0.0006, "step": 1936 }, { "epoch": 0.657144068155809, "grad_norm": 0.05685205012559891, "learning_rate": 9.174239316317033e-05, "loss": 0.0003, "step": 1938 }, { "epoch": 0.6578222354088077, "grad_norm": 0.02518923580646515, "learning_rate": 9.172491620945793e-05, "loss": 0.0001, "step": 1940 }, { "epoch": 0.6585004026618064, "grad_norm": 0.0023309921380132437, "learning_rate": 9.170742244895427e-05, "loss": 0.0002, "step": 1942 }, { "epoch": 0.6591785699148053, "grad_norm": 0.010663488879799843, "learning_rate": 9.168991188870583e-05, "loss": 0.0001, "step": 1944 }, { "epoch": 0.659856737167804, "grad_norm": 0.004960994701832533, "learning_rate": 9.167238453576589e-05, "loss": 0.0, "step": 1946 }, { "epoch": 0.6605349044208028, "grad_norm": 0.010904761031270027, "learning_rate": 9.165484039719444e-05, "loss": 0.0002, "step": 1948 }, { "epoch": 0.6612130716738015, "grad_norm": 0.02706189453601837, "learning_rate": 9.163727948005823e-05, "loss": 0.0002, "step": 1950 }, { "epoch": 0.6618912389268004, "grad_norm": 0.030097760260105133, "learning_rate": 9.161970179143084e-05, "loss": 0.0003, "step": 1952 }, { "epoch": 0.6625694061797991, "grad_norm": 0.04335637018084526, "learning_rate": 9.160210733839254e-05, "loss": 0.0003, "step": 1954 }, { "epoch": 0.6632475734327978, "grad_norm": 0.07014850527048111, "learning_rate": 9.158449612803039e-05, "loss": 0.0005, "step": 1956 }, { "epoch": 0.6639257406857967, "grad_norm": 0.004154474474489689, "learning_rate": 9.156686816743816e-05, "loss": 0.0002, "step": 1958 }, { "epoch": 0.6646039079387954, "grad_norm": 0.07189586758613586, "learning_rate": 9.154922346371642e-05, "loss": 0.0003, "step": 1960 }, { "epoch": 0.6652820751917942, "grad_norm": 0.022829728201031685, "learning_rate": 9.153156202397243e-05, "loss": 0.0001, "step": 1962 }, { "epoch": 0.6659602424447929, "grad_norm": 0.015623291954398155, "learning_rate": 9.151388385532022e-05, "loss": 0.0001, "step": 1964 }, { "epoch": 0.6666384096977918, "grad_norm": 0.04405735060572624, "learning_rate": 9.149618896488056e-05, "loss": 0.0001, "step": 1966 }, { "epoch": 0.6673165769507905, "grad_norm": 0.007028218358755112, "learning_rate": 9.147847735978094e-05, "loss": 0.0002, "step": 1968 }, { "epoch": 0.6679947442037892, "grad_norm": 0.06891366094350815, "learning_rate": 9.146074904715561e-05, "loss": 0.0002, "step": 1970 }, { "epoch": 0.668672911456788, "grad_norm": 0.006903359200805426, "learning_rate": 9.144300403414552e-05, "loss": 0.0001, "step": 1972 }, { "epoch": 0.6693510787097868, "grad_norm": 0.02753445878624916, "learning_rate": 9.142524232789836e-05, "loss": 0.0001, "step": 1974 }, { "epoch": 0.6700292459627856, "grad_norm": 0.002083703875541687, "learning_rate": 9.140746393556854e-05, "loss": 0.0001, "step": 1976 }, { "epoch": 0.6707074132157843, "grad_norm": 0.03971313685178757, "learning_rate": 9.13896688643172e-05, "loss": 0.0001, "step": 1978 }, { "epoch": 0.6713855804687832, "grad_norm": 0.01733364723622799, "learning_rate": 9.13718571213122e-05, "loss": 0.0001, "step": 1980 }, { "epoch": 0.6720637477217819, "grad_norm": 0.0006641743239015341, "learning_rate": 9.135402871372808e-05, "loss": 0.0001, "step": 1982 }, { "epoch": 0.6727419149747806, "grad_norm": 0.009665962308645248, "learning_rate": 9.133618364874616e-05, "loss": 0.0001, "step": 1984 }, { "epoch": 0.6734200822277794, "grad_norm": 0.025363771244883537, "learning_rate": 9.131832193355441e-05, "loss": 0.0001, "step": 1986 }, { "epoch": 0.6740982494807782, "grad_norm": 0.020307032391428947, "learning_rate": 9.130044357534752e-05, "loss": 0.0001, "step": 1988 }, { "epoch": 0.674776416733777, "grad_norm": 0.004317408427596092, "learning_rate": 9.12825485813269e-05, "loss": 0.0001, "step": 1990 }, { "epoch": 0.6754545839867757, "grad_norm": 0.007612558081746101, "learning_rate": 9.126463695870064e-05, "loss": 0.0, "step": 1992 }, { "epoch": 0.6761327512397745, "grad_norm": 0.00039802887476980686, "learning_rate": 9.124670871468355e-05, "loss": 0.0, "step": 1994 }, { "epoch": 0.6768109184927733, "grad_norm": 0.06519202142953873, "learning_rate": 9.122876385649713e-05, "loss": 0.0002, "step": 1996 }, { "epoch": 0.677489085745772, "grad_norm": 0.0010920091299340129, "learning_rate": 9.121080239136954e-05, "loss": 0.0002, "step": 1998 }, { "epoch": 0.6781672529987708, "grad_norm": 0.0016080245841294527, "learning_rate": 9.119282432653569e-05, "loss": 0.0, "step": 2000 }, { "epoch": 0.6788454202517696, "grad_norm": 0.007306835614144802, "learning_rate": 9.117482966923714e-05, "loss": 0.0, "step": 2002 }, { "epoch": 0.6795235875047684, "grad_norm": 0.011635251343250275, "learning_rate": 9.11568184267221e-05, "loss": 0.0, "step": 2004 }, { "epoch": 0.6802017547577671, "grad_norm": 0.001634229440242052, "learning_rate": 9.113879060624553e-05, "loss": 0.0, "step": 2006 }, { "epoch": 0.6808799220107659, "grad_norm": 0.0009552877745591104, "learning_rate": 9.112074621506902e-05, "loss": 0.0, "step": 2008 }, { "epoch": 0.6815580892637647, "grad_norm": 0.004326884634792805, "learning_rate": 9.110268526046085e-05, "loss": 0.0, "step": 2010 }, { "epoch": 0.6822362565167635, "grad_norm": 0.0170154869556427, "learning_rate": 9.108460774969598e-05, "loss": 0.0001, "step": 2012 }, { "epoch": 0.6829144237697622, "grad_norm": 0.023070022463798523, "learning_rate": 9.106651369005601e-05, "loss": 0.0001, "step": 2014 }, { "epoch": 0.683592591022761, "grad_norm": 0.0006951871910132468, "learning_rate": 9.104840308882924e-05, "loss": 0.0, "step": 2016 }, { "epoch": 0.6842707582757598, "grad_norm": 0.030992897227406502, "learning_rate": 9.103027595331063e-05, "loss": 0.0002, "step": 2018 }, { "epoch": 0.6849489255287585, "grad_norm": 0.0006043879548087716, "learning_rate": 9.101213229080177e-05, "loss": 0.0002, "step": 2020 }, { "epoch": 0.6856270927817573, "grad_norm": 0.0035766216460615396, "learning_rate": 9.099397210861091e-05, "loss": 0.0, "step": 2022 }, { "epoch": 0.6863052600347561, "grad_norm": 0.020493974909186363, "learning_rate": 9.0975795414053e-05, "loss": 0.0002, "step": 2024 }, { "epoch": 0.6869834272877549, "grad_norm": 0.0067712995223701, "learning_rate": 9.09576022144496e-05, "loss": 0.0, "step": 2026 }, { "epoch": 0.6876615945407536, "grad_norm": 0.007755874190479517, "learning_rate": 9.093939251712891e-05, "loss": 0.0, "step": 2028 }, { "epoch": 0.6883397617937523, "grad_norm": 0.01566697284579277, "learning_rate": 9.092116632942582e-05, "loss": 0.0001, "step": 2030 }, { "epoch": 0.6890179290467512, "grad_norm": 0.0011003854451701045, "learning_rate": 9.090292365868183e-05, "loss": 0.0, "step": 2032 }, { "epoch": 0.6896960962997499, "grad_norm": 0.0002819379442371428, "learning_rate": 9.088466451224508e-05, "loss": 0.0001, "step": 2034 }, { "epoch": 0.6903742635527487, "grad_norm": 0.0012433732626959682, "learning_rate": 9.086638889747035e-05, "loss": 0.0, "step": 2036 }, { "epoch": 0.6910524308057475, "grad_norm": 0.00959568377584219, "learning_rate": 9.084809682171907e-05, "loss": 0.0, "step": 2038 }, { "epoch": 0.6917305980587463, "grad_norm": 0.029413780197501183, "learning_rate": 9.082978829235927e-05, "loss": 0.0002, "step": 2040 }, { "epoch": 0.692408765311745, "grad_norm": 0.001475983764976263, "learning_rate": 9.081146331676562e-05, "loss": 0.0, "step": 2042 }, { "epoch": 0.6930869325647437, "grad_norm": 0.00169148959685117, "learning_rate": 9.079312190231944e-05, "loss": 0.0, "step": 2044 }, { "epoch": 0.6937650998177426, "grad_norm": 0.0006321507389657199, "learning_rate": 9.077476405640862e-05, "loss": 0.0, "step": 2046 }, { "epoch": 0.6944432670707413, "grad_norm": 0.0008575081010349095, "learning_rate": 9.075638978642771e-05, "loss": 0.0, "step": 2048 }, { "epoch": 0.6951214343237401, "grad_norm": 0.0017931024776771665, "learning_rate": 9.07379990997779e-05, "loss": 0.0, "step": 2050 }, { "epoch": 0.6957996015767388, "grad_norm": 0.013845414854586124, "learning_rate": 9.071959200386688e-05, "loss": 0.0001, "step": 2052 }, { "epoch": 0.6964777688297377, "grad_norm": 0.0004693720256909728, "learning_rate": 9.07011685061091e-05, "loss": 0.0, "step": 2054 }, { "epoch": 0.6971559360827364, "grad_norm": 0.002094951691105962, "learning_rate": 9.068272861392551e-05, "loss": 0.0, "step": 2056 }, { "epoch": 0.6978341033357351, "grad_norm": 0.08430146425962448, "learning_rate": 9.06642723347437e-05, "loss": 0.0002, "step": 2058 }, { "epoch": 0.698512270588734, "grad_norm": 0.0010323359165340662, "learning_rate": 9.064579967599786e-05, "loss": 0.0001, "step": 2060 }, { "epoch": 0.6991904378417327, "grad_norm": 0.009902028366923332, "learning_rate": 9.062731064512876e-05, "loss": 0.0, "step": 2062 }, { "epoch": 0.6998686050947315, "grad_norm": 0.0009311030153185129, "learning_rate": 9.06088052495838e-05, "loss": 0.0002, "step": 2064 }, { "epoch": 0.7005467723477302, "grad_norm": 0.00923315342515707, "learning_rate": 9.059028349681694e-05, "loss": 0.0, "step": 2066 }, { "epoch": 0.7012249396007291, "grad_norm": 0.06685227155685425, "learning_rate": 9.057174539428873e-05, "loss": 0.0002, "step": 2068 }, { "epoch": 0.7019031068537278, "grad_norm": 0.01807691715657711, "learning_rate": 9.055319094946633e-05, "loss": 0.0002, "step": 2070 }, { "epoch": 0.7025812741067265, "grad_norm": 0.00733609776943922, "learning_rate": 9.053462016982348e-05, "loss": 0.0, "step": 2072 }, { "epoch": 0.7032594413597253, "grad_norm": 0.02869594469666481, "learning_rate": 9.051603306284047e-05, "loss": 0.0002, "step": 2074 }, { "epoch": 0.7039376086127241, "grad_norm": 0.028494779020547867, "learning_rate": 9.049742963600418e-05, "loss": 0.0001, "step": 2076 }, { "epoch": 0.7046157758657229, "grad_norm": 0.011124657467007637, "learning_rate": 9.047880989680808e-05, "loss": 0.0001, "step": 2078 }, { "epoch": 0.7052939431187216, "grad_norm": 0.001125624985434115, "learning_rate": 9.046017385275218e-05, "loss": 0.0001, "step": 2080 }, { "epoch": 0.7059721103717205, "grad_norm": 0.006963169202208519, "learning_rate": 9.044152151134311e-05, "loss": 0.0001, "step": 2082 }, { "epoch": 0.7066502776247192, "grad_norm": 0.07182872295379639, "learning_rate": 9.0422852880094e-05, "loss": 0.0002, "step": 2084 }, { "epoch": 0.707328444877718, "grad_norm": 0.006542180199176073, "learning_rate": 9.040416796652458e-05, "loss": 0.0, "step": 2086 }, { "epoch": 0.7080066121307167, "grad_norm": 0.0014154227683320642, "learning_rate": 9.038546677816115e-05, "loss": 0.0001, "step": 2088 }, { "epoch": 0.7086847793837155, "grad_norm": 0.005531368311494589, "learning_rate": 9.036674932253652e-05, "loss": 0.0002, "step": 2090 }, { "epoch": 0.7093629466367143, "grad_norm": 0.035860493779182434, "learning_rate": 9.034801560719011e-05, "loss": 0.0001, "step": 2092 }, { "epoch": 0.710041113889713, "grad_norm": 0.02187497168779373, "learning_rate": 9.032926563966781e-05, "loss": 0.0001, "step": 2094 }, { "epoch": 0.7107192811427118, "grad_norm": 0.003647587960585952, "learning_rate": 9.031049942752215e-05, "loss": 0.0, "step": 2096 }, { "epoch": 0.7113974483957106, "grad_norm": 0.03892146795988083, "learning_rate": 9.029171697831214e-05, "loss": 0.0002, "step": 2098 }, { "epoch": 0.7120756156487094, "grad_norm": 0.00161711813416332, "learning_rate": 9.027291829960334e-05, "loss": 0.0, "step": 2100 }, { "epoch": 0.7127537829017081, "grad_norm": 0.06189465895295143, "learning_rate": 9.025410339896788e-05, "loss": 0.0008, "step": 2102 }, { "epoch": 0.713431950154707, "grad_norm": 0.001967528834939003, "learning_rate": 9.023527228398439e-05, "loss": 0.0005, "step": 2104 }, { "epoch": 0.7141101174077057, "grad_norm": 0.04755755886435509, "learning_rate": 9.021642496223801e-05, "loss": 0.0005, "step": 2106 }, { "epoch": 0.7147882846607044, "grad_norm": 0.04014548659324646, "learning_rate": 9.019756144132048e-05, "loss": 0.0001, "step": 2108 }, { "epoch": 0.7154664519137032, "grad_norm": 0.03437534719705582, "learning_rate": 9.017868172882999e-05, "loss": 0.0001, "step": 2110 }, { "epoch": 0.716144619166702, "grad_norm": 0.06222149357199669, "learning_rate": 9.015978583237132e-05, "loss": 0.0004, "step": 2112 }, { "epoch": 0.7168227864197008, "grad_norm": 0.007575817871838808, "learning_rate": 9.014087375955573e-05, "loss": 0.0001, "step": 2114 }, { "epoch": 0.7175009536726995, "grad_norm": 0.03959363326430321, "learning_rate": 9.012194551800098e-05, "loss": 0.0001, "step": 2116 }, { "epoch": 0.7181791209256984, "grad_norm": 0.024278750643134117, "learning_rate": 9.010300111533138e-05, "loss": 0.0001, "step": 2118 }, { "epoch": 0.7188572881786971, "grad_norm": 0.00355353276245296, "learning_rate": 9.008404055917772e-05, "loss": 0.0001, "step": 2120 }, { "epoch": 0.7195354554316958, "grad_norm": 0.06160525605082512, "learning_rate": 9.00650638571773e-05, "loss": 0.0001, "step": 2122 }, { "epoch": 0.7202136226846946, "grad_norm": 0.006701144389808178, "learning_rate": 9.004607101697397e-05, "loss": 0.0, "step": 2124 }, { "epoch": 0.7208917899376934, "grad_norm": 0.004692121408879757, "learning_rate": 9.002706204621803e-05, "loss": 0.0001, "step": 2126 }, { "epoch": 0.7215699571906922, "grad_norm": 0.02521413005888462, "learning_rate": 9.000803695256627e-05, "loss": 0.0003, "step": 2128 }, { "epoch": 0.7222481244436909, "grad_norm": 0.021716909483075142, "learning_rate": 8.998899574368201e-05, "loss": 0.0001, "step": 2130 }, { "epoch": 0.7229262916966896, "grad_norm": 0.03598916158080101, "learning_rate": 8.996993842723504e-05, "loss": 0.0001, "step": 2132 }, { "epoch": 0.7236044589496885, "grad_norm": 0.003856200259178877, "learning_rate": 8.995086501090167e-05, "loss": 0.0, "step": 2134 }, { "epoch": 0.7242826262026872, "grad_norm": 0.003834143513813615, "learning_rate": 8.993177550236464e-05, "loss": 0.0, "step": 2136 }, { "epoch": 0.724960793455686, "grad_norm": 0.008992083370685577, "learning_rate": 8.991266990931322e-05, "loss": 0.0, "step": 2138 }, { "epoch": 0.7256389607086848, "grad_norm": 0.01066268514841795, "learning_rate": 8.989354823944314e-05, "loss": 0.0001, "step": 2140 }, { "epoch": 0.7263171279616836, "grad_norm": 0.0006079597515054047, "learning_rate": 8.987441050045658e-05, "loss": 0.0, "step": 2142 }, { "epoch": 0.7269952952146823, "grad_norm": 0.007435145787894726, "learning_rate": 8.985525670006225e-05, "loss": 0.0, "step": 2144 }, { "epoch": 0.727673462467681, "grad_norm": 0.002100519370287657, "learning_rate": 8.983608684597529e-05, "loss": 0.0, "step": 2146 }, { "epoch": 0.7283516297206799, "grad_norm": 0.02002492919564247, "learning_rate": 8.98169009459173e-05, "loss": 0.0001, "step": 2148 }, { "epoch": 0.7290297969736786, "grad_norm": 0.0023629760835319757, "learning_rate": 8.979769900761639e-05, "loss": 0.0, "step": 2150 }, { "epoch": 0.7297079642266774, "grad_norm": 0.0003582996141631156, "learning_rate": 8.977848103880706e-05, "loss": 0.0, "step": 2152 }, { "epoch": 0.7303861314796761, "grad_norm": 0.0012692782329395413, "learning_rate": 8.975924704723033e-05, "loss": 0.0, "step": 2154 }, { "epoch": 0.731064298732675, "grad_norm": 0.0007864875951781869, "learning_rate": 8.973999704063365e-05, "loss": 0.0, "step": 2156 }, { "epoch": 0.7317424659856737, "grad_norm": 0.0011207296047359705, "learning_rate": 8.972073102677091e-05, "loss": 0.0, "step": 2158 }, { "epoch": 0.7324206332386725, "grad_norm": 0.002925334731116891, "learning_rate": 8.970144901340246e-05, "loss": 0.0, "step": 2160 }, { "epoch": 0.7330988004916713, "grad_norm": 0.0008847307763062418, "learning_rate": 8.96821510082951e-05, "loss": 0.0, "step": 2162 }, { "epoch": 0.73377696774467, "grad_norm": 0.023506417870521545, "learning_rate": 8.966283701922205e-05, "loss": 0.0, "step": 2164 }, { "epoch": 0.7344551349976688, "grad_norm": 0.004152826499193907, "learning_rate": 8.964350705396301e-05, "loss": 0.0001, "step": 2166 }, { "epoch": 0.7351333022506675, "grad_norm": 0.009032066911458969, "learning_rate": 8.962416112030405e-05, "loss": 0.0, "step": 2168 }, { "epoch": 0.7358114695036664, "grad_norm": 0.024347808212041855, "learning_rate": 8.960479922603775e-05, "loss": 0.0001, "step": 2170 }, { "epoch": 0.7364896367566651, "grad_norm": 0.012557287700474262, "learning_rate": 8.958542137896304e-05, "loss": 0.0001, "step": 2172 }, { "epoch": 0.7371678040096639, "grad_norm": 0.02936052344739437, "learning_rate": 8.956602758688535e-05, "loss": 0.0002, "step": 2174 }, { "epoch": 0.7378459712626626, "grad_norm": 0.0004298368585295975, "learning_rate": 8.954661785761647e-05, "loss": 0.0, "step": 2176 }, { "epoch": 0.7385241385156615, "grad_norm": 0.001203262945637107, "learning_rate": 8.952719219897464e-05, "loss": 0.0, "step": 2178 }, { "epoch": 0.7392023057686602, "grad_norm": 0.027022022753953934, "learning_rate": 8.950775061878453e-05, "loss": 0.0, "step": 2180 }, { "epoch": 0.7398804730216589, "grad_norm": 0.0010193714406341314, "learning_rate": 8.948829312487719e-05, "loss": 0.0, "step": 2182 }, { "epoch": 0.7405586402746578, "grad_norm": 0.006130346562713385, "learning_rate": 8.94688197250901e-05, "loss": 0.0, "step": 2184 }, { "epoch": 0.7412368075276565, "grad_norm": 0.0006499038427136838, "learning_rate": 8.944933042726714e-05, "loss": 0.0, "step": 2186 }, { "epoch": 0.7419149747806553, "grad_norm": 0.0354667603969574, "learning_rate": 8.94298252392586e-05, "loss": 0.0001, "step": 2188 }, { "epoch": 0.742593142033654, "grad_norm": 0.000631691247690469, "learning_rate": 8.941030416892118e-05, "loss": 0.0, "step": 2190 }, { "epoch": 0.7432713092866529, "grad_norm": 0.0023066874127835035, "learning_rate": 8.939076722411796e-05, "loss": 0.0, "step": 2192 }, { "epoch": 0.7439494765396516, "grad_norm": 0.011757615953683853, "learning_rate": 8.93712144127184e-05, "loss": 0.0, "step": 2194 }, { "epoch": 0.7446276437926503, "grad_norm": 0.03690662980079651, "learning_rate": 8.935164574259838e-05, "loss": 0.0002, "step": 2196 }, { "epoch": 0.7453058110456491, "grad_norm": 0.002006297232583165, "learning_rate": 8.933206122164018e-05, "loss": 0.0, "step": 2198 }, { "epoch": 0.7459839782986479, "grad_norm": 0.055943265557289124, "learning_rate": 8.931246085773239e-05, "loss": 0.0003, "step": 2200 }, { "epoch": 0.7466621455516467, "grad_norm": 0.00032859868952073157, "learning_rate": 8.92928446587701e-05, "loss": 0.0, "step": 2202 }, { "epoch": 0.7473403128046454, "grad_norm": 0.0006503578624688089, "learning_rate": 8.927321263265467e-05, "loss": 0.0, "step": 2204 }, { "epoch": 0.7480184800576443, "grad_norm": 0.01603405550122261, "learning_rate": 8.925356478729387e-05, "loss": 0.0001, "step": 2206 }, { "epoch": 0.748696647310643, "grad_norm": 0.000402370496885851, "learning_rate": 8.92339011306019e-05, "loss": 0.0, "step": 2208 }, { "epoch": 0.7493748145636417, "grad_norm": 0.0431690588593483, "learning_rate": 8.921422167049923e-05, "loss": 0.0003, "step": 2210 }, { "epoch": 0.7500529818166405, "grad_norm": 0.04571768641471863, "learning_rate": 8.919452641491276e-05, "loss": 0.0001, "step": 2212 }, { "epoch": 0.7507311490696393, "grad_norm": 0.0007486413232982159, "learning_rate": 8.917481537177575e-05, "loss": 0.0, "step": 2214 }, { "epoch": 0.7514093163226381, "grad_norm": 0.0011588893830776215, "learning_rate": 8.915508854902778e-05, "loss": 0.0, "step": 2216 }, { "epoch": 0.7520874835756368, "grad_norm": 0.04039638116955757, "learning_rate": 8.913534595461483e-05, "loss": 0.0002, "step": 2218 }, { "epoch": 0.7527656508286356, "grad_norm": 0.0011450482998043299, "learning_rate": 8.91155875964892e-05, "loss": 0.0, "step": 2220 }, { "epoch": 0.7534438180816344, "grad_norm": 0.02565939724445343, "learning_rate": 8.909581348260958e-05, "loss": 0.0001, "step": 2222 }, { "epoch": 0.7541219853346331, "grad_norm": 0.05398557707667351, "learning_rate": 8.907602362094094e-05, "loss": 0.0002, "step": 2224 }, { "epoch": 0.7548001525876319, "grad_norm": 0.01135649997740984, "learning_rate": 8.905621801945467e-05, "loss": 0.0001, "step": 2226 }, { "epoch": 0.7554783198406307, "grad_norm": 0.007088663522154093, "learning_rate": 8.903639668612846e-05, "loss": 0.0, "step": 2228 }, { "epoch": 0.7561564870936295, "grad_norm": 0.009923601523041725, "learning_rate": 8.901655962894632e-05, "loss": 0.0, "step": 2230 }, { "epoch": 0.7568346543466282, "grad_norm": 0.02772349677979946, "learning_rate": 8.899670685589864e-05, "loss": 0.0002, "step": 2232 }, { "epoch": 0.757512821599627, "grad_norm": 0.04577437788248062, "learning_rate": 8.89768383749821e-05, "loss": 0.0001, "step": 2234 }, { "epoch": 0.7581909888526258, "grad_norm": 0.0013328574132174253, "learning_rate": 8.895695419419972e-05, "loss": 0.0, "step": 2236 }, { "epoch": 0.7588691561056246, "grad_norm": 0.017390921711921692, "learning_rate": 8.893705432156086e-05, "loss": 0.0, "step": 2238 }, { "epoch": 0.7595473233586233, "grad_norm": 0.035108234733343124, "learning_rate": 8.891713876508116e-05, "loss": 0.0001, "step": 2240 }, { "epoch": 0.7602254906116221, "grad_norm": 0.016167139634490013, "learning_rate": 8.889720753278264e-05, "loss": 0.0, "step": 2242 }, { "epoch": 0.7609036578646209, "grad_norm": 0.03242606669664383, "learning_rate": 8.887726063269355e-05, "loss": 0.0001, "step": 2244 }, { "epoch": 0.7615818251176196, "grad_norm": 0.002253993647173047, "learning_rate": 8.885729807284856e-05, "loss": 0.0, "step": 2246 }, { "epoch": 0.7622599923706184, "grad_norm": 0.05065949261188507, "learning_rate": 8.883731986128853e-05, "loss": 0.0003, "step": 2248 }, { "epoch": 0.7629381596236172, "grad_norm": 0.0061168549582362175, "learning_rate": 8.88173260060607e-05, "loss": 0.0, "step": 2250 }, { "epoch": 0.763616326876616, "grad_norm": 0.00484465854242444, "learning_rate": 8.879731651521861e-05, "loss": 0.0, "step": 2252 }, { "epoch": 0.7642944941296147, "grad_norm": 0.01573886163532734, "learning_rate": 8.877729139682206e-05, "loss": 0.0002, "step": 2254 }, { "epoch": 0.7649726613826134, "grad_norm": 0.0024972499813884497, "learning_rate": 8.875725065893717e-05, "loss": 0.0003, "step": 2256 }, { "epoch": 0.7656508286356123, "grad_norm": 0.007368649821728468, "learning_rate": 8.873719430963636e-05, "loss": 0.0, "step": 2258 }, { "epoch": 0.766328995888611, "grad_norm": 0.05810406804084778, "learning_rate": 8.871712235699831e-05, "loss": 0.0002, "step": 2260 }, { "epoch": 0.7670071631416098, "grad_norm": 0.001753704040311277, "learning_rate": 8.869703480910801e-05, "loss": 0.0, "step": 2262 }, { "epoch": 0.7676853303946086, "grad_norm": 0.015127058140933514, "learning_rate": 8.867693167405671e-05, "loss": 0.0001, "step": 2264 }, { "epoch": 0.7683634976476074, "grad_norm": 0.007170891854912043, "learning_rate": 8.865681295994195e-05, "loss": 0.0, "step": 2266 }, { "epoch": 0.7690416649006061, "grad_norm": 0.0003231594746466726, "learning_rate": 8.863667867486756e-05, "loss": 0.0001, "step": 2268 }, { "epoch": 0.7697198321536048, "grad_norm": 0.03522728011012077, "learning_rate": 8.861652882694363e-05, "loss": 0.0001, "step": 2270 }, { "epoch": 0.7703979994066037, "grad_norm": 0.0019485610537230968, "learning_rate": 8.859636342428648e-05, "loss": 0.0, "step": 2272 }, { "epoch": 0.7710761666596024, "grad_norm": 0.003564967541024089, "learning_rate": 8.857618247501878e-05, "loss": 0.0, "step": 2274 }, { "epoch": 0.7717543339126012, "grad_norm": 0.016705702990293503, "learning_rate": 8.855598598726939e-05, "loss": 0.0, "step": 2276 }, { "epoch": 0.7724325011655999, "grad_norm": 0.00029819709016010165, "learning_rate": 8.853577396917345e-05, "loss": 0.0, "step": 2278 }, { "epoch": 0.7731106684185988, "grad_norm": 0.01925487443804741, "learning_rate": 8.851554642887237e-05, "loss": 0.0, "step": 2280 }, { "epoch": 0.7737888356715975, "grad_norm": 0.002167096361517906, "learning_rate": 8.849530337451378e-05, "loss": 0.0, "step": 2282 }, { "epoch": 0.7744670029245962, "grad_norm": 0.00036691053537651896, "learning_rate": 8.84750448142516e-05, "loss": 0.0, "step": 2284 }, { "epoch": 0.7751451701775951, "grad_norm": 0.0002451124310027808, "learning_rate": 8.845477075624598e-05, "loss": 0.0, "step": 2286 }, { "epoch": 0.7758233374305938, "grad_norm": 0.007234338205307722, "learning_rate": 8.843448120866329e-05, "loss": 0.0, "step": 2288 }, { "epoch": 0.7765015046835926, "grad_norm": 0.004028369206935167, "learning_rate": 8.841417617967618e-05, "loss": 0.0, "step": 2290 }, { "epoch": 0.7771796719365913, "grad_norm": 0.0018081561429426074, "learning_rate": 8.839385567746347e-05, "loss": 0.0, "step": 2292 }, { "epoch": 0.7778578391895902, "grad_norm": 0.0005776785546913743, "learning_rate": 8.837351971021031e-05, "loss": 0.0, "step": 2294 }, { "epoch": 0.7785360064425889, "grad_norm": 0.0026705998461693525, "learning_rate": 8.8353168286108e-05, "loss": 0.0, "step": 2296 }, { "epoch": 0.7792141736955877, "grad_norm": 0.003311643609777093, "learning_rate": 8.833280141335408e-05, "loss": 0.0001, "step": 2298 }, { "epoch": 0.7798923409485864, "grad_norm": 0.00011252472177147865, "learning_rate": 8.831241910015233e-05, "loss": 0.0, "step": 2300 }, { "epoch": 0.7805705082015852, "grad_norm": 0.0006050001829862595, "learning_rate": 8.829202135471276e-05, "loss": 0.0, "step": 2302 }, { "epoch": 0.781248675454584, "grad_norm": 0.0012319076340645552, "learning_rate": 8.827160818525156e-05, "loss": 0.0, "step": 2304 }, { "epoch": 0.7819268427075827, "grad_norm": 0.0023186788894236088, "learning_rate": 8.825117959999116e-05, "loss": 0.0, "step": 2306 }, { "epoch": 0.7826050099605816, "grad_norm": 0.028587566688656807, "learning_rate": 8.82307356071602e-05, "loss": 0.0001, "step": 2308 }, { "epoch": 0.7832831772135803, "grad_norm": 0.002290781820192933, "learning_rate": 8.82102762149935e-05, "loss": 0.0, "step": 2310 }, { "epoch": 0.7839613444665791, "grad_norm": 0.00015715346671640873, "learning_rate": 8.818980143173213e-05, "loss": 0.0, "step": 2312 }, { "epoch": 0.7846395117195778, "grad_norm": 0.001944783260114491, "learning_rate": 8.816931126562328e-05, "loss": 0.0, "step": 2314 }, { "epoch": 0.7853176789725766, "grad_norm": 0.000705886457581073, "learning_rate": 8.814880572492044e-05, "loss": 0.0, "step": 2316 }, { "epoch": 0.7859958462255754, "grad_norm": 0.0004583608824759722, "learning_rate": 8.81282848178832e-05, "loss": 0.0, "step": 2318 }, { "epoch": 0.7866740134785741, "grad_norm": 0.0001509250869276002, "learning_rate": 8.81077485527774e-05, "loss": 0.0, "step": 2320 }, { "epoch": 0.7873521807315729, "grad_norm": 0.0009164375951513648, "learning_rate": 8.808719693787504e-05, "loss": 0.0, "step": 2322 }, { "epoch": 0.7880303479845717, "grad_norm": 0.0007774062687531114, "learning_rate": 8.806662998145431e-05, "loss": 0.0, "step": 2324 }, { "epoch": 0.7887085152375705, "grad_norm": 0.004715454764664173, "learning_rate": 8.804604769179958e-05, "loss": 0.0, "step": 2326 }, { "epoch": 0.7893866824905692, "grad_norm": 0.00013452318671625108, "learning_rate": 8.802545007720138e-05, "loss": 0.0, "step": 2328 }, { "epoch": 0.790064849743568, "grad_norm": 0.00014649407239630818, "learning_rate": 8.800483714595644e-05, "loss": 0.0, "step": 2330 }, { "epoch": 0.7907430169965668, "grad_norm": 0.00020664343901444227, "learning_rate": 8.798420890636764e-05, "loss": 0.0, "step": 2332 }, { "epoch": 0.7914211842495655, "grad_norm": 0.010160190984606743, "learning_rate": 8.796356536674403e-05, "loss": 0.0, "step": 2334 }, { "epoch": 0.7920993515025643, "grad_norm": 9.14070478756912e-05, "learning_rate": 8.794290653540084e-05, "loss": 0.0, "step": 2336 }, { "epoch": 0.7927775187555631, "grad_norm": 9.601074270904064e-05, "learning_rate": 8.792223242065945e-05, "loss": 0.0, "step": 2338 }, { "epoch": 0.7934556860085619, "grad_norm": 0.0001925271499203518, "learning_rate": 8.790154303084736e-05, "loss": 0.0, "step": 2340 }, { "epoch": 0.7941338532615606, "grad_norm": 0.00016138785576913506, "learning_rate": 8.788083837429828e-05, "loss": 0.0, "step": 2342 }, { "epoch": 0.7948120205145595, "grad_norm": 0.0004923270898871124, "learning_rate": 8.786011845935202e-05, "loss": 0.0, "step": 2344 }, { "epoch": 0.7954901877675582, "grad_norm": 0.00026215388788841665, "learning_rate": 8.783938329435457e-05, "loss": 0.0, "step": 2346 }, { "epoch": 0.7961683550205569, "grad_norm": 0.0034639956429600716, "learning_rate": 8.781863288765806e-05, "loss": 0.0, "step": 2348 }, { "epoch": 0.7968465222735557, "grad_norm": 0.0001678073313087225, "learning_rate": 8.779786724762075e-05, "loss": 0.0, "step": 2350 }, { "epoch": 0.7975246895265545, "grad_norm": 0.00024357331858482212, "learning_rate": 8.777708638260702e-05, "loss": 0.0, "step": 2352 }, { "epoch": 0.7982028567795533, "grad_norm": 0.00016859491006471217, "learning_rate": 8.775629030098742e-05, "loss": 0.0, "step": 2354 }, { "epoch": 0.798881024032552, "grad_norm": 0.0007309844950214028, "learning_rate": 8.773547901113862e-05, "loss": 0.0, "step": 2356 }, { "epoch": 0.7995591912855508, "grad_norm": 0.00010276456305291504, "learning_rate": 8.771465252144335e-05, "loss": 0.0, "step": 2358 }, { "epoch": 0.8002373585385496, "grad_norm": 5.053769200458191e-05, "learning_rate": 8.769381084029058e-05, "loss": 0.0, "step": 2360 }, { "epoch": 0.8009155257915483, "grad_norm": 0.0001550625602249056, "learning_rate": 8.76729539760753e-05, "loss": 0.0, "step": 2362 }, { "epoch": 0.8015936930445471, "grad_norm": 0.00012546095240395516, "learning_rate": 8.765208193719866e-05, "loss": 0.0, "step": 2364 }, { "epoch": 0.8022718602975459, "grad_norm": 6.550251418957487e-05, "learning_rate": 8.763119473206794e-05, "loss": 0.0, "step": 2366 }, { "epoch": 0.8029500275505447, "grad_norm": 0.00018826685845851898, "learning_rate": 8.761029236909647e-05, "loss": 0.0, "step": 2368 }, { "epoch": 0.8036281948035434, "grad_norm": 0.0002414361370028928, "learning_rate": 8.758937485670373e-05, "loss": 0.0, "step": 2370 }, { "epoch": 0.8043063620565422, "grad_norm": 0.00019206578144803643, "learning_rate": 8.75684422033153e-05, "loss": 0.0, "step": 2372 }, { "epoch": 0.804984529309541, "grad_norm": 6.82855534250848e-05, "learning_rate": 8.754749441736283e-05, "loss": 0.0, "step": 2374 }, { "epoch": 0.8056626965625397, "grad_norm": 0.00027947468333877623, "learning_rate": 8.752653150728411e-05, "loss": 0.0, "step": 2376 }, { "epoch": 0.8063408638155385, "grad_norm": 0.0001643617288209498, "learning_rate": 8.750555348152298e-05, "loss": 0.0, "step": 2378 }, { "epoch": 0.8070190310685372, "grad_norm": 8.241987961810082e-05, "learning_rate": 8.74845603485294e-05, "loss": 0.0, "step": 2380 }, { "epoch": 0.8076971983215361, "grad_norm": 0.00038982118712738156, "learning_rate": 8.746355211675939e-05, "loss": 0.0, "step": 2382 }, { "epoch": 0.8083753655745348, "grad_norm": 0.02610670030117035, "learning_rate": 8.744252879467507e-05, "loss": 0.0001, "step": 2384 }, { "epoch": 0.8090535328275336, "grad_norm": 0.00022798529244028032, "learning_rate": 8.742149039074463e-05, "loss": 0.0, "step": 2386 }, { "epoch": 0.8097317000805324, "grad_norm": 5.434233753476292e-05, "learning_rate": 8.740043691344233e-05, "loss": 0.0, "step": 2388 }, { "epoch": 0.8104098673335312, "grad_norm": 8.579740824643523e-05, "learning_rate": 8.737936837124852e-05, "loss": 0.0, "step": 2390 }, { "epoch": 0.8110880345865299, "grad_norm": 0.0001528613647678867, "learning_rate": 8.73582847726496e-05, "loss": 0.0, "step": 2392 }, { "epoch": 0.8117662018395286, "grad_norm": 0.0012288985308259726, "learning_rate": 8.733718612613803e-05, "loss": 0.0, "step": 2394 }, { "epoch": 0.8124443690925275, "grad_norm": 4.78740876133088e-05, "learning_rate": 8.731607244021236e-05, "loss": 0.0, "step": 2396 }, { "epoch": 0.8131225363455262, "grad_norm": 0.021069234237074852, "learning_rate": 8.729494372337716e-05, "loss": 0.0001, "step": 2398 }, { "epoch": 0.813800703598525, "grad_norm": 0.00011475202336441725, "learning_rate": 8.727379998414311e-05, "loss": 0.0, "step": 2400 }, { "epoch": 0.8144788708515237, "grad_norm": 0.00019884259381797165, "learning_rate": 8.725264123102688e-05, "loss": 0.0, "step": 2402 }, { "epoch": 0.8151570381045226, "grad_norm": 0.00013354167458601296, "learning_rate": 8.723146747255122e-05, "loss": 0.0, "step": 2404 }, { "epoch": 0.8158352053575213, "grad_norm": 0.000227173266466707, "learning_rate": 8.72102787172449e-05, "loss": 0.0, "step": 2406 }, { "epoch": 0.81651337261052, "grad_norm": 7.385117351077497e-05, "learning_rate": 8.718907497364277e-05, "loss": 0.0, "step": 2408 }, { "epoch": 0.8171915398635189, "grad_norm": 0.002906026551499963, "learning_rate": 8.71678562502857e-05, "loss": 0.0, "step": 2410 }, { "epoch": 0.8178697071165176, "grad_norm": 4.0036276914179325e-05, "learning_rate": 8.714662255572055e-05, "loss": 0.0, "step": 2412 }, { "epoch": 0.8185478743695164, "grad_norm": 7.089857535902411e-05, "learning_rate": 8.712537389850031e-05, "loss": 0.0, "step": 2414 }, { "epoch": 0.8192260416225151, "grad_norm": 0.0007380903698503971, "learning_rate": 8.710411028718388e-05, "loss": 0.0, "step": 2416 }, { "epoch": 0.819904208875514, "grad_norm": 0.00014535282389260828, "learning_rate": 8.708283173033627e-05, "loss": 0.0, "step": 2418 }, { "epoch": 0.8205823761285127, "grad_norm": 0.0002633386757224798, "learning_rate": 8.706153823652848e-05, "loss": 0.0, "step": 2420 }, { "epoch": 0.8212605433815114, "grad_norm": 8.741030615055934e-05, "learning_rate": 8.70402298143375e-05, "loss": 0.0, "step": 2422 }, { "epoch": 0.8219387106345102, "grad_norm": 0.0004991906462237239, "learning_rate": 8.70189064723464e-05, "loss": 0.0, "step": 2424 }, { "epoch": 0.822616877887509, "grad_norm": 0.0008120696293190122, "learning_rate": 8.69975682191442e-05, "loss": 0.0, "step": 2426 }, { "epoch": 0.8232950451405078, "grad_norm": 8.671852992847562e-05, "learning_rate": 8.697621506332594e-05, "loss": 0.0, "step": 2428 }, { "epoch": 0.8239732123935065, "grad_norm": 0.0048635476268827915, "learning_rate": 8.695484701349268e-05, "loss": 0.0, "step": 2430 }, { "epoch": 0.8246513796465054, "grad_norm": 0.0019546092953532934, "learning_rate": 8.693346407825144e-05, "loss": 0.0, "step": 2432 }, { "epoch": 0.8253295468995041, "grad_norm": 0.0003734403580892831, "learning_rate": 8.69120662662153e-05, "loss": 0.0, "step": 2434 }, { "epoch": 0.8260077141525028, "grad_norm": 0.0005670825485140085, "learning_rate": 8.68906535860033e-05, "loss": 0.0, "step": 2436 }, { "epoch": 0.8266858814055016, "grad_norm": 0.00019534399325493723, "learning_rate": 8.686922604624044e-05, "loss": 0.0, "step": 2438 }, { "epoch": 0.8273640486585004, "grad_norm": 9.16475837584585e-05, "learning_rate": 8.684778365555773e-05, "loss": 0.0, "step": 2440 }, { "epoch": 0.8280422159114992, "grad_norm": 0.0014319182373583317, "learning_rate": 8.682632642259216e-05, "loss": 0.0, "step": 2442 }, { "epoch": 0.8287203831644979, "grad_norm": 6.968220259295776e-05, "learning_rate": 8.680485435598673e-05, "loss": 0.0, "step": 2444 }, { "epoch": 0.8293985504174967, "grad_norm": 0.00036705288221128285, "learning_rate": 8.678336746439035e-05, "loss": 0.0, "step": 2446 }, { "epoch": 0.8300767176704955, "grad_norm": 0.0001591446198290214, "learning_rate": 8.676186575645796e-05, "loss": 0.0, "step": 2448 }, { "epoch": 0.8307548849234943, "grad_norm": 7.104194810381159e-05, "learning_rate": 8.674034924085043e-05, "loss": 0.0, "step": 2450 }, { "epoch": 0.831433052176493, "grad_norm": 0.00011930055188713595, "learning_rate": 8.671881792623464e-05, "loss": 0.0, "step": 2452 }, { "epoch": 0.8321112194294918, "grad_norm": 0.0007769940420985222, "learning_rate": 8.669727182128338e-05, "loss": 0.0, "step": 2454 }, { "epoch": 0.8327893866824906, "grad_norm": 0.0024935437832027674, "learning_rate": 8.667571093467541e-05, "loss": 0.0, "step": 2456 }, { "epoch": 0.8334675539354893, "grad_norm": 4.6994613512651995e-05, "learning_rate": 8.665413527509546e-05, "loss": 0.0, "step": 2458 }, { "epoch": 0.8341457211884881, "grad_norm": 7.935589383123443e-05, "learning_rate": 8.66325448512342e-05, "loss": 0.0, "step": 2460 }, { "epoch": 0.8348238884414869, "grad_norm": 6.114977441029623e-05, "learning_rate": 8.661093967178826e-05, "loss": 0.0, "step": 2462 }, { "epoch": 0.8355020556944857, "grad_norm": 3.911663225153461e-05, "learning_rate": 8.65893197454602e-05, "loss": 0.0, "step": 2464 }, { "epoch": 0.8361802229474844, "grad_norm": 0.005718694068491459, "learning_rate": 8.656768508095853e-05, "loss": 0.0, "step": 2466 }, { "epoch": 0.8368583902004832, "grad_norm": 5.9984056861139834e-05, "learning_rate": 8.654603568699768e-05, "loss": 0.0, "step": 2468 }, { "epoch": 0.837536557453482, "grad_norm": 6.293792830547318e-05, "learning_rate": 8.652437157229801e-05, "loss": 0.0, "step": 2470 }, { "epoch": 0.8382147247064807, "grad_norm": 4.67793652205728e-05, "learning_rate": 8.650269274558585e-05, "loss": 0.0, "step": 2472 }, { "epoch": 0.8388928919594795, "grad_norm": 0.000718626833986491, "learning_rate": 8.648099921559342e-05, "loss": 0.0, "step": 2474 }, { "epoch": 0.8395710592124783, "grad_norm": 0.00555463507771492, "learning_rate": 8.645929099105887e-05, "loss": 0.0, "step": 2476 }, { "epoch": 0.8402492264654771, "grad_norm": 0.00010523942182771862, "learning_rate": 8.643756808072624e-05, "loss": 0.0, "step": 2478 }, { "epoch": 0.8409273937184758, "grad_norm": 4.4146174332126975e-05, "learning_rate": 8.641583049334558e-05, "loss": 0.0, "step": 2480 }, { "epoch": 0.8416055609714745, "grad_norm": 2.2396032363758422e-05, "learning_rate": 8.639407823767274e-05, "loss": 0.0, "step": 2482 }, { "epoch": 0.8422837282244734, "grad_norm": 5.793202217319049e-05, "learning_rate": 8.637231132246955e-05, "loss": 0.0, "step": 2484 }, { "epoch": 0.8429618954774721, "grad_norm": 0.0030191531404852867, "learning_rate": 8.635052975650369e-05, "loss": 0.0, "step": 2486 }, { "epoch": 0.8436400627304709, "grad_norm": 5.942771167610772e-05, "learning_rate": 8.63287335485488e-05, "loss": 0.0, "step": 2488 }, { "epoch": 0.8443182299834697, "grad_norm": 5.2272378525231034e-05, "learning_rate": 8.630692270738439e-05, "loss": 0.0, "step": 2490 }, { "epoch": 0.8449963972364685, "grad_norm": 3.476697384030558e-05, "learning_rate": 8.628509724179584e-05, "loss": 0.0, "step": 2492 }, { "epoch": 0.8456745644894672, "grad_norm": 8.026020805118605e-05, "learning_rate": 8.626325716057446e-05, "loss": 0.0, "step": 2494 }, { "epoch": 0.846352731742466, "grad_norm": 7.056519825709984e-05, "learning_rate": 8.624140247251743e-05, "loss": 0.0, "step": 2496 }, { "epoch": 0.8470308989954648, "grad_norm": 7.995586202014238e-05, "learning_rate": 8.621953318642784e-05, "loss": 0.0, "step": 2498 }, { "epoch": 0.8477090662484635, "grad_norm": 4.878836261923425e-05, "learning_rate": 8.619764931111459e-05, "loss": 0.0, "step": 2500 }, { "epoch": 0.8483872335014623, "grad_norm": 0.0002059117250610143, "learning_rate": 8.617575085539253e-05, "loss": 0.0, "step": 2502 }, { "epoch": 0.849065400754461, "grad_norm": 4.21931836172007e-05, "learning_rate": 8.615383782808237e-05, "loss": 0.0, "step": 2504 }, { "epoch": 0.8497435680074599, "grad_norm": 0.0018902408191934228, "learning_rate": 8.613191023801064e-05, "loss": 0.0, "step": 2506 }, { "epoch": 0.8504217352604586, "grad_norm": 5.137163680046797e-05, "learning_rate": 8.61099680940098e-05, "loss": 0.0, "step": 2508 }, { "epoch": 0.8510999025134574, "grad_norm": 0.0010492595611140132, "learning_rate": 8.608801140491811e-05, "loss": 0.0, "step": 2510 }, { "epoch": 0.8517780697664562, "grad_norm": 6.547579687321559e-05, "learning_rate": 8.606604017957976e-05, "loss": 0.0, "step": 2512 }, { "epoch": 0.8524562370194549, "grad_norm": 2.5358762286487035e-05, "learning_rate": 8.604405442684474e-05, "loss": 0.0, "step": 2514 }, { "epoch": 0.8531344042724537, "grad_norm": 5.517467434401624e-05, "learning_rate": 8.602205415556889e-05, "loss": 0.0, "step": 2516 }, { "epoch": 0.8538125715254524, "grad_norm": 4.9549442337593064e-05, "learning_rate": 8.600003937461394e-05, "loss": 0.0, "step": 2518 }, { "epoch": 0.8544907387784513, "grad_norm": 3.567721068975516e-05, "learning_rate": 8.597801009284744e-05, "loss": 0.0, "step": 2520 }, { "epoch": 0.85516890603145, "grad_norm": 3.9313017623499036e-05, "learning_rate": 8.595596631914277e-05, "loss": 0.0, "step": 2522 }, { "epoch": 0.8558470732844488, "grad_norm": 0.0009759738459251821, "learning_rate": 8.593390806237917e-05, "loss": 0.0, "step": 2524 }, { "epoch": 0.8565252405374475, "grad_norm": 0.00014349607226904482, "learning_rate": 8.591183533144171e-05, "loss": 0.0, "step": 2526 }, { "epoch": 0.8572034077904463, "grad_norm": 5.6128526921384037e-05, "learning_rate": 8.588974813522126e-05, "loss": 0.0, "step": 2528 }, { "epoch": 0.8578815750434451, "grad_norm": 5.782934749731794e-05, "learning_rate": 8.586764648261456e-05, "loss": 0.0, "step": 2530 }, { "epoch": 0.8585597422964438, "grad_norm": 0.00024813110940158367, "learning_rate": 8.584553038252414e-05, "loss": 0.0, "step": 2532 }, { "epoch": 0.8592379095494427, "grad_norm": 2.7292395316180773e-05, "learning_rate": 8.582339984385838e-05, "loss": 0.0, "step": 2534 }, { "epoch": 0.8599160768024414, "grad_norm": 3.258465585531667e-05, "learning_rate": 8.580125487553143e-05, "loss": 0.0, "step": 2536 }, { "epoch": 0.8605942440554402, "grad_norm": 1.458320184610784e-05, "learning_rate": 8.57790954864633e-05, "loss": 0.0, "step": 2538 }, { "epoch": 0.8612724113084389, "grad_norm": 0.00010439179459353909, "learning_rate": 8.575692168557981e-05, "loss": 0.0, "step": 2540 }, { "epoch": 0.8619505785614378, "grad_norm": 4.440178599907085e-05, "learning_rate": 8.573473348181251e-05, "loss": 0.0, "step": 2542 }, { "epoch": 0.8626287458144365, "grad_norm": 3.7284295103745535e-05, "learning_rate": 8.571253088409886e-05, "loss": 0.0, "step": 2544 }, { "epoch": 0.8633069130674352, "grad_norm": 3.342417403473519e-05, "learning_rate": 8.569031390138202e-05, "loss": 0.0, "step": 2546 }, { "epoch": 0.863985080320434, "grad_norm": 2.9887109121773392e-05, "learning_rate": 8.566808254261103e-05, "loss": 0.0, "step": 2548 }, { "epoch": 0.8646632475734328, "grad_norm": 4.787669240613468e-05, "learning_rate": 8.564583681674065e-05, "loss": 0.0, "step": 2550 }, { "epoch": 0.8653414148264316, "grad_norm": 5.066965240985155e-05, "learning_rate": 8.562357673273147e-05, "loss": 0.0, "step": 2552 }, { "epoch": 0.8660195820794303, "grad_norm": 0.000141487194923684, "learning_rate": 8.560130229954984e-05, "loss": 0.0, "step": 2554 }, { "epoch": 0.8666977493324292, "grad_norm": 4.295802136766724e-05, "learning_rate": 8.557901352616789e-05, "loss": 0.0, "step": 2556 }, { "epoch": 0.8673759165854279, "grad_norm": 2.949769805127289e-05, "learning_rate": 8.555671042156354e-05, "loss": 0.0, "step": 2558 }, { "epoch": 0.8680540838384266, "grad_norm": 3.286828359705396e-05, "learning_rate": 8.55343929947205e-05, "loss": 0.0, "step": 2560 }, { "epoch": 0.8687322510914254, "grad_norm": 0.00010639905667630956, "learning_rate": 8.55120612546282e-05, "loss": 0.0, "step": 2562 }, { "epoch": 0.8694104183444242, "grad_norm": 2.4564649720559828e-05, "learning_rate": 8.548971521028189e-05, "loss": 0.0, "step": 2564 }, { "epoch": 0.870088585597423, "grad_norm": 0.00031212970498017967, "learning_rate": 8.546735487068252e-05, "loss": 0.0, "step": 2566 }, { "epoch": 0.8707667528504217, "grad_norm": 0.00017252239922527224, "learning_rate": 8.544498024483687e-05, "loss": 0.0, "step": 2568 }, { "epoch": 0.8714449201034205, "grad_norm": 4.669900954468176e-05, "learning_rate": 8.542259134175739e-05, "loss": 0.0, "step": 2570 }, { "epoch": 0.8721230873564193, "grad_norm": 2.540825335017871e-05, "learning_rate": 8.540018817046238e-05, "loss": 0.0, "step": 2572 }, { "epoch": 0.872801254609418, "grad_norm": 3.5121825931128114e-05, "learning_rate": 8.53777707399758e-05, "loss": 0.0, "step": 2574 }, { "epoch": 0.8734794218624168, "grad_norm": 2.1930207367404364e-05, "learning_rate": 8.535533905932738e-05, "loss": 0.0, "step": 2576 }, { "epoch": 0.8741575891154156, "grad_norm": 3.102342816418968e-05, "learning_rate": 8.533289313755263e-05, "loss": 0.0, "step": 2578 }, { "epoch": 0.8748357563684144, "grad_norm": 9.593948198016733e-05, "learning_rate": 8.531043298369274e-05, "loss": 0.0, "step": 2580 }, { "epoch": 0.8755139236214131, "grad_norm": 0.0001077059205272235, "learning_rate": 8.528795860679468e-05, "loss": 0.0, "step": 2582 }, { "epoch": 0.8761920908744119, "grad_norm": 5.816249904455617e-05, "learning_rate": 8.52654700159111e-05, "loss": 0.0, "step": 2584 }, { "epoch": 0.8768702581274107, "grad_norm": 2.9580603950307705e-05, "learning_rate": 8.524296722010042e-05, "loss": 0.0, "step": 2586 }, { "epoch": 0.8775484253804094, "grad_norm": 0.001621534931473434, "learning_rate": 8.522045022842677e-05, "loss": 0.0, "step": 2588 }, { "epoch": 0.8782265926334082, "grad_norm": 2.7219555704505183e-05, "learning_rate": 8.519791904995996e-05, "loss": 0.0, "step": 2590 }, { "epoch": 0.878904759886407, "grad_norm": 3.0587609217036515e-05, "learning_rate": 8.517537369377557e-05, "loss": 0.0, "step": 2592 }, { "epoch": 0.8795829271394058, "grad_norm": 3.34189462591894e-05, "learning_rate": 8.515281416895488e-05, "loss": 0.0, "step": 2594 }, { "epoch": 0.8802610943924045, "grad_norm": 3.841116631519981e-05, "learning_rate": 8.513024048458485e-05, "loss": 0.0, "step": 2596 }, { "epoch": 0.8809392616454033, "grad_norm": 4.351099778432399e-05, "learning_rate": 8.510765264975813e-05, "loss": 0.0, "step": 2598 }, { "epoch": 0.8816174288984021, "grad_norm": 6.0994632804067805e-05, "learning_rate": 8.508505067357313e-05, "loss": 0.0, "step": 2600 }, { "epoch": 0.8822955961514009, "grad_norm": 2.2559877834282815e-05, "learning_rate": 8.506243456513391e-05, "loss": 0.0, "step": 2602 }, { "epoch": 0.8829737634043996, "grad_norm": 3.4261829569004476e-05, "learning_rate": 8.503980433355024e-05, "loss": 0.0, "step": 2604 }, { "epoch": 0.8836519306573983, "grad_norm": 2.5804607503232546e-05, "learning_rate": 8.501715998793757e-05, "loss": 0.0, "step": 2606 }, { "epoch": 0.8843300979103972, "grad_norm": 3.149559779558331e-05, "learning_rate": 8.499450153741706e-05, "loss": 0.0, "step": 2608 }, { "epoch": 0.8850082651633959, "grad_norm": 5.7846591516863555e-05, "learning_rate": 8.497182899111551e-05, "loss": 0.0, "step": 2610 }, { "epoch": 0.8856864324163947, "grad_norm": 9.873088129097596e-05, "learning_rate": 8.494914235816542e-05, "loss": 0.0, "step": 2612 }, { "epoch": 0.8863645996693935, "grad_norm": 3.39953439834062e-05, "learning_rate": 8.492644164770499e-05, "loss": 0.0, "step": 2614 }, { "epoch": 0.8870427669223923, "grad_norm": 3.4924494684673846e-05, "learning_rate": 8.490372686887802e-05, "loss": 0.0, "step": 2616 }, { "epoch": 0.887720934175391, "grad_norm": 5.127754411660135e-05, "learning_rate": 8.488099803083404e-05, "loss": 0.0, "step": 2618 }, { "epoch": 0.8883991014283897, "grad_norm": 0.00016556790797039866, "learning_rate": 8.485825514272824e-05, "loss": 0.0, "step": 2620 }, { "epoch": 0.8890772686813886, "grad_norm": 3.2666204788256437e-05, "learning_rate": 8.483549821372141e-05, "loss": 0.0, "step": 2622 }, { "epoch": 0.8897554359343873, "grad_norm": 3.055952402064577e-05, "learning_rate": 8.481272725298009e-05, "loss": 0.0, "step": 2624 }, { "epoch": 0.8904336031873861, "grad_norm": 0.0005399963119998574, "learning_rate": 8.478994226967638e-05, "loss": 0.0, "step": 2626 }, { "epoch": 0.8911117704403848, "grad_norm": 0.0007298051496036351, "learning_rate": 8.476714327298809e-05, "loss": 0.0, "step": 2628 }, { "epoch": 0.8917899376933837, "grad_norm": 6.035435217199847e-05, "learning_rate": 8.474433027209866e-05, "loss": 0.0, "step": 2630 }, { "epoch": 0.8924681049463824, "grad_norm": 0.00013174138439353555, "learning_rate": 8.472150327619714e-05, "loss": 0.0, "step": 2632 }, { "epoch": 0.8931462721993811, "grad_norm": 2.576379120000638e-05, "learning_rate": 8.469866229447825e-05, "loss": 0.0, "step": 2634 }, { "epoch": 0.89382443945238, "grad_norm": 0.007593007758259773, "learning_rate": 8.467580733614233e-05, "loss": 0.0, "step": 2636 }, { "epoch": 0.8945026067053787, "grad_norm": 3.41287195624318e-05, "learning_rate": 8.465293841039539e-05, "loss": 0.0, "step": 2638 }, { "epoch": 0.8951807739583775, "grad_norm": 5.675890133716166e-05, "learning_rate": 8.463005552644898e-05, "loss": 0.0, "step": 2640 }, { "epoch": 0.8958589412113762, "grad_norm": 0.00010869366087717935, "learning_rate": 8.460715869352035e-05, "loss": 0.0, "step": 2642 }, { "epoch": 0.8965371084643751, "grad_norm": 0.00029541790718212724, "learning_rate": 8.458424792083233e-05, "loss": 0.0, "step": 2644 }, { "epoch": 0.8972152757173738, "grad_norm": 0.00016577077622059733, "learning_rate": 8.456132321761339e-05, "loss": 0.0, "step": 2646 }, { "epoch": 0.8978934429703725, "grad_norm": 7.052655564621091e-05, "learning_rate": 8.45383845930976e-05, "loss": 0.0, "step": 2648 }, { "epoch": 0.8985716102233713, "grad_norm": 8.335147867910564e-05, "learning_rate": 8.451543205652462e-05, "loss": 0.0, "step": 2650 }, { "epoch": 0.8992497774763701, "grad_norm": 4.06303079216741e-05, "learning_rate": 8.449246561713972e-05, "loss": 0.0, "step": 2652 }, { "epoch": 0.8999279447293689, "grad_norm": 3.0850900657242164e-05, "learning_rate": 8.44694852841938e-05, "loss": 0.0, "step": 2654 }, { "epoch": 0.9006061119823676, "grad_norm": 9.577847959008068e-05, "learning_rate": 8.444649106694335e-05, "loss": 0.0, "step": 2656 }, { "epoch": 0.9012842792353665, "grad_norm": 0.0001490665745222941, "learning_rate": 8.442348297465039e-05, "loss": 0.0, "step": 2658 }, { "epoch": 0.9019624464883652, "grad_norm": 3.8870639400556684e-05, "learning_rate": 8.440046101658263e-05, "loss": 0.0, "step": 2660 }, { "epoch": 0.902640613741364, "grad_norm": 2.3244661861099303e-05, "learning_rate": 8.437742520201327e-05, "loss": 0.0, "step": 2662 }, { "epoch": 0.9033187809943627, "grad_norm": 3.095623105764389e-05, "learning_rate": 8.435437554022115e-05, "loss": 0.0, "step": 2664 }, { "epoch": 0.9039969482473615, "grad_norm": 2.8265672881389037e-05, "learning_rate": 8.433131204049067e-05, "loss": 0.0, "step": 2666 }, { "epoch": 0.9046751155003603, "grad_norm": 0.000637600664049387, "learning_rate": 8.430823471211183e-05, "loss": 0.0, "step": 2668 }, { "epoch": 0.905353282753359, "grad_norm": 3.2973053748719394e-05, "learning_rate": 8.428514356438012e-05, "loss": 0.0, "step": 2670 }, { "epoch": 0.9060314500063578, "grad_norm": 3.9376176573568955e-05, "learning_rate": 8.42620386065967e-05, "loss": 0.0, "step": 2672 }, { "epoch": 0.9067096172593566, "grad_norm": 1.6717647667974234e-05, "learning_rate": 8.423891984806821e-05, "loss": 0.0, "step": 2674 }, { "epoch": 0.9073877845123554, "grad_norm": 0.0008120017591863871, "learning_rate": 8.421578729810692e-05, "loss": 0.0, "step": 2676 }, { "epoch": 0.9080659517653541, "grad_norm": 3.816479875240475e-05, "learning_rate": 8.419264096603059e-05, "loss": 0.0, "step": 2678 }, { "epoch": 0.908744119018353, "grad_norm": 0.023744838312268257, "learning_rate": 8.416948086116256e-05, "loss": 0.0, "step": 2680 }, { "epoch": 0.9094222862713517, "grad_norm": 2.676597432582639e-05, "learning_rate": 8.414630699283174e-05, "loss": 0.0, "step": 2682 }, { "epoch": 0.9101004535243504, "grad_norm": 0.00024049430794548243, "learning_rate": 8.412311937037254e-05, "loss": 0.0, "step": 2684 }, { "epoch": 0.9107786207773492, "grad_norm": 3.337972520967014e-05, "learning_rate": 8.409991800312493e-05, "loss": 0.0, "step": 2686 }, { "epoch": 0.911456788030348, "grad_norm": 3.9082591683836654e-05, "learning_rate": 8.407670290043443e-05, "loss": 0.0, "step": 2688 }, { "epoch": 0.9121349552833468, "grad_norm": 0.03246762230992317, "learning_rate": 8.405347407165208e-05, "loss": 0.0, "step": 2690 }, { "epoch": 0.9128131225363455, "grad_norm": 7.276963879121467e-05, "learning_rate": 8.403023152613445e-05, "loss": 0.0, "step": 2692 }, { "epoch": 0.9134912897893444, "grad_norm": 0.00017264233611058444, "learning_rate": 8.400697527324362e-05, "loss": 0.0, "step": 2694 }, { "epoch": 0.9141694570423431, "grad_norm": 2.2921736672287807e-05, "learning_rate": 8.398370532234722e-05, "loss": 0.0, "step": 2696 }, { "epoch": 0.9148476242953418, "grad_norm": 2.179672992497217e-05, "learning_rate": 8.396042168281839e-05, "loss": 0.0, "step": 2698 }, { "epoch": 0.9155257915483406, "grad_norm": 2.780408067337703e-05, "learning_rate": 8.393712436403576e-05, "loss": 0.0, "step": 2700 }, { "epoch": 0.9162039588013394, "grad_norm": 0.0007328210631385446, "learning_rate": 8.391381337538349e-05, "loss": 0.0, "step": 2702 }, { "epoch": 0.9168821260543382, "grad_norm": 0.0016520414501428604, "learning_rate": 8.389048872625127e-05, "loss": 0.0, "step": 2704 }, { "epoch": 0.9175602933073369, "grad_norm": 0.07671560347080231, "learning_rate": 8.386715042603423e-05, "loss": 0.0001, "step": 2706 }, { "epoch": 0.9182384605603356, "grad_norm": 0.04780285805463791, "learning_rate": 8.384379848413304e-05, "loss": 0.0003, "step": 2708 }, { "epoch": 0.9189166278133345, "grad_norm": 0.000507183896843344, "learning_rate": 8.382043290995388e-05, "loss": 0.0, "step": 2710 }, { "epoch": 0.9195947950663332, "grad_norm": 0.004272800404578447, "learning_rate": 8.379705371290839e-05, "loss": 0.0, "step": 2712 }, { "epoch": 0.920272962319332, "grad_norm": 0.032198842614889145, "learning_rate": 8.37736609024137e-05, "loss": 0.0, "step": 2714 }, { "epoch": 0.9209511295723308, "grad_norm": 0.0008604843751527369, "learning_rate": 8.375025448789242e-05, "loss": 0.0001, "step": 2716 }, { "epoch": 0.9216292968253296, "grad_norm": 0.00031635569757781923, "learning_rate": 8.37268344787727e-05, "loss": 0.0001, "step": 2718 }, { "epoch": 0.9223074640783283, "grad_norm": 0.08091209083795547, "learning_rate": 8.370340088448808e-05, "loss": 0.0001, "step": 2720 }, { "epoch": 0.922985631331327, "grad_norm": 0.0015913655515760183, "learning_rate": 8.367995371447759e-05, "loss": 0.0, "step": 2722 }, { "epoch": 0.9236637985843259, "grad_norm": 0.056560616940259933, "learning_rate": 8.365649297818578e-05, "loss": 0.0001, "step": 2724 }, { "epoch": 0.9243419658373246, "grad_norm": 0.049934688955545425, "learning_rate": 8.363301868506264e-05, "loss": 0.0002, "step": 2726 }, { "epoch": 0.9250201330903234, "grad_norm": 0.011096746660768986, "learning_rate": 8.360953084456358e-05, "loss": 0.0, "step": 2728 }, { "epoch": 0.9256983003433221, "grad_norm": 0.0012205281527712941, "learning_rate": 8.358602946614951e-05, "loss": 0.0, "step": 2730 }, { "epoch": 0.926376467596321, "grad_norm": 0.0005476016085594893, "learning_rate": 8.35625145592868e-05, "loss": 0.0, "step": 2732 }, { "epoch": 0.9270546348493197, "grad_norm": 0.06708763539791107, "learning_rate": 8.353898613344724e-05, "loss": 0.0004, "step": 2734 }, { "epoch": 0.9277328021023185, "grad_norm": 0.000514314800966531, "learning_rate": 8.351544419810807e-05, "loss": 0.0, "step": 2736 }, { "epoch": 0.9284109693553173, "grad_norm": 0.00016134425823111087, "learning_rate": 8.3491888762752e-05, "loss": 0.0002, "step": 2738 }, { "epoch": 0.929089136608316, "grad_norm": 0.0014166885521262884, "learning_rate": 8.346831983686711e-05, "loss": 0.0003, "step": 2740 }, { "epoch": 0.9297673038613148, "grad_norm": 0.018221652135252953, "learning_rate": 8.344473742994703e-05, "loss": 0.0, "step": 2742 }, { "epoch": 0.9304454711143135, "grad_norm": 0.016289401799440384, "learning_rate": 8.342114155149069e-05, "loss": 0.0002, "step": 2744 }, { "epoch": 0.9311236383673124, "grad_norm": 0.0034765347372740507, "learning_rate": 8.339753221100252e-05, "loss": 0.0, "step": 2746 }, { "epoch": 0.9318018056203111, "grad_norm": 0.004045094829052687, "learning_rate": 8.337390941799239e-05, "loss": 0.0002, "step": 2748 }, { "epoch": 0.9324799728733099, "grad_norm": 0.00048109699855558574, "learning_rate": 8.335027318197554e-05, "loss": 0.0, "step": 2750 }, { "epoch": 0.9331581401263086, "grad_norm": 0.026088694110512733, "learning_rate": 8.332662351247262e-05, "loss": 0.0, "step": 2752 }, { "epoch": 0.9338363073793075, "grad_norm": 0.08691231906414032, "learning_rate": 8.330296041900975e-05, "loss": 0.0003, "step": 2754 }, { "epoch": 0.9345144746323062, "grad_norm": 0.0608268566429615, "learning_rate": 8.327928391111841e-05, "loss": 0.0002, "step": 2756 }, { "epoch": 0.9351926418853049, "grad_norm": 0.015854302793741226, "learning_rate": 8.325559399833548e-05, "loss": 0.0002, "step": 2758 }, { "epoch": 0.9358708091383038, "grad_norm": 0.022921759635210037, "learning_rate": 8.323189069020328e-05, "loss": 0.0003, "step": 2760 }, { "epoch": 0.9365489763913025, "grad_norm": 0.02427630126476288, "learning_rate": 8.320817399626945e-05, "loss": 0.0001, "step": 2762 }, { "epoch": 0.9372271436443013, "grad_norm": 0.01896664686501026, "learning_rate": 8.318444392608713e-05, "loss": 0.0001, "step": 2764 }, { "epoch": 0.9379053108973, "grad_norm": 0.036994654685258865, "learning_rate": 8.316070048921477e-05, "loss": 0.0004, "step": 2766 }, { "epoch": 0.9385834781502989, "grad_norm": 0.02746070735156536, "learning_rate": 8.313694369521621e-05, "loss": 0.0002, "step": 2768 }, { "epoch": 0.9392616454032976, "grad_norm": 0.050837792456150055, "learning_rate": 8.311317355366069e-05, "loss": 0.0003, "step": 2770 }, { "epoch": 0.9399398126562963, "grad_norm": 0.004767420701682568, "learning_rate": 8.308939007412281e-05, "loss": 0.0001, "step": 2772 }, { "epoch": 0.9406179799092951, "grad_norm": 0.030202515423297882, "learning_rate": 8.306559326618259e-05, "loss": 0.0001, "step": 2774 }, { "epoch": 0.9412961471622939, "grad_norm": 0.0155405905097723, "learning_rate": 8.304178313942536e-05, "loss": 0.0, "step": 2776 }, { "epoch": 0.9419743144152927, "grad_norm": 0.06447181850671768, "learning_rate": 8.301795970344182e-05, "loss": 0.0001, "step": 2778 }, { "epoch": 0.9426524816682914, "grad_norm": 0.0006567398668266833, "learning_rate": 8.299412296782804e-05, "loss": 0.0, "step": 2780 }, { "epoch": 0.9433306489212903, "grad_norm": 0.0004741934244520962, "learning_rate": 8.297027294218551e-05, "loss": 0.0001, "step": 2782 }, { "epoch": 0.944008816174289, "grad_norm": 0.044876668602228165, "learning_rate": 8.294640963612096e-05, "loss": 0.0004, "step": 2784 }, { "epoch": 0.9446869834272877, "grad_norm": 0.03857933729887009, "learning_rate": 8.292253305924655e-05, "loss": 0.0008, "step": 2786 }, { "epoch": 0.9453651506802865, "grad_norm": 0.0046510398387908936, "learning_rate": 8.289864322117977e-05, "loss": 0.0, "step": 2788 }, { "epoch": 0.9460433179332853, "grad_norm": 0.09152370691299438, "learning_rate": 8.287474013154343e-05, "loss": 0.0002, "step": 2790 }, { "epoch": 0.9467214851862841, "grad_norm": 0.009585980325937271, "learning_rate": 8.285082379996571e-05, "loss": 0.0001, "step": 2792 }, { "epoch": 0.9473996524392828, "grad_norm": 0.008876519277691841, "learning_rate": 8.282689423608008e-05, "loss": 0.0001, "step": 2794 }, { "epoch": 0.9480778196922816, "grad_norm": 0.008391699753701687, "learning_rate": 8.280295144952536e-05, "loss": 0.0, "step": 2796 }, { "epoch": 0.9487559869452804, "grad_norm": 0.0072625367902219296, "learning_rate": 8.277899544994575e-05, "loss": 0.0, "step": 2798 }, { "epoch": 0.9494341541982791, "grad_norm": 0.09617011994123459, "learning_rate": 8.275502624699067e-05, "loss": 0.0003, "step": 2800 }, { "epoch": 0.9501123214512779, "grad_norm": 0.0077019380405545235, "learning_rate": 8.273104385031494e-05, "loss": 0.0, "step": 2802 }, { "epoch": 0.9507904887042767, "grad_norm": 0.012218840420246124, "learning_rate": 8.270704826957868e-05, "loss": 0.0005, "step": 2804 }, { "epoch": 0.9514686559572755, "grad_norm": 0.03991170600056648, "learning_rate": 8.268303951444726e-05, "loss": 0.0005, "step": 2806 }, { "epoch": 0.9521468232102742, "grad_norm": 0.038647279143333435, "learning_rate": 8.265901759459144e-05, "loss": 0.0002, "step": 2808 }, { "epoch": 0.952824990463273, "grad_norm": 0.040331657975912094, "learning_rate": 8.263498251968724e-05, "loss": 0.0008, "step": 2810 }, { "epoch": 0.9535031577162718, "grad_norm": 0.07936710119247437, "learning_rate": 8.2610934299416e-05, "loss": 0.0003, "step": 2812 }, { "epoch": 0.9541813249692706, "grad_norm": 0.010408652015030384, "learning_rate": 8.25868729434643e-05, "loss": 0.0002, "step": 2814 }, { "epoch": 0.9548594922222693, "grad_norm": 0.088149294257164, "learning_rate": 8.25627984615241e-05, "loss": 0.0005, "step": 2816 }, { "epoch": 0.9555376594752681, "grad_norm": 0.09806723147630692, "learning_rate": 8.253871086329255e-05, "loss": 0.0006, "step": 2818 }, { "epoch": 0.9562158267282669, "grad_norm": 0.11084140837192535, "learning_rate": 8.251461015847217e-05, "loss": 0.0013, "step": 2820 }, { "epoch": 0.9568939939812656, "grad_norm": 0.28381043672561646, "learning_rate": 8.24904963567707e-05, "loss": 0.0013, "step": 2822 }, { "epoch": 0.9575721612342644, "grad_norm": 0.9102690815925598, "learning_rate": 8.246636946790119e-05, "loss": 0.0133, "step": 2824 }, { "epoch": 0.9582503284872632, "grad_norm": 0.6345599293708801, "learning_rate": 8.244222950158193e-05, "loss": 0.0126, "step": 2826 }, { "epoch": 0.958928495740262, "grad_norm": 0.11776126176118851, "learning_rate": 8.241807646753652e-05, "loss": 0.0032, "step": 2828 }, { "epoch": 0.9596066629932607, "grad_norm": 0.11438094079494476, "learning_rate": 8.239391037549378e-05, "loss": 0.0039, "step": 2830 }, { "epoch": 0.9602848302462594, "grad_norm": 0.11632762104272842, "learning_rate": 8.236973123518781e-05, "loss": 0.0018, "step": 2832 }, { "epoch": 0.9609629974992583, "grad_norm": 0.04498480632901192, "learning_rate": 8.234553905635797e-05, "loss": 0.0015, "step": 2834 }, { "epoch": 0.961641164752257, "grad_norm": 0.09208396822214127, "learning_rate": 8.232133384874887e-05, "loss": 0.0022, "step": 2836 }, { "epoch": 0.9623193320052558, "grad_norm": 0.08108418434858322, "learning_rate": 8.229711562211036e-05, "loss": 0.002, "step": 2838 }, { "epoch": 0.9629974992582546, "grad_norm": 0.1276485174894333, "learning_rate": 8.227288438619754e-05, "loss": 0.0015, "step": 2840 }, { "epoch": 0.9636756665112534, "grad_norm": 0.058672089129686356, "learning_rate": 8.224864015077073e-05, "loss": 0.0018, "step": 2842 }, { "epoch": 0.9643538337642521, "grad_norm": 0.0685403048992157, "learning_rate": 8.222438292559556e-05, "loss": 0.0016, "step": 2844 }, { "epoch": 0.9650320010172508, "grad_norm": 0.06782464683055878, "learning_rate": 8.220011272044277e-05, "loss": 0.0009, "step": 2846 }, { "epoch": 0.9657101682702497, "grad_norm": 0.038573555648326874, "learning_rate": 8.217582954508844e-05, "loss": 0.0008, "step": 2848 }, { "epoch": 0.9663883355232484, "grad_norm": 0.04300495982170105, "learning_rate": 8.215153340931382e-05, "loss": 0.0012, "step": 2850 }, { "epoch": 0.9670665027762472, "grad_norm": 0.03777950629591942, "learning_rate": 8.212722432290538e-05, "loss": 0.0016, "step": 2852 }, { "epoch": 0.9677446700292459, "grad_norm": 0.2890669107437134, "learning_rate": 8.210290229565483e-05, "loss": 0.0028, "step": 2854 }, { "epoch": 0.9684228372822448, "grad_norm": 0.21739627420902252, "learning_rate": 8.207856733735908e-05, "loss": 0.0035, "step": 2856 }, { "epoch": 0.9691010045352435, "grad_norm": 0.14371730387210846, "learning_rate": 8.205421945782024e-05, "loss": 0.0018, "step": 2858 }, { "epoch": 0.9697791717882422, "grad_norm": 0.12269674241542816, "learning_rate": 8.202985866684563e-05, "loss": 0.0027, "step": 2860 }, { "epoch": 0.9704573390412411, "grad_norm": 0.06122859939932823, "learning_rate": 8.200548497424778e-05, "loss": 0.0011, "step": 2862 }, { "epoch": 0.9711355062942398, "grad_norm": 0.09474431723356247, "learning_rate": 8.198109838984441e-05, "loss": 0.0014, "step": 2864 }, { "epoch": 0.9718136735472386, "grad_norm": 0.1314152330160141, "learning_rate": 8.195669892345843e-05, "loss": 0.001, "step": 2866 }, { "epoch": 0.9724918408002373, "grad_norm": 0.22767437994480133, "learning_rate": 8.193228658491795e-05, "loss": 0.001, "step": 2868 }, { "epoch": 0.9731700080532362, "grad_norm": 0.07902339100837708, "learning_rate": 8.190786138405626e-05, "loss": 0.0009, "step": 2870 }, { "epoch": 0.9738481753062349, "grad_norm": 0.10243401676416397, "learning_rate": 8.188342333071181e-05, "loss": 0.0023, "step": 2872 }, { "epoch": 0.9745263425592336, "grad_norm": 0.0833156555891037, "learning_rate": 8.185897243472826e-05, "loss": 0.0013, "step": 2874 }, { "epoch": 0.9752045098122324, "grad_norm": 0.02530244179069996, "learning_rate": 8.183450870595441e-05, "loss": 0.0008, "step": 2876 }, { "epoch": 0.9758826770652312, "grad_norm": 0.06331485509872437, "learning_rate": 8.181003215424428e-05, "loss": 0.0011, "step": 2878 }, { "epoch": 0.97656084431823, "grad_norm": 0.05141822248697281, "learning_rate": 8.178554278945701e-05, "loss": 0.0004, "step": 2880 }, { "epoch": 0.9772390115712287, "grad_norm": 0.04588576778769493, "learning_rate": 8.176104062145687e-05, "loss": 0.0004, "step": 2882 }, { "epoch": 0.9779171788242276, "grad_norm": 0.06578666716814041, "learning_rate": 8.173652566011338e-05, "loss": 0.0015, "step": 2884 }, { "epoch": 0.9785953460772263, "grad_norm": 0.08554302155971527, "learning_rate": 8.171199791530116e-05, "loss": 0.0011, "step": 2886 }, { "epoch": 0.979273513330225, "grad_norm": 0.07025856524705887, "learning_rate": 8.168745739689997e-05, "loss": 0.0016, "step": 2888 }, { "epoch": 0.9799516805832238, "grad_norm": 0.03825186938047409, "learning_rate": 8.166290411479473e-05, "loss": 0.0005, "step": 2890 }, { "epoch": 0.9806298478362226, "grad_norm": 0.08433175086975098, "learning_rate": 8.163833807887549e-05, "loss": 0.0018, "step": 2892 }, { "epoch": 0.9813080150892214, "grad_norm": 0.026424286887049675, "learning_rate": 8.161375929903746e-05, "loss": 0.0007, "step": 2894 }, { "epoch": 0.9819861823422201, "grad_norm": 0.06956103444099426, "learning_rate": 8.158916778518097e-05, "loss": 0.0003, "step": 2896 }, { "epoch": 0.9826643495952189, "grad_norm": 0.040197454392910004, "learning_rate": 8.156456354721147e-05, "loss": 0.0006, "step": 2898 }, { "epoch": 0.9833425168482177, "grad_norm": 0.02220018580555916, "learning_rate": 8.153994659503954e-05, "loss": 0.0002, "step": 2900 }, { "epoch": 0.9840206841012165, "grad_norm": 0.06474003195762634, "learning_rate": 8.15153169385809e-05, "loss": 0.0008, "step": 2902 }, { "epoch": 0.9846988513542152, "grad_norm": 0.06129273399710655, "learning_rate": 8.149067458775636e-05, "loss": 0.0009, "step": 2904 }, { "epoch": 0.985377018607214, "grad_norm": 0.034316860139369965, "learning_rate": 8.146601955249188e-05, "loss": 0.0006, "step": 2906 }, { "epoch": 0.9860551858602128, "grad_norm": 0.0359802171587944, "learning_rate": 8.144135184271849e-05, "loss": 0.0005, "step": 2908 }, { "epoch": 0.9867333531132115, "grad_norm": 0.05747028440237045, "learning_rate": 8.141667146837231e-05, "loss": 0.0004, "step": 2910 }, { "epoch": 0.9874115203662103, "grad_norm": 0.02614564448595047, "learning_rate": 8.139197843939463e-05, "loss": 0.0003, "step": 2912 }, { "epoch": 0.9880896876192091, "grad_norm": 0.1071239560842514, "learning_rate": 8.136727276573179e-05, "loss": 0.0004, "step": 2914 }, { "epoch": 0.9887678548722079, "grad_norm": 0.0214134119451046, "learning_rate": 8.134255445733522e-05, "loss": 0.0006, "step": 2916 }, { "epoch": 0.9894460221252066, "grad_norm": 0.08182989060878754, "learning_rate": 8.131782352416148e-05, "loss": 0.0007, "step": 2918 }, { "epoch": 0.9901241893782055, "grad_norm": 0.038342587649822235, "learning_rate": 8.129307997617216e-05, "loss": 0.0003, "step": 2920 }, { "epoch": 0.9908023566312042, "grad_norm": 0.08064837753772736, "learning_rate": 8.126832382333399e-05, "loss": 0.0007, "step": 2922 }, { "epoch": 0.9914805238842029, "grad_norm": 0.05218582600355148, "learning_rate": 8.124355507561871e-05, "loss": 0.0004, "step": 2924 }, { "epoch": 0.9921586911372017, "grad_norm": 0.03252217173576355, "learning_rate": 8.121877374300317e-05, "loss": 0.0002, "step": 2926 }, { "epoch": 0.9928368583902005, "grad_norm": 0.04875943437218666, "learning_rate": 8.119397983546932e-05, "loss": 0.0003, "step": 2928 }, { "epoch": 0.9935150256431993, "grad_norm": 0.019884569570422173, "learning_rate": 8.116917336300411e-05, "loss": 0.0003, "step": 2930 }, { "epoch": 0.994193192896198, "grad_norm": 0.06939960271120071, "learning_rate": 8.114435433559959e-05, "loss": 0.0005, "step": 2932 }, { "epoch": 0.9948713601491967, "grad_norm": 0.052569735795259476, "learning_rate": 8.111952276325287e-05, "loss": 0.0004, "step": 2934 }, { "epoch": 0.9955495274021956, "grad_norm": 0.01638111099600792, "learning_rate": 8.109467865596612e-05, "loss": 0.0001, "step": 2936 }, { "epoch": 0.9962276946551943, "grad_norm": 0.05986509472131729, "learning_rate": 8.106982202374651e-05, "loss": 0.0005, "step": 2938 }, { "epoch": 0.9969058619081931, "grad_norm": 0.03333326429128647, "learning_rate": 8.104495287660632e-05, "loss": 0.0006, "step": 2940 }, { "epoch": 0.9975840291611919, "grad_norm": 0.08217639476060867, "learning_rate": 8.102007122456281e-05, "loss": 0.0004, "step": 2942 }, { "epoch": 0.9982621964141907, "grad_norm": 0.03954688087105751, "learning_rate": 8.099517707763831e-05, "loss": 0.0002, "step": 2944 }, { "epoch": 0.9989403636671894, "grad_norm": 0.06906257569789886, "learning_rate": 8.097027044586021e-05, "loss": 0.0007, "step": 2946 }, { "epoch": 0.9996185309201882, "grad_norm": 0.0183778777718544, "learning_rate": 8.094535133926087e-05, "loss": 0.0001, "step": 2948 }, { "epoch": 1.0, "grad_norm": 0.13468892872333527, "learning_rate": 8.09204197678777e-05, "loss": 0.0014, "step": 2950 }, { "epoch": 1.0006781672529987, "grad_norm": 0.09359505027532578, "learning_rate": 8.089547574175317e-05, "loss": 0.0008, "step": 2952 }, { "epoch": 1.0013563345059975, "grad_norm": 0.05758538097143173, "learning_rate": 8.08705192709347e-05, "loss": 0.0011, "step": 2954 }, { "epoch": 1.0020345017589962, "grad_norm": 0.027957310900092125, "learning_rate": 8.084555036547474e-05, "loss": 0.0004, "step": 2956 }, { "epoch": 1.0027126690119952, "grad_norm": 0.035399600863456726, "learning_rate": 8.08205690354308e-05, "loss": 0.0006, "step": 2958 }, { "epoch": 1.003390836264994, "grad_norm": 0.049815621227025986, "learning_rate": 8.079557529086532e-05, "loss": 0.0011, "step": 2960 }, { "epoch": 1.0040690035179927, "grad_norm": 0.05087139829993248, "learning_rate": 8.077056914184582e-05, "loss": 0.0008, "step": 2962 }, { "epoch": 1.0047471707709914, "grad_norm": 0.03460082411766052, "learning_rate": 8.074555059844474e-05, "loss": 0.0009, "step": 2964 }, { "epoch": 1.0054253380239901, "grad_norm": 0.016356630250811577, "learning_rate": 8.072051967073955e-05, "loss": 0.0003, "step": 2966 }, { "epoch": 1.0061035052769889, "grad_norm": 0.023417463526129723, "learning_rate": 8.069547636881272e-05, "loss": 0.0007, "step": 2968 }, { "epoch": 1.0067816725299876, "grad_norm": 0.0395391583442688, "learning_rate": 8.067042070275167e-05, "loss": 0.0003, "step": 2970 }, { "epoch": 1.0074598397829866, "grad_norm": 0.01494118757545948, "learning_rate": 8.064535268264883e-05, "loss": 0.0003, "step": 2972 }, { "epoch": 1.0081380070359853, "grad_norm": 0.02469460852444172, "learning_rate": 8.06202723186016e-05, "loss": 0.0003, "step": 2974 }, { "epoch": 1.008816174288984, "grad_norm": 0.22398723661899567, "learning_rate": 8.059517962071233e-05, "loss": 0.0006, "step": 2976 }, { "epoch": 1.0094943415419828, "grad_norm": 0.03068060800433159, "learning_rate": 8.057007459908839e-05, "loss": 0.0004, "step": 2978 }, { "epoch": 1.0101725087949815, "grad_norm": 0.16560781002044678, "learning_rate": 8.054495726384204e-05, "loss": 0.0017, "step": 2980 }, { "epoch": 1.0108506760479803, "grad_norm": 0.06457192450761795, "learning_rate": 8.051982762509056e-05, "loss": 0.001, "step": 2982 }, { "epoch": 1.011528843300979, "grad_norm": 0.014625279232859612, "learning_rate": 8.049468569295616e-05, "loss": 0.0004, "step": 2984 }, { "epoch": 1.0122070105539778, "grad_norm": 0.12633752822875977, "learning_rate": 8.046953147756601e-05, "loss": 0.0005, "step": 2986 }, { "epoch": 1.0128851778069767, "grad_norm": 0.02757919207215309, "learning_rate": 8.044436498905222e-05, "loss": 0.0003, "step": 2988 }, { "epoch": 1.0135633450599755, "grad_norm": 0.030746174976229668, "learning_rate": 8.041918623755187e-05, "loss": 0.0004, "step": 2990 }, { "epoch": 1.0142415123129742, "grad_norm": 0.06324061751365662, "learning_rate": 8.039399523320692e-05, "loss": 0.0005, "step": 2992 }, { "epoch": 1.014919679565973, "grad_norm": 0.03737432509660721, "learning_rate": 8.036879198616434e-05, "loss": 0.0008, "step": 2994 }, { "epoch": 1.0155978468189717, "grad_norm": 0.04197308048605919, "learning_rate": 8.034357650657598e-05, "loss": 0.0005, "step": 2996 }, { "epoch": 1.0162760140719704, "grad_norm": 0.021742910146713257, "learning_rate": 8.031834880459863e-05, "loss": 0.0001, "step": 2998 }, { "epoch": 1.0169541813249692, "grad_norm": 0.02395406737923622, "learning_rate": 8.0293108890394e-05, "loss": 0.0003, "step": 3000 }, { "epoch": 1.0176323485779681, "grad_norm": 0.013543182983994484, "learning_rate": 8.026785677412873e-05, "loss": 0.0001, "step": 3002 }, { "epoch": 1.0183105158309669, "grad_norm": 0.2532791495323181, "learning_rate": 8.024259246597439e-05, "loss": 0.0007, "step": 3004 }, { "epoch": 1.0189886830839656, "grad_norm": 0.023439206182956696, "learning_rate": 8.02173159761074e-05, "loss": 0.0001, "step": 3006 }, { "epoch": 1.0196668503369644, "grad_norm": 0.1361963301897049, "learning_rate": 8.019202731470916e-05, "loss": 0.0017, "step": 3008 }, { "epoch": 1.020345017589963, "grad_norm": 0.05129358544945717, "learning_rate": 8.016672649196593e-05, "loss": 0.0008, "step": 3010 }, { "epoch": 1.0210231848429618, "grad_norm": 0.10559432208538055, "learning_rate": 8.014141351806886e-05, "loss": 0.0017, "step": 3012 }, { "epoch": 1.0217013520959606, "grad_norm": 0.05691803991794586, "learning_rate": 8.011608840321405e-05, "loss": 0.0009, "step": 3014 }, { "epoch": 1.0223795193489595, "grad_norm": 0.03549787402153015, "learning_rate": 8.009075115760243e-05, "loss": 0.0007, "step": 3016 }, { "epoch": 1.0230576866019583, "grad_norm": 0.038408488035202026, "learning_rate": 8.006540179143981e-05, "loss": 0.0002, "step": 3018 }, { "epoch": 1.023735853854957, "grad_norm": 0.02867027185857296, "learning_rate": 8.004004031493699e-05, "loss": 0.0005, "step": 3020 }, { "epoch": 1.0244140211079558, "grad_norm": 0.044582683593034744, "learning_rate": 8.001466673830951e-05, "loss": 0.0009, "step": 3022 }, { "epoch": 1.0250921883609545, "grad_norm": 0.05415906384587288, "learning_rate": 7.998928107177783e-05, "loss": 0.0003, "step": 3024 }, { "epoch": 1.0257703556139532, "grad_norm": 0.032624825835227966, "learning_rate": 7.996388332556735e-05, "loss": 0.0007, "step": 3026 }, { "epoch": 1.026448522866952, "grad_norm": 0.04468724876642227, "learning_rate": 7.993847350990824e-05, "loss": 0.0012, "step": 3028 }, { "epoch": 1.0271266901199507, "grad_norm": 0.04837553948163986, "learning_rate": 7.991305163503558e-05, "loss": 0.0008, "step": 3030 }, { "epoch": 1.0278048573729497, "grad_norm": 0.03291107714176178, "learning_rate": 7.988761771118932e-05, "loss": 0.0004, "step": 3032 }, { "epoch": 1.0284830246259484, "grad_norm": 0.042646851390600204, "learning_rate": 7.98621717486142e-05, "loss": 0.0004, "step": 3034 }, { "epoch": 1.0291611918789472, "grad_norm": 0.0066047427244484425, "learning_rate": 7.983671375755985e-05, "loss": 0.0001, "step": 3036 }, { "epoch": 1.029839359131946, "grad_norm": 0.0016902305651456118, "learning_rate": 7.98112437482808e-05, "loss": 0.0002, "step": 3038 }, { "epoch": 1.0305175263849446, "grad_norm": 0.016091756522655487, "learning_rate": 7.978576173103632e-05, "loss": 0.0001, "step": 3040 }, { "epoch": 1.0311956936379434, "grad_norm": 0.020465312525629997, "learning_rate": 7.976026771609057e-05, "loss": 0.0001, "step": 3042 }, { "epoch": 1.0318738608909421, "grad_norm": 0.012479097582399845, "learning_rate": 7.973476171371255e-05, "loss": 0.0002, "step": 3044 }, { "epoch": 1.032552028143941, "grad_norm": 0.032463181763887405, "learning_rate": 7.970924373417608e-05, "loss": 0.0002, "step": 3046 }, { "epoch": 1.0332301953969398, "grad_norm": 0.016862597316503525, "learning_rate": 7.968371378775978e-05, "loss": 0.0, "step": 3048 }, { "epoch": 1.0339083626499386, "grad_norm": 0.023158496245741844, "learning_rate": 7.96581718847471e-05, "loss": 0.0001, "step": 3050 }, { "epoch": 1.0345865299029373, "grad_norm": 0.009301434271037579, "learning_rate": 7.963261803542634e-05, "loss": 0.0, "step": 3052 }, { "epoch": 1.035264697155936, "grad_norm": 0.038685571402311325, "learning_rate": 7.960705225009059e-05, "loss": 0.0001, "step": 3054 }, { "epoch": 1.0359428644089348, "grad_norm": 0.002880866639316082, "learning_rate": 7.958147453903773e-05, "loss": 0.0, "step": 3056 }, { "epoch": 1.0366210316619335, "grad_norm": 0.01774897798895836, "learning_rate": 7.955588491257045e-05, "loss": 0.0, "step": 3058 }, { "epoch": 1.0372991989149325, "grad_norm": 0.009110460057854652, "learning_rate": 7.953028338099627e-05, "loss": 0.0001, "step": 3060 }, { "epoch": 1.0379773661679312, "grad_norm": 0.007355507463216782, "learning_rate": 7.950466995462749e-05, "loss": 0.0, "step": 3062 }, { "epoch": 1.03865553342093, "grad_norm": 0.03832750394940376, "learning_rate": 7.947904464378115e-05, "loss": 0.0003, "step": 3064 }, { "epoch": 1.0393337006739287, "grad_norm": 0.0005527051980607212, "learning_rate": 7.945340745877919e-05, "loss": 0.0, "step": 3066 }, { "epoch": 1.0400118679269275, "grad_norm": 0.009945861995220184, "learning_rate": 7.94277584099482e-05, "loss": 0.0, "step": 3068 }, { "epoch": 1.0406900351799262, "grad_norm": 0.013605031184852123, "learning_rate": 7.940209750761964e-05, "loss": 0.0, "step": 3070 }, { "epoch": 1.041368202432925, "grad_norm": 0.0027010184712707996, "learning_rate": 7.937642476212975e-05, "loss": 0.0, "step": 3072 }, { "epoch": 1.0420463696859237, "grad_norm": 0.018177801743149757, "learning_rate": 7.935074018381945e-05, "loss": 0.0, "step": 3074 }, { "epoch": 1.0427245369389226, "grad_norm": 0.0041480534709990025, "learning_rate": 7.932504378303452e-05, "loss": 0.0, "step": 3076 }, { "epoch": 1.0434027041919214, "grad_norm": 0.00467638298869133, "learning_rate": 7.929933557012545e-05, "loss": 0.0, "step": 3078 }, { "epoch": 1.0440808714449201, "grad_norm": 0.00816649291664362, "learning_rate": 7.927361555544754e-05, "loss": 0.0, "step": 3080 }, { "epoch": 1.0447590386979189, "grad_norm": 0.0006185176898725331, "learning_rate": 7.924788374936078e-05, "loss": 0.0, "step": 3082 }, { "epoch": 1.0454372059509176, "grad_norm": 0.000762543233577162, "learning_rate": 7.922214016222993e-05, "loss": 0.0, "step": 3084 }, { "epoch": 1.0461153732039163, "grad_norm": 0.002773831132799387, "learning_rate": 7.919638480442452e-05, "loss": 0.0, "step": 3086 }, { "epoch": 1.046793540456915, "grad_norm": 0.0035561330150812864, "learning_rate": 7.917061768631882e-05, "loss": 0.0001, "step": 3088 }, { "epoch": 1.047471707709914, "grad_norm": 0.001374011393636465, "learning_rate": 7.914483881829179e-05, "loss": 0.0, "step": 3090 }, { "epoch": 1.0481498749629128, "grad_norm": 0.0007063335506245494, "learning_rate": 7.911904821072717e-05, "loss": 0.0, "step": 3092 }, { "epoch": 1.0488280422159115, "grad_norm": 0.008668769150972366, "learning_rate": 7.909324587401342e-05, "loss": 0.0, "step": 3094 }, { "epoch": 1.0495062094689103, "grad_norm": 0.003817323362454772, "learning_rate": 7.90674318185437e-05, "loss": 0.0, "step": 3096 }, { "epoch": 1.050184376721909, "grad_norm": 0.005347683094441891, "learning_rate": 7.904160605471594e-05, "loss": 0.0, "step": 3098 }, { "epoch": 1.0508625439749077, "grad_norm": 0.0007073975284583867, "learning_rate": 7.901576859293272e-05, "loss": 0.0, "step": 3100 }, { "epoch": 1.0515407112279065, "grad_norm": 0.002511057071387768, "learning_rate": 7.898991944360137e-05, "loss": 0.0, "step": 3102 }, { "epoch": 1.0522188784809055, "grad_norm": 0.00014569952327292413, "learning_rate": 7.896405861713394e-05, "loss": 0.0, "step": 3104 }, { "epoch": 1.0528970457339042, "grad_norm": 0.013416592963039875, "learning_rate": 7.893818612394717e-05, "loss": 0.0, "step": 3106 }, { "epoch": 1.053575212986903, "grad_norm": 0.024336976930499077, "learning_rate": 7.891230197446249e-05, "loss": 0.0002, "step": 3108 }, { "epoch": 1.0542533802399017, "grad_norm": 0.00041308748768642545, "learning_rate": 7.888640617910603e-05, "loss": 0.0, "step": 3110 }, { "epoch": 1.0549315474929004, "grad_norm": 0.00036542725865729153, "learning_rate": 7.886049874830862e-05, "loss": 0.0, "step": 3112 }, { "epoch": 1.0556097147458992, "grad_norm": 0.002658010693266988, "learning_rate": 7.883457969250576e-05, "loss": 0.0001, "step": 3114 }, { "epoch": 1.056287881998898, "grad_norm": 0.0011226636124774814, "learning_rate": 7.880864902213765e-05, "loss": 0.0, "step": 3116 }, { "epoch": 1.0569660492518969, "grad_norm": 0.0011952995555475354, "learning_rate": 7.878270674764917e-05, "loss": 0.0001, "step": 3118 }, { "epoch": 1.0576442165048956, "grad_norm": 0.0014733074931427836, "learning_rate": 7.875675287948982e-05, "loss": 0.0001, "step": 3120 }, { "epoch": 1.0583223837578943, "grad_norm": 0.00036149792140349746, "learning_rate": 7.873078742811388e-05, "loss": 0.0, "step": 3122 }, { "epoch": 1.059000551010893, "grad_norm": 0.002111283363774419, "learning_rate": 7.87048104039802e-05, "loss": 0.0, "step": 3124 }, { "epoch": 1.0596787182638918, "grad_norm": 0.0004321536107454449, "learning_rate": 7.86788218175523e-05, "loss": 0.0003, "step": 3126 }, { "epoch": 1.0603568855168906, "grad_norm": 0.002552892081439495, "learning_rate": 7.865282167929842e-05, "loss": 0.0002, "step": 3128 }, { "epoch": 1.0610350527698893, "grad_norm": 0.0008854964398778975, "learning_rate": 7.862680999969139e-05, "loss": 0.0, "step": 3130 }, { "epoch": 1.061713220022888, "grad_norm": 0.0017830159049481153, "learning_rate": 7.860078678920872e-05, "loss": 0.0, "step": 3132 }, { "epoch": 1.062391387275887, "grad_norm": 0.03791436180472374, "learning_rate": 7.857475205833254e-05, "loss": 0.0003, "step": 3134 }, { "epoch": 1.0630695545288857, "grad_norm": 0.00176812254358083, "learning_rate": 7.854870581754966e-05, "loss": 0.0, "step": 3136 }, { "epoch": 1.0637477217818845, "grad_norm": 0.00034444493940100074, "learning_rate": 7.852264807735147e-05, "loss": 0.0, "step": 3138 }, { "epoch": 1.0644258890348832, "grad_norm": 0.047313202172517776, "learning_rate": 7.849657884823408e-05, "loss": 0.0002, "step": 3140 }, { "epoch": 1.065104056287882, "grad_norm": 0.003105980111286044, "learning_rate": 7.847049814069811e-05, "loss": 0.0001, "step": 3142 }, { "epoch": 1.0657822235408807, "grad_norm": 0.04377178102731705, "learning_rate": 7.844440596524891e-05, "loss": 0.0001, "step": 3144 }, { "epoch": 1.0664603907938794, "grad_norm": 0.014682923443615437, "learning_rate": 7.841830233239638e-05, "loss": 0.0001, "step": 3146 }, { "epoch": 1.0671385580468784, "grad_norm": 0.030811438336968422, "learning_rate": 7.839218725265506e-05, "loss": 0.0002, "step": 3148 }, { "epoch": 1.0678167252998771, "grad_norm": 0.024048537015914917, "learning_rate": 7.836606073654413e-05, "loss": 0.0001, "step": 3150 }, { "epoch": 1.0684948925528759, "grad_norm": 0.05794329568743706, "learning_rate": 7.833992279458732e-05, "loss": 0.0001, "step": 3152 }, { "epoch": 1.0691730598058746, "grad_norm": 0.037551164627075195, "learning_rate": 7.831377343731298e-05, "loss": 0.0002, "step": 3154 }, { "epoch": 1.0698512270588734, "grad_norm": 0.01598585955798626, "learning_rate": 7.828761267525411e-05, "loss": 0.0006, "step": 3156 }, { "epoch": 1.070529394311872, "grad_norm": 0.024345286190509796, "learning_rate": 7.826144051894824e-05, "loss": 0.0001, "step": 3158 }, { "epoch": 1.0712075615648708, "grad_norm": 0.009724217467010021, "learning_rate": 7.823525697893749e-05, "loss": 0.0001, "step": 3160 }, { "epoch": 1.0718857288178698, "grad_norm": 0.0019536882173269987, "learning_rate": 7.820906206576862e-05, "loss": 0.0002, "step": 3162 }, { "epoch": 1.0725638960708685, "grad_norm": 0.0014726163353770971, "learning_rate": 7.81828557899929e-05, "loss": 0.0, "step": 3164 }, { "epoch": 1.0732420633238673, "grad_norm": 0.022503558546304703, "learning_rate": 7.815663816216625e-05, "loss": 0.0001, "step": 3166 }, { "epoch": 1.073920230576866, "grad_norm": 0.011805574409663677, "learning_rate": 7.813040919284913e-05, "loss": 0.0, "step": 3168 }, { "epoch": 1.0745983978298648, "grad_norm": 0.00046225800178945065, "learning_rate": 7.810416889260653e-05, "loss": 0.0, "step": 3170 }, { "epoch": 1.0752765650828635, "grad_norm": 0.0012093356344848871, "learning_rate": 7.807791727200808e-05, "loss": 0.0001, "step": 3172 }, { "epoch": 1.0759547323358623, "grad_norm": 0.0019421727629378438, "learning_rate": 7.80516543416279e-05, "loss": 0.0001, "step": 3174 }, { "epoch": 1.0766328995888612, "grad_norm": 0.004075727425515652, "learning_rate": 7.80253801120447e-05, "loss": 0.0, "step": 3176 }, { "epoch": 1.07731106684186, "grad_norm": 0.0018418336985632777, "learning_rate": 7.799909459384176e-05, "loss": 0.0, "step": 3178 }, { "epoch": 1.0779892340948587, "grad_norm": 0.009764020331203938, "learning_rate": 7.797279779760685e-05, "loss": 0.0, "step": 3180 }, { "epoch": 1.0786674013478574, "grad_norm": 0.0008932505734264851, "learning_rate": 7.794648973393234e-05, "loss": 0.0, "step": 3182 }, { "epoch": 1.0793455686008562, "grad_norm": 0.015295356512069702, "learning_rate": 7.792017041341511e-05, "loss": 0.0001, "step": 3184 }, { "epoch": 1.080023735853855, "grad_norm": 0.001601558760739863, "learning_rate": 7.789383984665657e-05, "loss": 0.0, "step": 3186 }, { "epoch": 1.0807019031068537, "grad_norm": 0.03218120336532593, "learning_rate": 7.786749804426268e-05, "loss": 0.0, "step": 3188 }, { "epoch": 1.0813800703598524, "grad_norm": 0.002657031174749136, "learning_rate": 7.78411450168439e-05, "loss": 0.0, "step": 3190 }, { "epoch": 1.0820582376128514, "grad_norm": 0.001861520460806787, "learning_rate": 7.781478077501525e-05, "loss": 0.0, "step": 3192 }, { "epoch": 1.08273640486585, "grad_norm": 0.12163111567497253, "learning_rate": 7.778840532939622e-05, "loss": 0.0001, "step": 3194 }, { "epoch": 1.0834145721188488, "grad_norm": 0.02530871331691742, "learning_rate": 7.776201869061085e-05, "loss": 0.0001, "step": 3196 }, { "epoch": 1.0840927393718476, "grad_norm": 0.04777170345187187, "learning_rate": 7.773562086928768e-05, "loss": 0.0002, "step": 3198 }, { "epoch": 1.0847709066248463, "grad_norm": 0.03361360356211662, "learning_rate": 7.770921187605973e-05, "loss": 0.0001, "step": 3200 }, { "epoch": 1.085449073877845, "grad_norm": 0.0027512232773005962, "learning_rate": 7.768279172156453e-05, "loss": 0.0001, "step": 3202 }, { "epoch": 1.0861272411308438, "grad_norm": 0.015211574733257294, "learning_rate": 7.765636041644417e-05, "loss": 0.0002, "step": 3204 }, { "epoch": 1.0868054083838428, "grad_norm": 0.011098697781562805, "learning_rate": 7.762991797134514e-05, "loss": 0.0003, "step": 3206 }, { "epoch": 1.0874835756368415, "grad_norm": 0.017032770439982414, "learning_rate": 7.760346439691843e-05, "loss": 0.0001, "step": 3208 }, { "epoch": 1.0881617428898402, "grad_norm": 0.010514136403799057, "learning_rate": 7.757699970381958e-05, "loss": 0.0001, "step": 3210 }, { "epoch": 1.088839910142839, "grad_norm": 0.016751520335674286, "learning_rate": 7.755052390270854e-05, "loss": 0.0001, "step": 3212 }, { "epoch": 1.0895180773958377, "grad_norm": 0.003923425450921059, "learning_rate": 7.752403700424979e-05, "loss": 0.0001, "step": 3214 }, { "epoch": 1.0901962446488365, "grad_norm": 0.0033663345966488123, "learning_rate": 7.74975390191122e-05, "loss": 0.0001, "step": 3216 }, { "epoch": 1.0908744119018352, "grad_norm": 0.016712361946702003, "learning_rate": 7.74710299579692e-05, "loss": 0.0, "step": 3218 }, { "epoch": 1.091552579154834, "grad_norm": 0.005410924553871155, "learning_rate": 7.74445098314986e-05, "loss": 0.0001, "step": 3220 }, { "epoch": 1.092230746407833, "grad_norm": 0.005119035951793194, "learning_rate": 7.741797865038273e-05, "loss": 0.0001, "step": 3222 }, { "epoch": 1.0929089136608316, "grad_norm": 0.0694790631532669, "learning_rate": 7.739143642530833e-05, "loss": 0.0001, "step": 3224 }, { "epoch": 1.0935870809138304, "grad_norm": 0.037400029599666595, "learning_rate": 7.736488316696663e-05, "loss": 0.0001, "step": 3226 }, { "epoch": 1.0942652481668291, "grad_norm": 0.003814551280811429, "learning_rate": 7.733831888605325e-05, "loss": 0.0, "step": 3228 }, { "epoch": 1.0949434154198279, "grad_norm": 0.0038876463659107685, "learning_rate": 7.731174359326829e-05, "loss": 0.0001, "step": 3230 }, { "epoch": 1.0956215826728266, "grad_norm": 0.05716302618384361, "learning_rate": 7.728515729931629e-05, "loss": 0.0003, "step": 3232 }, { "epoch": 1.0962997499258256, "grad_norm": 0.007123335264623165, "learning_rate": 7.725856001490618e-05, "loss": 0.0002, "step": 3234 }, { "epoch": 1.0969779171788243, "grad_norm": 0.016324380412697792, "learning_rate": 7.723195175075136e-05, "loss": 0.0003, "step": 3236 }, { "epoch": 1.097656084431823, "grad_norm": 0.008962440304458141, "learning_rate": 7.720533251756963e-05, "loss": 0.0004, "step": 3238 }, { "epoch": 1.0983342516848218, "grad_norm": 0.004593605175614357, "learning_rate": 7.717870232608322e-05, "loss": 0.0001, "step": 3240 }, { "epoch": 1.0990124189378205, "grad_norm": 0.008814834989607334, "learning_rate": 7.715206118701876e-05, "loss": 0.0005, "step": 3242 }, { "epoch": 1.0996905861908193, "grad_norm": 0.022388102486729622, "learning_rate": 7.71254091111073e-05, "loss": 0.0002, "step": 3244 }, { "epoch": 1.100368753443818, "grad_norm": 0.02076655626296997, "learning_rate": 7.709874610908429e-05, "loss": 0.0001, "step": 3246 }, { "epoch": 1.1010469206968168, "grad_norm": 0.0011612613452598453, "learning_rate": 7.70720721916896e-05, "loss": 0.0001, "step": 3248 }, { "epoch": 1.1017250879498157, "grad_norm": 0.00931808352470398, "learning_rate": 7.704538736966746e-05, "loss": 0.0, "step": 3250 }, { "epoch": 1.1024032552028145, "grad_norm": 0.0014653477119281888, "learning_rate": 7.701869165376653e-05, "loss": 0.0, "step": 3252 }, { "epoch": 1.1030814224558132, "grad_norm": 0.0131069365888834, "learning_rate": 7.699198505473983e-05, "loss": 0.0, "step": 3254 }, { "epoch": 1.103759589708812, "grad_norm": 0.004241168964654207, "learning_rate": 7.696526758334477e-05, "loss": 0.0003, "step": 3256 }, { "epoch": 1.1044377569618107, "grad_norm": 0.019079118967056274, "learning_rate": 7.693853925034315e-05, "loss": 0.0003, "step": 3258 }, { "epoch": 1.1051159242148094, "grad_norm": 0.029437581077218056, "learning_rate": 7.691180006650116e-05, "loss": 0.0002, "step": 3260 }, { "epoch": 1.1057940914678082, "grad_norm": 0.010389408096671104, "learning_rate": 7.68850500425893e-05, "loss": 0.0001, "step": 3262 }, { "epoch": 1.1064722587208071, "grad_norm": 0.0028994681779295206, "learning_rate": 7.685828918938251e-05, "loss": 0.0, "step": 3264 }, { "epoch": 1.1071504259738059, "grad_norm": 0.005432248581200838, "learning_rate": 7.683151751766004e-05, "loss": 0.0, "step": 3266 }, { "epoch": 1.1078285932268046, "grad_norm": 0.012759734876453876, "learning_rate": 7.680473503820553e-05, "loss": 0.0003, "step": 3268 }, { "epoch": 1.1085067604798033, "grad_norm": 0.0006062522297725081, "learning_rate": 7.677794176180695e-05, "loss": 0.0, "step": 3270 }, { "epoch": 1.109184927732802, "grad_norm": 0.001002602744847536, "learning_rate": 7.675113769925663e-05, "loss": 0.0, "step": 3272 }, { "epoch": 1.1098630949858008, "grad_norm": 0.0004955371841788292, "learning_rate": 7.672432286135125e-05, "loss": 0.0, "step": 3274 }, { "epoch": 1.1105412622387996, "grad_norm": 0.0018315280321985483, "learning_rate": 7.669749725889182e-05, "loss": 0.0, "step": 3276 }, { "epoch": 1.1112194294917983, "grad_norm": 0.0031102353241294622, "learning_rate": 7.667066090268366e-05, "loss": 0.0, "step": 3278 }, { "epoch": 1.1118975967447973, "grad_norm": 0.0009504572371952236, "learning_rate": 7.66438138035365e-05, "loss": 0.0, "step": 3280 }, { "epoch": 1.112575763997796, "grad_norm": 0.000422391458414495, "learning_rate": 7.661695597226432e-05, "loss": 0.0, "step": 3282 }, { "epoch": 1.1132539312507947, "grad_norm": 0.001483218977227807, "learning_rate": 7.659008741968546e-05, "loss": 0.0, "step": 3284 }, { "epoch": 1.1139320985037935, "grad_norm": 0.004129413980990648, "learning_rate": 7.656320815662257e-05, "loss": 0.0, "step": 3286 }, { "epoch": 1.1146102657567922, "grad_norm": 0.00036125979386270046, "learning_rate": 7.653631819390261e-05, "loss": 0.0, "step": 3288 }, { "epoch": 1.115288433009791, "grad_norm": 0.0027813103515654802, "learning_rate": 7.650941754235686e-05, "loss": 0.0, "step": 3290 }, { "epoch": 1.1159666002627897, "grad_norm": 0.0009888537460938096, "learning_rate": 7.648250621282086e-05, "loss": 0.0, "step": 3292 }, { "epoch": 1.1166447675157887, "grad_norm": 0.00042577870772220194, "learning_rate": 7.645558421613457e-05, "loss": 0.0, "step": 3294 }, { "epoch": 1.1173229347687874, "grad_norm": 0.0008866444695740938, "learning_rate": 7.64286515631421e-05, "loss": 0.0, "step": 3296 }, { "epoch": 1.1180011020217862, "grad_norm": 0.00012718561629299074, "learning_rate": 7.640170826469195e-05, "loss": 0.0, "step": 3298 }, { "epoch": 1.118679269274785, "grad_norm": 0.002095594769343734, "learning_rate": 7.637475433163685e-05, "loss": 0.0, "step": 3300 }, { "epoch": 1.1193574365277836, "grad_norm": 0.02821212261915207, "learning_rate": 7.634778977483389e-05, "loss": 0.0001, "step": 3302 }, { "epoch": 1.1200356037807824, "grad_norm": 9.883302845992148e-05, "learning_rate": 7.632081460514433e-05, "loss": 0.0, "step": 3304 }, { "epoch": 1.1207137710337811, "grad_norm": 0.00022422336041927338, "learning_rate": 7.629382883343381e-05, "loss": 0.0, "step": 3306 }, { "epoch": 1.12139193828678, "grad_norm": 0.00010571469465503469, "learning_rate": 7.626683247057218e-05, "loss": 0.0, "step": 3308 }, { "epoch": 1.1220701055397788, "grad_norm": 0.00012248066195752472, "learning_rate": 7.623982552743356e-05, "loss": 0.0, "step": 3310 }, { "epoch": 1.1227482727927776, "grad_norm": 0.0009538671583868563, "learning_rate": 7.621280801489637e-05, "loss": 0.0, "step": 3312 }, { "epoch": 1.1234264400457763, "grad_norm": 0.00031743402360007167, "learning_rate": 7.618577994384324e-05, "loss": 0.0, "step": 3314 }, { "epoch": 1.124104607298775, "grad_norm": 0.00019657450320664793, "learning_rate": 7.615874132516107e-05, "loss": 0.0, "step": 3316 }, { "epoch": 1.1247827745517738, "grad_norm": 0.0009447080083191395, "learning_rate": 7.613169216974105e-05, "loss": 0.0, "step": 3318 }, { "epoch": 1.1254609418047725, "grad_norm": 0.00016561873781029135, "learning_rate": 7.610463248847852e-05, "loss": 0.0, "step": 3320 }, { "epoch": 1.1261391090577715, "grad_norm": 0.00016318686539307237, "learning_rate": 7.607756229227316e-05, "loss": 0.0, "step": 3322 }, { "epoch": 1.1268172763107702, "grad_norm": 0.0007923968369141221, "learning_rate": 7.605048159202883e-05, "loss": 0.0, "step": 3324 }, { "epoch": 1.127495443563769, "grad_norm": 0.001106180832721293, "learning_rate": 7.602339039865362e-05, "loss": 0.0, "step": 3326 }, { "epoch": 1.1281736108167677, "grad_norm": 0.00042214279528707266, "learning_rate": 7.599628872305989e-05, "loss": 0.0, "step": 3328 }, { "epoch": 1.1288517780697664, "grad_norm": 0.00034455794957466424, "learning_rate": 7.596917657616414e-05, "loss": 0.0, "step": 3330 }, { "epoch": 1.1295299453227652, "grad_norm": 0.0002697907912079245, "learning_rate": 7.594205396888718e-05, "loss": 0.0, "step": 3332 }, { "epoch": 1.130208112575764, "grad_norm": 0.00040953329880721867, "learning_rate": 7.5914920912154e-05, "loss": 0.0, "step": 3334 }, { "epoch": 1.1308862798287627, "grad_norm": 0.00025164970429614186, "learning_rate": 7.588777741689376e-05, "loss": 0.0, "step": 3336 }, { "epoch": 1.1315644470817616, "grad_norm": 0.0008031830657273531, "learning_rate": 7.586062349403987e-05, "loss": 0.0, "step": 3338 }, { "epoch": 1.1322426143347604, "grad_norm": 8.426888234680519e-05, "learning_rate": 7.583345915452994e-05, "loss": 0.0, "step": 3340 }, { "epoch": 1.132920781587759, "grad_norm": 0.0001466549001634121, "learning_rate": 7.580628440930574e-05, "loss": 0.0, "step": 3342 }, { "epoch": 1.1335989488407578, "grad_norm": 0.0003892593667842448, "learning_rate": 7.577909926931328e-05, "loss": 0.0, "step": 3344 }, { "epoch": 1.1342771160937566, "grad_norm": 9.798775863600895e-05, "learning_rate": 7.575190374550272e-05, "loss": 0.0, "step": 3346 }, { "epoch": 1.1349552833467553, "grad_norm": 0.011068711057305336, "learning_rate": 7.57246978488284e-05, "loss": 0.0, "step": 3348 }, { "epoch": 1.135633450599754, "grad_norm": 0.00015634531155228615, "learning_rate": 7.569748159024887e-05, "loss": 0.0, "step": 3350 }, { "epoch": 1.136311617852753, "grad_norm": 0.00030616766889579594, "learning_rate": 7.567025498072681e-05, "loss": 0.0, "step": 3352 }, { "epoch": 1.1369897851057518, "grad_norm": 0.00010516907786950469, "learning_rate": 7.564301803122915e-05, "loss": 0.0, "step": 3354 }, { "epoch": 1.1376679523587505, "grad_norm": 0.0002856908831745386, "learning_rate": 7.561577075272686e-05, "loss": 0.0, "step": 3356 }, { "epoch": 1.1383461196117493, "grad_norm": 0.0011014806805178523, "learning_rate": 7.558851315619518e-05, "loss": 0.0, "step": 3358 }, { "epoch": 1.139024286864748, "grad_norm": 0.00206442060880363, "learning_rate": 7.556124525261347e-05, "loss": 0.0, "step": 3360 }, { "epoch": 1.1397024541177467, "grad_norm": 0.002881616121158004, "learning_rate": 7.553396705296522e-05, "loss": 0.0, "step": 3362 }, { "epoch": 1.1403806213707455, "grad_norm": 0.018271418288350105, "learning_rate": 7.55066785682381e-05, "loss": 0.0, "step": 3364 }, { "epoch": 1.1410587886237442, "grad_norm": 0.0005267587839625776, "learning_rate": 7.54793798094239e-05, "loss": 0.0, "step": 3366 }, { "epoch": 1.1417369558767432, "grad_norm": 0.00033838770468719304, "learning_rate": 7.545207078751857e-05, "loss": 0.0, "step": 3368 }, { "epoch": 1.142415123129742, "grad_norm": 0.0003050903615076095, "learning_rate": 7.542475151352216e-05, "loss": 0.0, "step": 3370 }, { "epoch": 1.1430932903827407, "grad_norm": 0.022571176290512085, "learning_rate": 7.539742199843889e-05, "loss": 0.0001, "step": 3372 }, { "epoch": 1.1437714576357394, "grad_norm": 0.00010642271081451327, "learning_rate": 7.537008225327706e-05, "loss": 0.0, "step": 3374 }, { "epoch": 1.1444496248887381, "grad_norm": 0.00012813869398087263, "learning_rate": 7.534273228904915e-05, "loss": 0.0, "step": 3376 }, { "epoch": 1.1451277921417369, "grad_norm": 0.0001594992500031367, "learning_rate": 7.531537211677169e-05, "loss": 0.0, "step": 3378 }, { "epoch": 1.1458059593947358, "grad_norm": 0.0005135987303219736, "learning_rate": 7.528800174746539e-05, "loss": 0.0, "step": 3380 }, { "epoch": 1.1464841266477346, "grad_norm": 0.001825116341933608, "learning_rate": 7.526062119215497e-05, "loss": 0.0, "step": 3382 }, { "epoch": 1.1471622939007333, "grad_norm": 0.02989931032061577, "learning_rate": 7.523323046186941e-05, "loss": 0.0001, "step": 3384 }, { "epoch": 1.147840461153732, "grad_norm": 0.006477775983512402, "learning_rate": 7.52058295676416e-05, "loss": 0.0, "step": 3386 }, { "epoch": 1.1485186284067308, "grad_norm": 0.0006138100288808346, "learning_rate": 7.517841852050866e-05, "loss": 0.0, "step": 3388 }, { "epoch": 1.1491967956597295, "grad_norm": 7.73553765611723e-05, "learning_rate": 7.515099733151177e-05, "loss": 0.0, "step": 3390 }, { "epoch": 1.1498749629127283, "grad_norm": 0.0010425514774397016, "learning_rate": 7.512356601169615e-05, "loss": 0.0, "step": 3392 }, { "epoch": 1.150553130165727, "grad_norm": 0.0010554770706221461, "learning_rate": 7.509612457211113e-05, "loss": 0.0, "step": 3394 }, { "epoch": 1.151231297418726, "grad_norm": 0.00047697455738671124, "learning_rate": 7.506867302381015e-05, "loss": 0.0, "step": 3396 }, { "epoch": 1.1519094646717247, "grad_norm": 0.00013069043052382767, "learning_rate": 7.504121137785066e-05, "loss": 0.0, "step": 3398 }, { "epoch": 1.1525876319247235, "grad_norm": 0.0029345303773880005, "learning_rate": 7.501373964529424e-05, "loss": 0.0, "step": 3400 }, { "epoch": 1.1532657991777222, "grad_norm": 0.002666602609679103, "learning_rate": 7.498625783720646e-05, "loss": 0.0, "step": 3402 }, { "epoch": 1.153943966430721, "grad_norm": 0.0011464230483397841, "learning_rate": 7.495876596465703e-05, "loss": 0.0, "step": 3404 }, { "epoch": 1.1546221336837197, "grad_norm": 6.778768147341907e-05, "learning_rate": 7.493126403871964e-05, "loss": 0.0, "step": 3406 }, { "epoch": 1.1553003009367184, "grad_norm": 6.291534373303875e-05, "learning_rate": 7.490375207047209e-05, "loss": 0.0, "step": 3408 }, { "epoch": 1.1559784681897174, "grad_norm": 0.00035271712113171816, "learning_rate": 7.487623007099618e-05, "loss": 0.0, "step": 3410 }, { "epoch": 1.1566566354427161, "grad_norm": 0.0012021423317492008, "learning_rate": 7.484869805137778e-05, "loss": 0.0, "step": 3412 }, { "epoch": 1.1573348026957149, "grad_norm": 4.147122672293335e-05, "learning_rate": 7.482115602270677e-05, "loss": 0.0002, "step": 3414 }, { "epoch": 1.1580129699487136, "grad_norm": 7.595721399411559e-05, "learning_rate": 7.479360399607707e-05, "loss": 0.0, "step": 3416 }, { "epoch": 1.1586911372017124, "grad_norm": 9.155207226285711e-05, "learning_rate": 7.476604198258666e-05, "loss": 0.0, "step": 3418 }, { "epoch": 1.159369304454711, "grad_norm": 7.61376868467778e-05, "learning_rate": 7.473846999333749e-05, "loss": 0.0, "step": 3420 }, { "epoch": 1.1600474717077098, "grad_norm": 6.941679021110758e-05, "learning_rate": 7.471088803943557e-05, "loss": 0.0, "step": 3422 }, { "epoch": 1.1607256389607086, "grad_norm": 9.52499030972831e-05, "learning_rate": 7.468329613199091e-05, "loss": 0.0, "step": 3424 }, { "epoch": 1.1614038062137075, "grad_norm": 8.02976283011958e-05, "learning_rate": 7.465569428211751e-05, "loss": 0.0, "step": 3426 }, { "epoch": 1.1620819734667063, "grad_norm": 0.00013924592349212617, "learning_rate": 7.46280825009334e-05, "loss": 0.0, "step": 3428 }, { "epoch": 1.162760140719705, "grad_norm": 6.39605859760195e-05, "learning_rate": 7.460046079956063e-05, "loss": 0.0, "step": 3430 }, { "epoch": 1.1634383079727038, "grad_norm": 0.000588226830586791, "learning_rate": 7.457282918912516e-05, "loss": 0.0, "step": 3432 }, { "epoch": 1.1641164752257025, "grad_norm": 0.0006162902573123574, "learning_rate": 7.454518768075704e-05, "loss": 0.0, "step": 3434 }, { "epoch": 1.1647946424787012, "grad_norm": 7.87221870268695e-05, "learning_rate": 7.451753628559027e-05, "loss": 0.0, "step": 3436 }, { "epoch": 1.1654728097317002, "grad_norm": 0.00011970425839535892, "learning_rate": 7.44898750147628e-05, "loss": 0.0, "step": 3438 }, { "epoch": 1.166150976984699, "grad_norm": 6.236095941858366e-05, "learning_rate": 7.446220387941661e-05, "loss": 0.0001, "step": 3440 }, { "epoch": 1.1668291442376977, "grad_norm": 5.8419889683136716e-05, "learning_rate": 7.443452289069763e-05, "loss": 0.0, "step": 3442 }, { "epoch": 1.1675073114906964, "grad_norm": 6.64609469822608e-05, "learning_rate": 7.440683205975574e-05, "loss": 0.0, "step": 3444 }, { "epoch": 1.1681854787436952, "grad_norm": 7.750398071948439e-05, "learning_rate": 7.437913139774482e-05, "loss": 0.0, "step": 3446 }, { "epoch": 1.168863645996694, "grad_norm": 0.00014469469897449017, "learning_rate": 7.435142091582268e-05, "loss": 0.0, "step": 3448 }, { "epoch": 1.1695418132496926, "grad_norm": 0.0036504254676401615, "learning_rate": 7.432370062515113e-05, "loss": 0.0, "step": 3450 }, { "epoch": 1.1702199805026914, "grad_norm": 0.00010814583220053464, "learning_rate": 7.429597053689585e-05, "loss": 0.0, "step": 3452 }, { "epoch": 1.1708981477556903, "grad_norm": 0.0001561157696414739, "learning_rate": 7.426823066222656e-05, "loss": 0.0, "step": 3454 }, { "epoch": 1.171576315008689, "grad_norm": 0.00011890140740433708, "learning_rate": 7.424048101231686e-05, "loss": 0.0, "step": 3456 }, { "epoch": 1.1722544822616878, "grad_norm": 0.009851573035120964, "learning_rate": 7.42127215983443e-05, "loss": 0.0, "step": 3458 }, { "epoch": 1.1729326495146866, "grad_norm": 0.00021269686112646013, "learning_rate": 7.418495243149038e-05, "loss": 0.0, "step": 3460 }, { "epoch": 1.1736108167676853, "grad_norm": 0.0004351783136371523, "learning_rate": 7.415717352294051e-05, "loss": 0.0, "step": 3462 }, { "epoch": 1.174288984020684, "grad_norm": 0.00015459170390386134, "learning_rate": 7.412938488388404e-05, "loss": 0.0, "step": 3464 }, { "epoch": 1.1749671512736828, "grad_norm": 0.00015582202468067408, "learning_rate": 7.41015865255142e-05, "loss": 0.0, "step": 3466 }, { "epoch": 1.1756453185266817, "grad_norm": 0.00025736543466337025, "learning_rate": 7.407377845902823e-05, "loss": 0.0, "step": 3468 }, { "epoch": 1.1763234857796805, "grad_norm": 0.00011491354962345213, "learning_rate": 7.404596069562715e-05, "loss": 0.0, "step": 3470 }, { "epoch": 1.1770016530326792, "grad_norm": 5.988680277368985e-05, "learning_rate": 7.401813324651599e-05, "loss": 0.0, "step": 3472 }, { "epoch": 1.177679820285678, "grad_norm": 0.00019077217439189553, "learning_rate": 7.399029612290362e-05, "loss": 0.0, "step": 3474 }, { "epoch": 1.1783579875386767, "grad_norm": 0.00011350025306455791, "learning_rate": 7.396244933600285e-05, "loss": 0.0, "step": 3476 }, { "epoch": 1.1790361547916755, "grad_norm": 0.000465821212856099, "learning_rate": 7.393459289703035e-05, "loss": 0.0, "step": 3478 }, { "epoch": 1.1797143220446742, "grad_norm": 0.0001867363171186298, "learning_rate": 7.39067268172067e-05, "loss": 0.0, "step": 3480 }, { "epoch": 1.180392489297673, "grad_norm": 0.00012939849693793803, "learning_rate": 7.387885110775632e-05, "loss": 0.0, "step": 3482 }, { "epoch": 1.181070656550672, "grad_norm": 0.00019843588233925402, "learning_rate": 7.38509657799076e-05, "loss": 0.0, "step": 3484 }, { "epoch": 1.1817488238036706, "grad_norm": 0.000269518030108884, "learning_rate": 7.38230708448927e-05, "loss": 0.0, "step": 3486 }, { "epoch": 1.1824269910566694, "grad_norm": 0.00014750401896890253, "learning_rate": 7.379516631394772e-05, "loss": 0.0, "step": 3488 }, { "epoch": 1.1831051583096681, "grad_norm": 7.503381493734196e-05, "learning_rate": 7.376725219831258e-05, "loss": 0.0, "step": 3490 }, { "epoch": 1.1837833255626669, "grad_norm": 0.0002959097037091851, "learning_rate": 7.373932850923111e-05, "loss": 0.0, "step": 3492 }, { "epoch": 1.1844614928156656, "grad_norm": 0.00016742225852794945, "learning_rate": 7.371139525795095e-05, "loss": 0.0, "step": 3494 }, { "epoch": 1.1851396600686646, "grad_norm": 6.525198114104569e-05, "learning_rate": 7.368345245572362e-05, "loss": 0.0, "step": 3496 }, { "epoch": 1.1858178273216633, "grad_norm": 8.403266838286072e-05, "learning_rate": 7.365550011380447e-05, "loss": 0.0, "step": 3498 }, { "epoch": 1.186495994574662, "grad_norm": 4.769510996993631e-05, "learning_rate": 7.362753824345272e-05, "loss": 0.0, "step": 3500 }, { "epoch": 1.1871741618276608, "grad_norm": 4.9367710744263604e-05, "learning_rate": 7.359956685593137e-05, "loss": 0.0, "step": 3502 }, { "epoch": 1.1878523290806595, "grad_norm": 3.271954119554721e-05, "learning_rate": 7.357158596250731e-05, "loss": 0.0, "step": 3504 }, { "epoch": 1.1885304963336583, "grad_norm": 0.00010201849363511428, "learning_rate": 7.354359557445126e-05, "loss": 0.0, "step": 3506 }, { "epoch": 1.189208663586657, "grad_norm": 4.488110062084161e-05, "learning_rate": 7.351559570303771e-05, "loss": 0.0, "step": 3508 }, { "epoch": 1.1898868308396557, "grad_norm": 5.867435902473517e-05, "learning_rate": 7.348758635954503e-05, "loss": 0.0, "step": 3510 }, { "epoch": 1.1905649980926545, "grad_norm": 0.00029293273109942675, "learning_rate": 7.345956755525539e-05, "loss": 0.0, "step": 3512 }, { "epoch": 1.1912431653456534, "grad_norm": 0.00039945391472429037, "learning_rate": 7.343153930145473e-05, "loss": 0.0, "step": 3514 }, { "epoch": 1.1919213325986522, "grad_norm": 0.00014375893806573004, "learning_rate": 7.340350160943284e-05, "loss": 0.0, "step": 3516 }, { "epoch": 1.192599499851651, "grad_norm": 0.00047697001718916, "learning_rate": 7.33754544904833e-05, "loss": 0.0, "step": 3518 }, { "epoch": 1.1932776671046497, "grad_norm": 4.121005258639343e-05, "learning_rate": 7.334739795590347e-05, "loss": 0.0, "step": 3520 }, { "epoch": 1.1939558343576484, "grad_norm": 4.2145245970459655e-05, "learning_rate": 7.331933201699457e-05, "loss": 0.0, "step": 3522 }, { "epoch": 1.1946340016106471, "grad_norm": 3.849615313811228e-05, "learning_rate": 7.32912566850615e-05, "loss": 0.0001, "step": 3524 }, { "epoch": 1.195312168863646, "grad_norm": 9.348442836198956e-05, "learning_rate": 7.326317197141304e-05, "loss": 0.0, "step": 3526 }, { "epoch": 1.1959903361166448, "grad_norm": 0.00015607431123498827, "learning_rate": 7.323507788736167e-05, "loss": 0.0, "step": 3528 }, { "epoch": 1.1966685033696436, "grad_norm": 2.9953756893519312e-05, "learning_rate": 7.320697444422372e-05, "loss": 0.0, "step": 3530 }, { "epoch": 1.1973466706226423, "grad_norm": 0.08800560981035233, "learning_rate": 7.317886165331925e-05, "loss": 0.0002, "step": 3532 }, { "epoch": 1.198024837875641, "grad_norm": 0.028783440589904785, "learning_rate": 7.315073952597205e-05, "loss": 0.0001, "step": 3534 }, { "epoch": 1.1987030051286398, "grad_norm": 0.001528909313492477, "learning_rate": 7.312260807350975e-05, "loss": 0.0, "step": 3536 }, { "epoch": 1.1993811723816385, "grad_norm": 0.0027066238690167665, "learning_rate": 7.309446730726367e-05, "loss": 0.0, "step": 3538 }, { "epoch": 1.2000593396346373, "grad_norm": 0.006256472785025835, "learning_rate": 7.306631723856895e-05, "loss": 0.0001, "step": 3540 }, { "epoch": 1.2007375068876363, "grad_norm": 0.051211562007665634, "learning_rate": 7.303815787876438e-05, "loss": 0.0006, "step": 3542 }, { "epoch": 1.201415674140635, "grad_norm": 0.08517817407846451, "learning_rate": 7.300998923919259e-05, "loss": 0.0012, "step": 3544 }, { "epoch": 1.2020938413936337, "grad_norm": 0.04007571563124657, "learning_rate": 7.298181133119987e-05, "loss": 0.0004, "step": 3546 }, { "epoch": 1.2027720086466325, "grad_norm": 0.07620231807231903, "learning_rate": 7.29536241661363e-05, "loss": 0.0007, "step": 3548 }, { "epoch": 1.2034501758996312, "grad_norm": 0.03595748171210289, "learning_rate": 7.292542775535567e-05, "loss": 0.0003, "step": 3550 }, { "epoch": 1.20412834315263, "grad_norm": 0.03171832486987114, "learning_rate": 7.289722211021546e-05, "loss": 0.0001, "step": 3552 }, { "epoch": 1.2048065104056287, "grad_norm": 0.018091239035129547, "learning_rate": 7.286900724207694e-05, "loss": 0.0002, "step": 3554 }, { "epoch": 1.2054846776586277, "grad_norm": 0.06366919726133347, "learning_rate": 7.284078316230504e-05, "loss": 0.0004, "step": 3556 }, { "epoch": 1.2061628449116264, "grad_norm": 0.008984467014670372, "learning_rate": 7.281254988226842e-05, "loss": 0.0, "step": 3558 }, { "epoch": 1.2068410121646251, "grad_norm": 0.02150563895702362, "learning_rate": 7.278430741333941e-05, "loss": 0.0002, "step": 3560 }, { "epoch": 1.2075191794176239, "grad_norm": 0.030671702697873116, "learning_rate": 7.275605576689412e-05, "loss": 0.0002, "step": 3562 }, { "epoch": 1.2081973466706226, "grad_norm": 0.014883170835673809, "learning_rate": 7.272779495431228e-05, "loss": 0.0001, "step": 3564 }, { "epoch": 1.2088755139236214, "grad_norm": 0.01106690801680088, "learning_rate": 7.269952498697734e-05, "loss": 0.0002, "step": 3566 }, { "epoch": 1.20955368117662, "grad_norm": 0.010097106918692589, "learning_rate": 7.267124587627647e-05, "loss": 0.0001, "step": 3568 }, { "epoch": 1.2102318484296188, "grad_norm": 0.019213326275348663, "learning_rate": 7.264295763360046e-05, "loss": 0.0, "step": 3570 }, { "epoch": 1.2109100156826178, "grad_norm": 0.03620114549994469, "learning_rate": 7.261466027034383e-05, "loss": 0.0001, "step": 3572 }, { "epoch": 1.2115881829356165, "grad_norm": 0.00409303605556488, "learning_rate": 7.258635379790474e-05, "loss": 0.0, "step": 3574 }, { "epoch": 1.2122663501886153, "grad_norm": 0.008056599646806717, "learning_rate": 7.255803822768505e-05, "loss": 0.0, "step": 3576 }, { "epoch": 1.212944517441614, "grad_norm": 0.025245072320103645, "learning_rate": 7.252971357109025e-05, "loss": 0.0001, "step": 3578 }, { "epoch": 1.2136226846946128, "grad_norm": 0.002902921987697482, "learning_rate": 7.250137983952952e-05, "loss": 0.0, "step": 3580 }, { "epoch": 1.2143008519476115, "grad_norm": 0.0032983103301376104, "learning_rate": 7.247303704441568e-05, "loss": 0.0, "step": 3582 }, { "epoch": 1.2149790192006105, "grad_norm": 0.012134423479437828, "learning_rate": 7.24446851971652e-05, "loss": 0.0001, "step": 3584 }, { "epoch": 1.2156571864536092, "grad_norm": 0.0009648970444686711, "learning_rate": 7.241632430919822e-05, "loss": 0.0, "step": 3586 }, { "epoch": 1.216335353706608, "grad_norm": 0.0016609688755124807, "learning_rate": 7.238795439193848e-05, "loss": 0.0, "step": 3588 }, { "epoch": 1.2170135209596067, "grad_norm": 0.0020582436118274927, "learning_rate": 7.235957545681342e-05, "loss": 0.0, "step": 3590 }, { "epoch": 1.2176916882126054, "grad_norm": 0.0016364112962037325, "learning_rate": 7.2331187515254e-05, "loss": 0.0001, "step": 3592 }, { "epoch": 1.2183698554656042, "grad_norm": 0.005296518560498953, "learning_rate": 7.230279057869493e-05, "loss": 0.0, "step": 3594 }, { "epoch": 1.219048022718603, "grad_norm": 0.012611468322575092, "learning_rate": 7.227438465857448e-05, "loss": 0.0, "step": 3596 }, { "epoch": 1.2197261899716016, "grad_norm": 0.007326039485633373, "learning_rate": 7.224596976633456e-05, "loss": 0.0, "step": 3598 }, { "epoch": 1.2204043572246006, "grad_norm": 0.00021889481286052614, "learning_rate": 7.221754591342067e-05, "loss": 0.0, "step": 3600 }, { "epoch": 1.2210825244775994, "grad_norm": 0.000410861597629264, "learning_rate": 7.218911311128196e-05, "loss": 0.0, "step": 3602 }, { "epoch": 1.221760691730598, "grad_norm": 0.00032909182482399046, "learning_rate": 7.216067137137112e-05, "loss": 0.0, "step": 3604 }, { "epoch": 1.2224388589835968, "grad_norm": 0.032102398574352264, "learning_rate": 7.213222070514453e-05, "loss": 0.0001, "step": 3606 }, { "epoch": 1.2231170262365956, "grad_norm": 0.00018162255582865328, "learning_rate": 7.210376112406205e-05, "loss": 0.0, "step": 3608 }, { "epoch": 1.2237951934895943, "grad_norm": 0.0013201575493440032, "learning_rate": 7.207529263958726e-05, "loss": 0.0, "step": 3610 }, { "epoch": 1.224473360742593, "grad_norm": 0.0005053295753896236, "learning_rate": 7.204681526318724e-05, "loss": 0.0, "step": 3612 }, { "epoch": 1.225151527995592, "grad_norm": 0.0003318975795991719, "learning_rate": 7.201832900633265e-05, "loss": 0.0, "step": 3614 }, { "epoch": 1.2258296952485908, "grad_norm": 0.007706344127655029, "learning_rate": 7.198983388049778e-05, "loss": 0.0, "step": 3616 }, { "epoch": 1.2265078625015895, "grad_norm": 0.00017018703510984778, "learning_rate": 7.196132989716045e-05, "loss": 0.0, "step": 3618 }, { "epoch": 1.2271860297545882, "grad_norm": 0.0008728935499675572, "learning_rate": 7.193281706780206e-05, "loss": 0.0, "step": 3620 }, { "epoch": 1.227864197007587, "grad_norm": 0.00048533343942835927, "learning_rate": 7.190429540390757e-05, "loss": 0.0, "step": 3622 }, { "epoch": 1.2285423642605857, "grad_norm": 0.00021282030502334237, "learning_rate": 7.187576491696553e-05, "loss": 0.0, "step": 3624 }, { "epoch": 1.2292205315135845, "grad_norm": 0.004707904066890478, "learning_rate": 7.184722561846798e-05, "loss": 0.0, "step": 3626 }, { "epoch": 1.2298986987665832, "grad_norm": 0.0026295706629753113, "learning_rate": 7.181867751991057e-05, "loss": 0.0, "step": 3628 }, { "epoch": 1.2305768660195822, "grad_norm": 0.0006299935048446059, "learning_rate": 7.179012063279247e-05, "loss": 0.0, "step": 3630 }, { "epoch": 1.231255033272581, "grad_norm": 0.00022165761038195342, "learning_rate": 7.176155496861638e-05, "loss": 0.0, "step": 3632 }, { "epoch": 1.2319332005255796, "grad_norm": 0.0006338229868561029, "learning_rate": 7.173298053888855e-05, "loss": 0.0, "step": 3634 }, { "epoch": 1.2326113677785784, "grad_norm": 0.0003351867781020701, "learning_rate": 7.170439735511876e-05, "loss": 0.0, "step": 3636 }, { "epoch": 1.2332895350315771, "grad_norm": 9.762517584022135e-05, "learning_rate": 7.167580542882031e-05, "loss": 0.0, "step": 3638 }, { "epoch": 1.2339677022845759, "grad_norm": 0.04705990478396416, "learning_rate": 7.164720477151002e-05, "loss": 0.0001, "step": 3640 }, { "epoch": 1.2346458695375748, "grad_norm": 0.0005479294923134148, "learning_rate": 7.161859539470824e-05, "loss": 0.0, "step": 3642 }, { "epoch": 1.2353240367905736, "grad_norm": 0.015779433771967888, "learning_rate": 7.158997730993884e-05, "loss": 0.0001, "step": 3644 }, { "epoch": 1.2360022040435723, "grad_norm": 0.009688456542789936, "learning_rate": 7.156135052872915e-05, "loss": 0.0, "step": 3646 }, { "epoch": 1.236680371296571, "grad_norm": 0.0007699349662289023, "learning_rate": 7.153271506261003e-05, "loss": 0.001, "step": 3648 }, { "epoch": 1.2373585385495698, "grad_norm": 0.02694147825241089, "learning_rate": 7.150407092311586e-05, "loss": 0.0009, "step": 3650 }, { "epoch": 1.2380367058025685, "grad_norm": 0.054436396807432175, "learning_rate": 7.147541812178451e-05, "loss": 0.0004, "step": 3652 }, { "epoch": 1.2387148730555673, "grad_norm": 0.011601508595049381, "learning_rate": 7.14467566701573e-05, "loss": 0.0004, "step": 3654 }, { "epoch": 1.239393040308566, "grad_norm": 0.009782589972019196, "learning_rate": 7.141808657977907e-05, "loss": 0.0002, "step": 3656 }, { "epoch": 1.2400712075615647, "grad_norm": 0.036703720688819885, "learning_rate": 7.138940786219813e-05, "loss": 0.0001, "step": 3658 }, { "epoch": 1.2407493748145637, "grad_norm": 0.01584113948047161, "learning_rate": 7.136072052896625e-05, "loss": 0.0003, "step": 3660 }, { "epoch": 1.2414275420675625, "grad_norm": 0.00839573610574007, "learning_rate": 7.133202459163872e-05, "loss": 0.0001, "step": 3662 }, { "epoch": 1.2421057093205612, "grad_norm": 0.015938330441713333, "learning_rate": 7.130332006177421e-05, "loss": 0.0003, "step": 3664 }, { "epoch": 1.24278387657356, "grad_norm": 0.03005986101925373, "learning_rate": 7.127460695093493e-05, "loss": 0.0002, "step": 3666 }, { "epoch": 1.2434620438265587, "grad_norm": 0.007686430588364601, "learning_rate": 7.124588527068652e-05, "loss": 0.0001, "step": 3668 }, { "epoch": 1.2441402110795574, "grad_norm": 0.03607560321688652, "learning_rate": 7.121715503259807e-05, "loss": 0.0002, "step": 3670 }, { "epoch": 1.2448183783325564, "grad_norm": 0.018232079222798347, "learning_rate": 7.118841624824209e-05, "loss": 0.0003, "step": 3672 }, { "epoch": 1.2454965455855551, "grad_norm": 0.014432893134653568, "learning_rate": 7.115966892919459e-05, "loss": 0.0, "step": 3674 }, { "epoch": 1.2461747128385539, "grad_norm": 0.019733788445591927, "learning_rate": 7.113091308703498e-05, "loss": 0.0001, "step": 3676 }, { "epoch": 1.2468528800915526, "grad_norm": 0.013168008998036385, "learning_rate": 7.110214873334611e-05, "loss": 0.0001, "step": 3678 }, { "epoch": 1.2475310473445513, "grad_norm": 0.02215537056326866, "learning_rate": 7.107337587971424e-05, "loss": 0.0001, "step": 3680 }, { "epoch": 1.24820921459755, "grad_norm": 0.014840980060398579, "learning_rate": 7.104459453772909e-05, "loss": 0.0, "step": 3682 }, { "epoch": 1.2488873818505488, "grad_norm": 0.0022126887924969196, "learning_rate": 7.101580471898377e-05, "loss": 0.0, "step": 3684 }, { "epoch": 1.2495655491035476, "grad_norm": 0.0022282488644123077, "learning_rate": 7.098700643507485e-05, "loss": 0.0, "step": 3686 }, { "epoch": 1.2502437163565463, "grad_norm": 0.0018177232705056667, "learning_rate": 7.095819969760221e-05, "loss": 0.0, "step": 3688 }, { "epoch": 1.2509218836095453, "grad_norm": 0.007829871959984303, "learning_rate": 7.092938451816926e-05, "loss": 0.0, "step": 3690 }, { "epoch": 1.251600050862544, "grad_norm": 0.0006072524702176452, "learning_rate": 7.090056090838274e-05, "loss": 0.0, "step": 3692 }, { "epoch": 1.2522782181155427, "grad_norm": 0.030747784301638603, "learning_rate": 7.087172887985276e-05, "loss": 0.0001, "step": 3694 }, { "epoch": 1.2529563853685415, "grad_norm": 0.02318490482866764, "learning_rate": 7.08428884441929e-05, "loss": 0.0001, "step": 3696 }, { "epoch": 1.2536345526215402, "grad_norm": 0.005606907419860363, "learning_rate": 7.081403961302006e-05, "loss": 0.0, "step": 3698 }, { "epoch": 1.2543127198745392, "grad_norm": 0.010471644811332226, "learning_rate": 7.078518239795456e-05, "loss": 0.0, "step": 3700 }, { "epoch": 1.254990887127538, "grad_norm": 0.05316907539963722, "learning_rate": 7.075631681062007e-05, "loss": 0.0006, "step": 3702 }, { "epoch": 1.2556690543805367, "grad_norm": 0.010512227192521095, "learning_rate": 7.072744286264365e-05, "loss": 0.0, "step": 3704 }, { "epoch": 1.2563472216335354, "grad_norm": 0.028978761285543442, "learning_rate": 7.069856056565572e-05, "loss": 0.0001, "step": 3706 }, { "epoch": 1.2570253888865341, "grad_norm": 0.02035476639866829, "learning_rate": 7.066966993129009e-05, "loss": 0.0001, "step": 3708 }, { "epoch": 1.2577035561395329, "grad_norm": 0.0009529913077130914, "learning_rate": 7.064077097118385e-05, "loss": 0.0001, "step": 3710 }, { "epoch": 1.2583817233925316, "grad_norm": 0.0010063213994726539, "learning_rate": 7.061186369697756e-05, "loss": 0.0, "step": 3712 }, { "epoch": 1.2590598906455304, "grad_norm": 0.0006211152067407966, "learning_rate": 7.058294812031503e-05, "loss": 0.0, "step": 3714 }, { "epoch": 1.259738057898529, "grad_norm": 0.05503276735544205, "learning_rate": 7.055402425284346e-05, "loss": 0.0, "step": 3716 }, { "epoch": 1.260416225151528, "grad_norm": 0.021889325231313705, "learning_rate": 7.052509210621337e-05, "loss": 0.0002, "step": 3718 }, { "epoch": 1.2610943924045268, "grad_norm": 0.03572991490364075, "learning_rate": 7.049615169207864e-05, "loss": 0.0002, "step": 3720 }, { "epoch": 1.2617725596575255, "grad_norm": 0.001351168379187584, "learning_rate": 7.046720302209646e-05, "loss": 0.0, "step": 3722 }, { "epoch": 1.2624507269105243, "grad_norm": 0.002800681861117482, "learning_rate": 7.043824610792735e-05, "loss": 0.0, "step": 3724 }, { "epoch": 1.263128894163523, "grad_norm": 0.00028367669438011944, "learning_rate": 7.040928096123516e-05, "loss": 0.0003, "step": 3726 }, { "epoch": 1.2638070614165218, "grad_norm": 0.0004958737408742309, "learning_rate": 7.038030759368703e-05, "loss": 0.0, "step": 3728 }, { "epoch": 1.2644852286695207, "grad_norm": 0.0005568963824771345, "learning_rate": 7.035132601695343e-05, "loss": 0.0, "step": 3730 }, { "epoch": 1.2651633959225195, "grad_norm": 0.04810892790555954, "learning_rate": 7.032233624270816e-05, "loss": 0.0008, "step": 3732 }, { "epoch": 1.2658415631755182, "grad_norm": 0.08710169792175293, "learning_rate": 7.029333828262827e-05, "loss": 0.0001, "step": 3734 }, { "epoch": 1.266519730428517, "grad_norm": 0.040873605757951736, "learning_rate": 7.026433214839416e-05, "loss": 0.0002, "step": 3736 }, { "epoch": 1.2671978976815157, "grad_norm": 0.0023663484025746584, "learning_rate": 7.023531785168948e-05, "loss": 0.0, "step": 3738 }, { "epoch": 1.2678760649345144, "grad_norm": 0.07860898226499557, "learning_rate": 7.020629540420118e-05, "loss": 0.0003, "step": 3740 }, { "epoch": 1.2685542321875132, "grad_norm": 0.011849219910800457, "learning_rate": 7.017726481761951e-05, "loss": 0.0001, "step": 3742 }, { "epoch": 1.269232399440512, "grad_norm": 0.021843140944838524, "learning_rate": 7.014822610363798e-05, "loss": 0.0001, "step": 3744 }, { "epoch": 1.2699105666935107, "grad_norm": 0.06327859312295914, "learning_rate": 7.01191792739534e-05, "loss": 0.0004, "step": 3746 }, { "epoch": 1.2705887339465096, "grad_norm": 0.030594058334827423, "learning_rate": 7.00901243402658e-05, "loss": 0.0005, "step": 3748 }, { "epoch": 1.2712669011995084, "grad_norm": 0.05372355878353119, "learning_rate": 7.006106131427853e-05, "loss": 0.0003, "step": 3750 }, { "epoch": 1.271945068452507, "grad_norm": 0.03993901237845421, "learning_rate": 7.003199020769815e-05, "loss": 0.0005, "step": 3752 }, { "epoch": 1.2726232357055058, "grad_norm": 0.06573836505413055, "learning_rate": 7.000291103223453e-05, "loss": 0.0003, "step": 3754 }, { "epoch": 1.2733014029585046, "grad_norm": 0.01412898674607277, "learning_rate": 6.997382379960071e-05, "loss": 0.0002, "step": 3756 }, { "epoch": 1.2739795702115035, "grad_norm": 0.0644112154841423, "learning_rate": 6.994472852151307e-05, "loss": 0.0004, "step": 3758 }, { "epoch": 1.2746577374645023, "grad_norm": 0.7547671794891357, "learning_rate": 6.991562520969116e-05, "loss": 0.0154, "step": 3760 }, { "epoch": 1.275335904717501, "grad_norm": 0.059369105845689774, "learning_rate": 6.98865138758578e-05, "loss": 0.0276, "step": 3762 }, { "epoch": 1.2760140719704998, "grad_norm": 0.9872779846191406, "learning_rate": 6.985739453173903e-05, "loss": 0.0121, "step": 3764 }, { "epoch": 1.2766922392234985, "grad_norm": 0.18768635392189026, "learning_rate": 6.982826718906412e-05, "loss": 0.0134, "step": 3766 }, { "epoch": 1.2773704064764972, "grad_norm": 3.0843405723571777, "learning_rate": 6.979913185956556e-05, "loss": 0.0152, "step": 3768 }, { "epoch": 1.278048573729496, "grad_norm": 0.3620230257511139, "learning_rate": 6.976998855497905e-05, "loss": 0.0199, "step": 3770 }, { "epoch": 1.2787267409824947, "grad_norm": 0.6195115447044373, "learning_rate": 6.974083728704351e-05, "loss": 0.0219, "step": 3772 }, { "epoch": 1.2794049082354935, "grad_norm": 2.172183036804199, "learning_rate": 6.971167806750107e-05, "loss": 0.036, "step": 3774 }, { "epoch": 1.2800830754884924, "grad_norm": 2.368955373764038, "learning_rate": 6.968251090809708e-05, "loss": 0.1914, "step": 3776 }, { "epoch": 1.2807612427414912, "grad_norm": 2.1724870204925537, "learning_rate": 6.965333582058002e-05, "loss": 0.0416, "step": 3778 }, { "epoch": 1.28143940999449, "grad_norm": 1.5031301975250244, "learning_rate": 6.962415281670167e-05, "loss": 0.0268, "step": 3780 }, { "epoch": 1.2821175772474886, "grad_norm": 1.1724557876586914, "learning_rate": 6.95949619082169e-05, "loss": 0.051, "step": 3782 }, { "epoch": 1.2827957445004874, "grad_norm": 0.24994565546512604, "learning_rate": 6.956576310688382e-05, "loss": 0.0078, "step": 3784 }, { "epoch": 1.2834739117534861, "grad_norm": 0.874370813369751, "learning_rate": 6.953655642446368e-05, "loss": 0.0047, "step": 3786 }, { "epoch": 1.284152079006485, "grad_norm": 0.08019069582223892, "learning_rate": 6.950734187272098e-05, "loss": 0.0028, "step": 3788 }, { "epoch": 1.2848302462594838, "grad_norm": 0.18998371064662933, "learning_rate": 6.947811946342329e-05, "loss": 0.0032, "step": 3790 }, { "epoch": 1.2855084135124826, "grad_norm": 0.08605227619409561, "learning_rate": 6.944888920834142e-05, "loss": 0.9699, "step": 3792 }, { "epoch": 1.2861865807654813, "grad_norm": 0.1060505360364914, "learning_rate": 6.941965111924929e-05, "loss": 0.0012, "step": 3794 }, { "epoch": 1.28686474801848, "grad_norm": 0.13712160289287567, "learning_rate": 6.939040520792402e-05, "loss": 0.0016, "step": 3796 }, { "epoch": 1.2875429152714788, "grad_norm": 0.19173935055732727, "learning_rate": 6.936115148614584e-05, "loss": 0.0016, "step": 3798 }, { "epoch": 1.2882210825244775, "grad_norm": 70.08195495605469, "learning_rate": 6.933188996569817e-05, "loss": 1.3792, "step": 3800 }, { "epoch": 1.2888992497774763, "grad_norm": 0.691962480545044, "learning_rate": 6.930262065836752e-05, "loss": 0.0204, "step": 3802 }, { "epoch": 1.289577417030475, "grad_norm": 0.8632583022117615, "learning_rate": 6.92733435759436e-05, "loss": 0.0146, "step": 3804 }, { "epoch": 1.290255584283474, "grad_norm": 1.6672307252883911, "learning_rate": 6.924405873021918e-05, "loss": 0.0287, "step": 3806 }, { "epoch": 1.2909337515364727, "grad_norm": 0.9258363246917725, "learning_rate": 6.921476613299018e-05, "loss": 0.0152, "step": 3808 }, { "epoch": 1.2916119187894715, "grad_norm": 0.15455372631549835, "learning_rate": 6.918546579605572e-05, "loss": 0.0117, "step": 3810 }, { "epoch": 1.2922900860424702, "grad_norm": 0.1822269707918167, "learning_rate": 6.915615773121791e-05, "loss": 0.0051, "step": 3812 }, { "epoch": 1.292968253295469, "grad_norm": 0.20265448093414307, "learning_rate": 6.912684195028208e-05, "loss": 0.0054, "step": 3814 }, { "epoch": 1.293646420548468, "grad_norm": 0.09864166378974915, "learning_rate": 6.909751846505656e-05, "loss": 0.0038, "step": 3816 }, { "epoch": 1.2943245878014666, "grad_norm": 0.04846733435988426, "learning_rate": 6.906818728735291e-05, "loss": 0.0018, "step": 3818 }, { "epoch": 1.2950027550544654, "grad_norm": 0.07136884331703186, "learning_rate": 6.903884842898569e-05, "loss": 0.0015, "step": 3820 }, { "epoch": 1.2956809223074641, "grad_norm": 0.024747643619775772, "learning_rate": 6.900950190177263e-05, "loss": 0.0005, "step": 3822 }, { "epoch": 1.2963590895604629, "grad_norm": 0.09419432282447815, "learning_rate": 6.898014771753443e-05, "loss": 0.0008, "step": 3824 }, { "epoch": 1.2970372568134616, "grad_norm": 0.0712243989109993, "learning_rate": 6.895078588809502e-05, "loss": 0.0009, "step": 3826 }, { "epoch": 1.2977154240664603, "grad_norm": 0.017767785117030144, "learning_rate": 6.892141642528134e-05, "loss": 0.001, "step": 3828 }, { "epoch": 1.298393591319459, "grad_norm": 0.041735365986824036, "learning_rate": 6.889203934092336e-05, "loss": 0.0006, "step": 3830 }, { "epoch": 1.2990717585724578, "grad_norm": 0.04800105467438698, "learning_rate": 6.88626546468542e-05, "loss": 0.0009, "step": 3832 }, { "epoch": 1.2997499258254566, "grad_norm": 0.06188105419278145, "learning_rate": 6.883326235491001e-05, "loss": 0.0006, "step": 3834 }, { "epoch": 1.3004280930784555, "grad_norm": 0.03689048811793327, "learning_rate": 6.880386247692999e-05, "loss": 0.0005, "step": 3836 }, { "epoch": 1.3011062603314543, "grad_norm": 0.009136232547461987, "learning_rate": 6.877445502475643e-05, "loss": 0.0004, "step": 3838 }, { "epoch": 1.301784427584453, "grad_norm": 0.024388829246163368, "learning_rate": 6.874504001023461e-05, "loss": 0.0004, "step": 3840 }, { "epoch": 1.3024625948374517, "grad_norm": 0.0197377260774374, "learning_rate": 6.871561744521293e-05, "loss": 0.0001, "step": 3842 }, { "epoch": 1.3031407620904505, "grad_norm": 0.01399923861026764, "learning_rate": 6.868618734154276e-05, "loss": 0.0003, "step": 3844 }, { "epoch": 1.3038189293434495, "grad_norm": 0.0143091706559062, "learning_rate": 6.865674971107858e-05, "loss": 0.0002, "step": 3846 }, { "epoch": 1.3044970965964482, "grad_norm": 0.006448931992053986, "learning_rate": 6.862730456567786e-05, "loss": 0.0001, "step": 3848 }, { "epoch": 1.305175263849447, "grad_norm": 0.019587090238928795, "learning_rate": 6.859785191720106e-05, "loss": 0.0003, "step": 3850 } ], "logging_steps": 2, "max_steps": 10000, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6.403106472762278e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }